mirror of
https://github.com/dgtlmoon/changedetection.io.git
synced 2026-01-26 17:10:18 +00:00
Compare commits
108 Commits
history-fi
...
3792-langu
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
426694b002 | ||
|
|
1ab4ca63ae | ||
|
|
3ee24a4b9c | ||
|
|
220499fd0f | ||
|
|
50c798f498 | ||
|
|
fce47cde95 | ||
|
|
b3eb88b6d2 | ||
|
|
2b9618bbb5 | ||
|
|
aa73ce2ee6 | ||
|
|
0cbf345e84 | ||
|
|
d65e08e7c8 | ||
|
|
10f233a939 | ||
|
|
52911d699f | ||
|
|
7e886e0c56 | ||
|
|
151e603af7 | ||
|
|
7311af4b58 | ||
|
|
af193e8d7a | ||
|
|
9e2acadb7e | ||
|
|
48da93b4ec | ||
|
|
0c1adc8906 | ||
|
|
9e5a0a0209 | ||
|
|
9b96689072 | ||
|
|
5e5674f48d | ||
|
|
272e68ad2e | ||
|
|
01e06979d8 | ||
|
|
e45c77d51d | ||
|
|
bee1130c6e | ||
|
|
5f8448d0e2 | ||
|
|
9438d38dc6 | ||
|
|
d0c66758c2 | ||
|
|
9e8a9d5907 | ||
|
|
7449be39fb | ||
|
|
e9f3d0bce4 | ||
|
|
2abc8aa9b4 | ||
|
|
69b70a2a07 | ||
|
|
0c42bcb8d6 | ||
|
|
091c708a28 | ||
|
|
084be9c990 | ||
|
|
6db1085337 | ||
|
|
66553e106d | ||
|
|
5b01dbd9f8 | ||
|
|
c86f214fc3 | ||
|
|
32149640d9 | ||
|
|
15f16455fc | ||
|
|
15cdfac9d9 | ||
|
|
04de397916 | ||
|
|
4643082c5b | ||
|
|
3b2b74e62d | ||
|
|
68354cf53d | ||
|
|
3e364e0eba | ||
|
|
06ea29bfc7 | ||
|
|
f4e178955c | ||
|
|
51d531d732 | ||
|
|
e40c4ca97d | ||
|
|
b8ede70f3a | ||
|
|
50b349b464 | ||
|
|
67d097cca7 | ||
|
|
494385a379 | ||
|
|
c2ee84b753 | ||
|
|
c1e0296cda | ||
|
|
f041223c38 | ||
|
|
d36738d7ef | ||
|
|
e51ff34c89 | ||
|
|
ba4ed9cf27 | ||
|
|
33b7f1684d | ||
|
|
3d14df6a11 | ||
|
|
08ce1e28ce | ||
|
|
e4118a1620 | ||
|
|
64d0c09b08 | ||
|
|
008e5eb024 | ||
|
|
e6553065fd | ||
|
|
de996a4566 | ||
|
|
4784ae4cd0 | ||
|
|
39274f121c | ||
|
|
4b1d871078 | ||
|
|
f78c2dcffd | ||
|
|
1c2c22b8df | ||
|
|
3276a9347a | ||
|
|
d763bb4267 | ||
|
|
be3c9892e0 | ||
|
|
0be5005776 | ||
|
|
12ce03c0bb | ||
|
|
3767a2d5b9 | ||
|
|
71c8d8b1b1 | ||
|
|
20cbe6f510 | ||
|
|
3a6e1f908f | ||
|
|
73fdbf24e3 | ||
|
|
629f939224 | ||
|
|
48299e5738 | ||
|
|
5b1b70b8ab | ||
|
|
678d568b37 | ||
|
|
fb15b62fb9 | ||
|
|
8dc39d4a3d | ||
|
|
805cd618d4 | ||
|
|
4ba5fcce8f | ||
|
|
b9305faf21 | ||
|
|
3d3b53831e | ||
|
|
2ae29ab78f | ||
|
|
caffd804fe | ||
|
|
c58a97f69d | ||
|
|
e2b407c6f3 | ||
|
|
d65a2c784d | ||
|
|
9bc812a167 | ||
|
|
fd2080567d | ||
|
|
969c75e7be | ||
|
|
4b14cec5f4 | ||
|
|
a8d5ea067d | ||
|
|
2f6873f7d5 |
16
.github/test/Dockerfile-alpine
vendored
16
.github/test/Dockerfile-alpine
vendored
@@ -7,6 +7,8 @@ ENV PYTHONUNBUFFERED=1
|
||||
|
||||
COPY requirements.txt /requirements.txt
|
||||
|
||||
ARG TARGETPLATFORM
|
||||
|
||||
RUN \
|
||||
apk add --update --no-cache --virtual=build-dependencies \
|
||||
build-base \
|
||||
@@ -27,7 +29,19 @@ RUN \
|
||||
file \
|
||||
nodejs \
|
||||
poppler-utils \
|
||||
python3 && \
|
||||
python3 \
|
||||
glib \
|
||||
libsm \
|
||||
libxext \
|
||||
libxrender && \
|
||||
case "$TARGETPLATFORM" in \
|
||||
linux/arm/v7|linux/arm/v8) \
|
||||
echo "INFO: Skipping py3-opencv on $TARGETPLATFORM (using pixelmatch fallback)" \
|
||||
;; \
|
||||
*) \
|
||||
apk add --update --no-cache py3-opencv || echo "WARN: py3-opencv install failed, using pixelmatch fallback" \
|
||||
;; \
|
||||
esac && \
|
||||
echo "**** pip3 install test of changedetection.io ****" && \
|
||||
python3 -m venv /lsiopy && \
|
||||
pip install -U pip wheel setuptools && \
|
||||
|
||||
2
.github/workflows/codeql-analysis.yml
vendored
2
.github/workflows/codeql-analysis.yml
vendored
@@ -30,7 +30,7 @@ jobs:
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v5
|
||||
uses: actions/checkout@v6
|
||||
|
||||
# Initializes the CodeQL tools for scanning.
|
||||
- name: Initialize CodeQL
|
||||
|
||||
22
.github/workflows/containers.yml
vendored
22
.github/workflows/containers.yml
vendored
@@ -39,14 +39,14 @@ jobs:
|
||||
# Or if we are in a tagged release scenario.
|
||||
if: ${{ github.event.workflow_run.conclusion == 'success' }} || ${{ github.event.release.tag_name }} != ''
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- uses: actions/checkout@v6
|
||||
- name: Set up Python 3.11
|
||||
uses: actions/setup-python@v6
|
||||
with:
|
||||
python-version: 3.11
|
||||
|
||||
- name: Cache pip packages
|
||||
uses: actions/cache@v4
|
||||
uses: actions/cache@v5
|
||||
with:
|
||||
path: ~/.cache/pip
|
||||
key: ${{ runner.os }}-pip-${{ hashFiles('requirements.txt') }}
|
||||
@@ -93,16 +93,27 @@ jobs:
|
||||
driver-opts: image=moby/buildkit:master
|
||||
|
||||
# master branch -> :dev container tag
|
||||
- name: Docker meta :dev
|
||||
if: ${{ github.ref == 'refs/heads/master' && github.event_name != 'release' }}
|
||||
uses: docker/metadata-action@v5
|
||||
id: meta_dev
|
||||
with:
|
||||
images: |
|
||||
${{ secrets.DOCKER_HUB_USERNAME }}/changedetection.io
|
||||
ghcr.io/${{ github.repository }}
|
||||
tags: |
|
||||
type=raw,value=dev
|
||||
|
||||
- name: Build and push :dev
|
||||
id: docker_build
|
||||
if: ${{ github.ref }} == "refs/heads/master"
|
||||
if: ${{ github.ref == 'refs/heads/master' && github.event_name != 'release' }}
|
||||
uses: docker/build-push-action@v6
|
||||
with:
|
||||
context: ./
|
||||
file: ./Dockerfile
|
||||
push: true
|
||||
tags: |
|
||||
${{ secrets.DOCKER_HUB_USERNAME }}/changedetection.io:dev,ghcr.io/${{ github.repository }}:dev
|
||||
tags: ${{ steps.meta_dev.outputs.tags }}
|
||||
labels: ${{ steps.meta_dev.outputs.labels }}
|
||||
platforms: linux/amd64,linux/arm64,linux/arm/v7,linux/arm/v8
|
||||
cache-from: type=gha
|
||||
cache-to: type=gha,mode=max
|
||||
@@ -141,6 +152,7 @@ jobs:
|
||||
file: ./Dockerfile
|
||||
push: true
|
||||
tags: ${{ steps.meta.outputs.tags }}
|
||||
labels: ${{ steps.meta.outputs.labels }}
|
||||
platforms: linux/amd64,linux/arm64,linux/arm/v7,linux/arm/v8
|
||||
cache-from: type=gha
|
||||
cache-to: type=gha,mode=max
|
||||
|
||||
8
.github/workflows/pypi-release.yml
vendored
8
.github/workflows/pypi-release.yml
vendored
@@ -7,7 +7,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- uses: actions/checkout@v6
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v6
|
||||
with:
|
||||
@@ -21,7 +21,7 @@ jobs:
|
||||
- name: Build a binary wheel and a source tarball
|
||||
run: python3 -m build
|
||||
- name: Store the distribution packages
|
||||
uses: actions/upload-artifact@v5
|
||||
uses: actions/upload-artifact@v6
|
||||
with:
|
||||
name: python-package-distributions
|
||||
path: dist/
|
||||
@@ -34,7 +34,7 @@ jobs:
|
||||
- build
|
||||
steps:
|
||||
- name: Download all the dists
|
||||
uses: actions/download-artifact@v6
|
||||
uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: python-package-distributions
|
||||
path: dist/
|
||||
@@ -93,7 +93,7 @@ jobs:
|
||||
|
||||
steps:
|
||||
- name: Download all the dists
|
||||
uses: actions/download-artifact@v6
|
||||
uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: python-package-distributions
|
||||
path: dist/
|
||||
|
||||
4
.github/workflows/test-container-build.yml
vendored
4
.github/workflows/test-container-build.yml
vendored
@@ -44,14 +44,14 @@ jobs:
|
||||
- platform: linux/arm64
|
||||
dockerfile: ./.github/test/Dockerfile-alpine
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- uses: actions/checkout@v6
|
||||
- name: Set up Python 3.11
|
||||
uses: actions/setup-python@v6
|
||||
with:
|
||||
python-version: 3.11
|
||||
|
||||
- name: Cache pip packages
|
||||
uses: actions/cache@v4
|
||||
uses: actions/cache@v5
|
||||
with:
|
||||
path: ~/.cache/pip
|
||||
key: ${{ runner.os }}-pip-${{ hashFiles('requirements.txt') }}
|
||||
|
||||
2
.github/workflows/test-only.yml
vendored
2
.github/workflows/test-only.yml
vendored
@@ -7,7 +7,7 @@ jobs:
|
||||
lint-code:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- uses: actions/checkout@v6
|
||||
- name: Lint with Ruff
|
||||
run: |
|
||||
pip install ruff
|
||||
|
||||
@@ -21,7 +21,7 @@ jobs:
|
||||
env:
|
||||
PYTHON_VERSION: ${{ inputs.python-version }}
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Set up Python ${{ env.PYTHON_VERSION }}
|
||||
uses: actions/setup-python@v6
|
||||
@@ -29,7 +29,7 @@ jobs:
|
||||
python-version: ${{ env.PYTHON_VERSION }}
|
||||
|
||||
- name: Cache pip packages
|
||||
uses: actions/cache@v4
|
||||
uses: actions/cache@v5
|
||||
with:
|
||||
path: ~/.cache/pip
|
||||
key: ${{ runner.os }}-pip-py${{ env.PYTHON_VERSION }}-${{ hashFiles('requirements.txt') }}
|
||||
@@ -52,7 +52,7 @@ jobs:
|
||||
docker save test-changedetectionio -o /tmp/test-changedetectionio.tar
|
||||
|
||||
- name: Upload Docker image artifact
|
||||
uses: actions/upload-artifact@v5
|
||||
uses: actions/upload-artifact@v6
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp/test-changedetectionio.tar
|
||||
@@ -66,10 +66,10 @@ jobs:
|
||||
env:
|
||||
PYTHON_VERSION: ${{ inputs.python-version }}
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v6
|
||||
uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
@@ -84,6 +84,7 @@ jobs:
|
||||
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_watch_model'
|
||||
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_jinja2_security'
|
||||
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_semver'
|
||||
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_html_to_text'
|
||||
|
||||
# Basic pytest tests with ancillary services
|
||||
basic-tests:
|
||||
@@ -93,10 +94,10 @@ jobs:
|
||||
env:
|
||||
PYTHON_VERSION: ${{ inputs.python-version }}
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v6
|
||||
uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
@@ -119,7 +120,7 @@ jobs:
|
||||
|
||||
- name: Store test artifacts
|
||||
if: always()
|
||||
uses: actions/upload-artifact@v5
|
||||
uses: actions/upload-artifact@v6
|
||||
with:
|
||||
name: test-cdio-basic-tests-output-py${{ env.PYTHON_VERSION }}
|
||||
path: output-logs
|
||||
@@ -132,10 +133,10 @@ jobs:
|
||||
env:
|
||||
PYTHON_VERSION: ${{ inputs.python-version }}
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v6
|
||||
uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
@@ -174,10 +175,10 @@ jobs:
|
||||
env:
|
||||
PYTHON_VERSION: ${{ inputs.python-version }}
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v6
|
||||
uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
@@ -214,10 +215,10 @@ jobs:
|
||||
env:
|
||||
PYTHON_VERSION: ${{ inputs.python-version }}
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v6
|
||||
uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
@@ -250,10 +251,10 @@ jobs:
|
||||
env:
|
||||
PYTHON_VERSION: ${{ inputs.python-version }}
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v6
|
||||
uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
@@ -279,10 +280,10 @@ jobs:
|
||||
env:
|
||||
PYTHON_VERSION: ${{ inputs.python-version }}
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v6
|
||||
uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
@@ -319,10 +320,10 @@ jobs:
|
||||
env:
|
||||
PYTHON_VERSION: ${{ inputs.python-version }}
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v6
|
||||
uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
@@ -350,10 +351,10 @@ jobs:
|
||||
env:
|
||||
PYTHON_VERSION: ${{ inputs.python-version }}
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v6
|
||||
uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
@@ -395,10 +396,10 @@ jobs:
|
||||
env:
|
||||
PYTHON_VERSION: ${{ inputs.python-version }}
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v6
|
||||
uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
|
||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -21,6 +21,7 @@ venv/
|
||||
# IDEs
|
||||
.idea
|
||||
.vscode/settings.json
|
||||
*~
|
||||
|
||||
# Datastore files
|
||||
datastore/
|
||||
|
||||
37
Dockerfile
37
Dockerfile
@@ -34,6 +34,7 @@ ENV OPENSSL_LIB_DIR="/usr/lib/arm-linux-gnueabihf"
|
||||
ENV OPENSSL_INCLUDE_DIR="/usr/include/openssl"
|
||||
# Additional environment variables for cryptography Rust build
|
||||
ENV CRYPTOGRAPHY_DONT_BUILD_RUST=1
|
||||
|
||||
RUN --mount=type=cache,id=pip,sharing=locked,target=/tmp/pip-cache \
|
||||
pip install \
|
||||
--prefer-binary \
|
||||
@@ -43,7 +44,6 @@ RUN --mount=type=cache,id=pip,sharing=locked,target=/tmp/pip-cache \
|
||||
--target=/dependencies \
|
||||
-r /requirements.txt
|
||||
|
||||
|
||||
# Playwright is an alternative to Selenium
|
||||
# Excluded this package from requirements.txt to prevent arm/v6 and arm/v7 builds from failing
|
||||
# https://github.com/dgtlmoon/changedetection.io/pull/1067 also musl/alpine (not supported)
|
||||
@@ -52,13 +52,38 @@ RUN --mount=type=cache,id=pip,sharing=locked,target=/tmp/pip-cache \
|
||||
--prefer-binary \
|
||||
--cache-dir=/tmp/pip-cache \
|
||||
--target=/dependencies \
|
||||
playwright~=1.48.0 \
|
||||
playwright~=1.56.0 \
|
||||
|| echo "WARN: Failed to install Playwright. The application can still run, but the Playwright option will be disabled."
|
||||
|
||||
# OpenCV is optional for fast image comparison (pixelmatch is the fallback)
|
||||
# Skip on arm/v7 and arm/v8 where builds take weeks - excluded from requirements.txt
|
||||
ARG TARGETPLATFORM
|
||||
RUN --mount=type=cache,id=pip,sharing=locked,target=/tmp/pip-cache \
|
||||
case "$TARGETPLATFORM" in \
|
||||
linux/arm/v7|linux/arm/v8) \
|
||||
echo "INFO: Skipping OpenCV on $TARGETPLATFORM (build takes too long), using pixelmatch fallback" \
|
||||
;; \
|
||||
*) \
|
||||
pip install \
|
||||
--prefer-binary \
|
||||
--extra-index-url https://www.piwheels.org/simple \
|
||||
--cache-dir=/tmp/pip-cache \
|
||||
--target=/dependencies \
|
||||
opencv-python-headless>=4.8.0.76 \
|
||||
|| echo "WARN: OpenCV install failed, will use pixelmatch fallback" \
|
||||
;; \
|
||||
esac
|
||||
|
||||
|
||||
# Final image stage
|
||||
FROM python:${PYTHON_VERSION}-slim-bookworm
|
||||
LABEL org.opencontainers.image.source="https://github.com/dgtlmoon/changedetection.io"
|
||||
LABEL org.opencontainers.image.url="https://changedetection.io"
|
||||
LABEL org.opencontainers.image.documentation="https://changedetection.io/tutorials"
|
||||
LABEL org.opencontainers.image.title="changedetection.io"
|
||||
LABEL org.opencontainers.image.description="Self-hosted web page change monitoring and notification service"
|
||||
LABEL org.opencontainers.image.licenses="Apache-2.0"
|
||||
LABEL org.opencontainers.image.vendor="changedetection.io"
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
libxslt1.1 \
|
||||
@@ -69,6 +94,11 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
# favicon type detection and other uses
|
||||
file \
|
||||
zlib1g \
|
||||
# OpenCV dependencies for image processing
|
||||
libglib2.0-0 \
|
||||
libsm6 \
|
||||
libxext6 \
|
||||
libxrender-dev \
|
||||
&& apt-get clean && rm -rf /var/lib/apt/lists/*
|
||||
|
||||
|
||||
@@ -89,6 +119,9 @@ EXPOSE 5000
|
||||
# The actual flask app module
|
||||
COPY changedetectionio /app/changedetectionio
|
||||
|
||||
# Compile translation files for i18n support
|
||||
RUN pybabel compile -d /app/changedetectionio/translations
|
||||
|
||||
# Also for OpenAPI validation wrapper - needs the YML
|
||||
RUN [ ! -d "/app/docs" ] && mkdir /app/docs
|
||||
COPY docs/api-spec.yaml /app/docs/api-spec.yaml
|
||||
|
||||
@@ -11,10 +11,12 @@ recursive-include changedetectionio/realtime *
|
||||
recursive-include changedetectionio/static *
|
||||
recursive-include changedetectionio/templates *
|
||||
recursive-include changedetectionio/tests *
|
||||
recursive-include changedetectionio/translations *
|
||||
recursive-include changedetectionio/widgets *
|
||||
prune changedetectionio/static/package-lock.json
|
||||
prune changedetectionio/static/styles/node_modules
|
||||
prune changedetectionio/static/styles/package-lock.json
|
||||
include changedetectionio/favicon_utils.py
|
||||
include changedetection.py
|
||||
include requirements.txt
|
||||
include README-pip.md
|
||||
|
||||
@@ -14,7 +14,7 @@ Ideal for monitoring price changes, content edits, conditional changes and more.
|
||||
[<img src="https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/docs/screenshot.png" style="max-width:100%;" alt="Self-hosted web page change monitoring, list of websites with changes" title="Self-hosted web page change monitoring, list of websites with changes" />](https://changedetection.io)
|
||||
|
||||
|
||||
[**Don't have time? Try our extremely affordable subscription use our proxies and support!**](https://changedetection.io)
|
||||
[**Don't have time? Try our extremely affordable subscription use our proxies and support!**](https://changedetection.io)
|
||||
|
||||
|
||||
|
||||
@@ -31,7 +31,7 @@ Available when connected to a <a href="https://github.com/dgtlmoon/changedetecti
|
||||
|
||||
### Perform interactive browser steps
|
||||
|
||||
Fill in text boxes, click buttons and more, setup your changedetection scenario.
|
||||
Fill in text boxes, click buttons and more, setup your changedetection scenario.
|
||||
|
||||
Using the **Browser Steps** configuration, add basic steps before performing change detection, such as logging into websites, adding a product to a cart, accept cookie logins, entering dates and refining searches.
|
||||
|
||||
@@ -54,7 +54,7 @@ Requires Playwright to be enabled.
|
||||
- Know when your favourite whiskey is on sale, or other special deals are announced before anyone else
|
||||
- COVID related news from government websites
|
||||
- University/organisation news from their website
|
||||
- Detect and monitor changes in JSON API responses
|
||||
- Detect and monitor changes in JSON API responses
|
||||
- JSON API monitoring and alerting
|
||||
- Changes in legal and other documents
|
||||
- Trigger API calls via notifications when text appears on a website
|
||||
@@ -86,7 +86,7 @@ _Need an actual Chrome runner with Javascript support? We support fetching via W
|
||||
|
||||
We [recommend and use Bright Data](https://brightdata.grsm.io/n0r16zf7eivq) global proxy services, Bright Data will match any first deposit up to $100 using our signup link.
|
||||
|
||||
[Oxylabs](https://oxylabs.go2cloud.org/SH2d) is also an excellent proxy provider and well worth using, they offer Residental, ISP, Rotating and many other proxy types to suit your project.
|
||||
[Oxylabs](https://oxylabs.go2cloud.org/SH2d) is also an excellent proxy provider and well worth using, they offer Residential, ISP, Rotating and many other proxy types to suit your project.
|
||||
|
||||
Please :star: star :star: this project and help it grow! https://github.com/dgtlmoon/changedetection.io/
|
||||
|
||||
@@ -106,4 +106,3 @@ $ changedetection.io -d /path/to/empty/data/dir -p 5000
|
||||
Then visit http://127.0.0.1:5000 , You should now be able to access the UI.
|
||||
|
||||
See https://changedetection.io for more information.
|
||||
|
||||
|
||||
@@ -183,6 +183,9 @@ docker compose pull && docker compose up -d
|
||||
|
||||
See the wiki for more information https://github.com/dgtlmoon/changedetection.io/wiki
|
||||
|
||||
## Different browser viewport sizes (mobile, desktop etc)
|
||||
|
||||
If you are using the recommended `sockpuppetbrowser` (which is in the docker-compose.yml as a setting to be uncommented) you can easily set different viewport sizes for your web page change detection, [see more information here about setting up different viewport sizes](https://github.com/dgtlmoon/sockpuppetbrowser?tab=readme-ov-file#setting-viewport-size).
|
||||
|
||||
## Filters
|
||||
|
||||
|
||||
5
babel.cfg
Normal file
5
babel.cfg
Normal file
@@ -0,0 +1,5 @@
|
||||
[python: **.py]
|
||||
keywords = _:1,_l:1,gettext:1
|
||||
|
||||
[jinja2: **/templates/**.html]
|
||||
encoding = utf-8
|
||||
@@ -64,7 +64,7 @@ def count_words_in_history(watch):
|
||||
return 0
|
||||
|
||||
latest_key = list(watch.history.keys())[-1]
|
||||
latest_content = watch.get_history_snapshot(latest_key)
|
||||
latest_content = watch.get_history_snapshot(timestamp=latest_key)
|
||||
return len(latest_content.split())
|
||||
except Exception as e:
|
||||
logger.error(f"Error counting words: {str(e)}")
|
||||
|
||||
@@ -1,11 +1,12 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
# Read more https://github.com/dgtlmoon/changedetection.io/wiki
|
||||
|
||||
__version__ = '0.50.39'
|
||||
# Semver means never use .01, or 00. Should be .1.
|
||||
__version__ = '0.52.8'
|
||||
|
||||
from changedetectionio.strtobool import strtobool
|
||||
from json.decoder import JSONDecodeError
|
||||
import logging
|
||||
import os
|
||||
import getopt
|
||||
import platform
|
||||
@@ -19,6 +20,58 @@ from changedetectionio import store
|
||||
from changedetectionio.flask_app import changedetection_app
|
||||
from loguru import logger
|
||||
|
||||
# ==============================================================================
|
||||
# Multiprocessing Configuration - CRITICAL for Thread Safety
|
||||
# ==============================================================================
|
||||
#
|
||||
# PROBLEM: Python 3.12+ warns about fork() with multi-threaded processes:
|
||||
# "This process is multi-threaded, use of fork() may lead to deadlocks"
|
||||
#
|
||||
# WHY IT'S DANGEROUS:
|
||||
# 1. This Flask app has multiple threads (HTTP handlers, workers, SocketIO)
|
||||
# 2. fork() copies ONLY the calling thread to the child process
|
||||
# 3. BUT fork() also copies all locks/mutexes in their current state
|
||||
# 4. If another thread held a lock during fork() → child has locked lock with no owner
|
||||
# 5. Result: PERMANENT DEADLOCK if child tries to acquire that lock
|
||||
#
|
||||
# SOLUTION: Use 'spawn' instead of 'fork'
|
||||
# - spawn starts a fresh Python interpreter (no inherited threads or locks)
|
||||
# - Slower (~200ms vs ~1ms) but safe with multi-threaded parent
|
||||
# - Consistent across all platforms (Windows already uses spawn by default)
|
||||
#
|
||||
# IMPLEMENTATION:
|
||||
# 1. Explicit contexts everywhere (primary protection):
|
||||
# - playwright.py: ctx = multiprocessing.get_context('spawn')
|
||||
# - puppeteer.py: ctx = multiprocessing.get_context('spawn')
|
||||
# - isolated_opencv.py: ctx = multiprocessing.get_context('spawn')
|
||||
# - isolated_libvips.py: ctx = multiprocessing.get_context('spawn')
|
||||
#
|
||||
# 2. Global default (defense-in-depth, below):
|
||||
# - Safety net if future code forgets explicit context
|
||||
# - Protects against third-party libraries using Process()
|
||||
# - Costs nothing (explicit contexts always override it)
|
||||
#
|
||||
# WHY BOTH?
|
||||
# - Explicit contexts: Clear, self-documenting, always works
|
||||
# - Global default: Safety net for forgotten contexts or library code
|
||||
# - If someone writes "Process()" instead of "ctx.Process()", still safe!
|
||||
#
|
||||
# See: https://docs.python.org/3/library/multiprocessing.html#contexts-and-start-methods
|
||||
# ==============================================================================
|
||||
|
||||
import multiprocessing
|
||||
import sys
|
||||
|
||||
# Set spawn as global default (safety net - all our code uses explicit contexts anyway)
|
||||
# Skip in tests to avoid breaking pytest-flask's LiveServer fixture (uses unpicklable local functions)
|
||||
if 'pytest' not in sys.modules:
|
||||
try:
|
||||
if multiprocessing.get_start_method(allow_none=True) is None:
|
||||
multiprocessing.set_start_method('spawn', force=False)
|
||||
logger.debug("Set multiprocessing default to 'spawn' for thread safety (explicit contexts used everywhere)")
|
||||
except RuntimeError:
|
||||
logger.debug(f"Multiprocessing start method already set: {multiprocessing.get_start_method()}")
|
||||
|
||||
# Only global so we can access it in the signal handler
|
||||
app = None
|
||||
datastore = None
|
||||
@@ -74,6 +127,12 @@ def main():
|
||||
|
||||
datastore_path = None
|
||||
do_cleanup = False
|
||||
# Optional URL to watch since start
|
||||
default_url = None
|
||||
# Set a default logger level
|
||||
logger_level = 'DEBUG'
|
||||
include_default_watches = True
|
||||
|
||||
host = os.environ.get("LISTEN_HOST", "0.0.0.0").strip()
|
||||
port = int(os.environ.get('PORT', 5000))
|
||||
ssl_mode = False
|
||||
@@ -87,15 +146,13 @@ def main():
|
||||
datastore_path = os.path.join(os.getcwd(), "../datastore")
|
||||
|
||||
try:
|
||||
opts, args = getopt.getopt(sys.argv[1:], "6Ccsd:h:p:l:", "port")
|
||||
opts, args = getopt.getopt(sys.argv[1:], "6Ccsd:h:p:l:u:", "port")
|
||||
except getopt.GetoptError:
|
||||
print('backend.py -s SSL enable -h [host] -p [port] -d [datastore path] -l [debug level - TRACE, DEBUG(default), INFO, SUCCESS, WARNING, ERROR, CRITICAL]')
|
||||
print('backend.py -s SSL enable -h [host] -p [port] -d [datastore path] -u [default URL to watch] -l [debug level - TRACE, DEBUG(default), INFO, SUCCESS, WARNING, ERROR, CRITICAL]')
|
||||
sys.exit(2)
|
||||
|
||||
create_datastore_dir = False
|
||||
|
||||
# Set a default logger level
|
||||
logger_level = 'DEBUG'
|
||||
# Set a logger level via shell env variable
|
||||
# Used: Dockerfile for CICD
|
||||
# To set logger level for pytest, see the app function in tests/conftest.py
|
||||
@@ -116,6 +173,10 @@ def main():
|
||||
if opt == '-d':
|
||||
datastore_path = arg
|
||||
|
||||
if opt == '-u':
|
||||
default_url = arg
|
||||
include_default_watches = False
|
||||
|
||||
# Cleanup (remove text files that arent in the index)
|
||||
if opt == '-c':
|
||||
do_cleanup = True
|
||||
@@ -157,6 +218,11 @@ def main():
|
||||
" WARNING, ERROR, CRITICAL")
|
||||
sys.exit(2)
|
||||
|
||||
# Disable verbose pyppeteer logging to prevent memory leaks from large CDP messages
|
||||
# Set both parent and child loggers since pyppeteer hardcodes DEBUG level
|
||||
logging.getLogger('pyppeteer.connection').setLevel(logging.WARNING)
|
||||
logging.getLogger('pyppeteer.connection.Connection').setLevel(logging.WARNING)
|
||||
|
||||
# isnt there some @thingy to attach to each route to tell it, that this route needs a datastore
|
||||
app_config = {'datastore_path': datastore_path}
|
||||
|
||||
@@ -172,13 +238,20 @@ def main():
|
||||
sys.exit(2)
|
||||
|
||||
try:
|
||||
datastore = store.ChangeDetectionStore(datastore_path=app_config['datastore_path'], version_tag=__version__)
|
||||
datastore = store.ChangeDetectionStore(datastore_path=app_config['datastore_path'], version_tag=__version__, include_default_watches=include_default_watches)
|
||||
except JSONDecodeError as e:
|
||||
# Dont' start if the JSON DB looks corrupt
|
||||
logger.critical(f"ERROR: JSON DB or Proxy List JSON at '{app_config['datastore_path']}' appears to be corrupt, aborting.")
|
||||
logger.critical(str(e))
|
||||
return
|
||||
|
||||
# Inject datastore into plugins that need access to settings
|
||||
from changedetectionio.pluggy_interface import inject_datastore_into_plugins
|
||||
inject_datastore_into_plugins(datastore)
|
||||
|
||||
if default_url:
|
||||
datastore.add_watch(url = default_url)
|
||||
|
||||
app = changedetection_app(app_config, datastore)
|
||||
|
||||
# Get the SocketIO instance from the Flask app (created in flask_app.py)
|
||||
@@ -214,7 +287,9 @@ def main():
|
||||
return dict(right_sticky="v{}".format(datastore.data['version_tag']),
|
||||
new_version_available=app.config['NEW_VERSION_AVAILABLE'],
|
||||
has_password=datastore.data['settings']['application']['password'] != False,
|
||||
socket_io_enabled=datastore.data['settings']['application']['ui'].get('socket_io_enabled', True)
|
||||
socket_io_enabled=datastore.data['settings']['application']['ui'].get('socket_io_enabled', True),
|
||||
all_paused=datastore.data['settings']['application'].get('all_paused', False),
|
||||
all_muted=datastore.data['settings']['application'].get('all_muted', False)
|
||||
)
|
||||
|
||||
# Monitored websites will not receive a Referer header when a user clicks on an outgoing link.
|
||||
|
||||
@@ -2,7 +2,9 @@ from changedetectionio import queuedWatchMetaData
|
||||
from changedetectionio import worker_handler
|
||||
from flask_expects_json import expects_json
|
||||
from flask_restful import abort, Resource
|
||||
from loguru import logger
|
||||
|
||||
import threading
|
||||
from flask import request
|
||||
from . import auth
|
||||
|
||||
@@ -28,18 +30,36 @@ class Tag(Resource):
|
||||
abort(404, message=f'No tag exists with the UUID of {uuid}')
|
||||
|
||||
if request.args.get('recheck'):
|
||||
# Recheck all, including muted
|
||||
# Get most overdue first
|
||||
i=0
|
||||
# Recheck all watches with this tag, including muted
|
||||
# First collect watches to queue
|
||||
watches_to_queue = []
|
||||
for k in sorted(self.datastore.data['watching'].items(), key=lambda item: item[1].get('last_checked', 0)):
|
||||
watch_uuid = k[0]
|
||||
watch = k[1]
|
||||
if not watch['paused'] and tag['uuid'] not in watch['tags']:
|
||||
continue
|
||||
worker_handler.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid}))
|
||||
i+=1
|
||||
if not watch['paused'] and tag['uuid'] in watch['tags']:
|
||||
watches_to_queue.append(watch_uuid)
|
||||
|
||||
return f"OK, {i} watches queued", 200
|
||||
# If less than 20 watches, queue synchronously for immediate feedback
|
||||
if len(watches_to_queue) < 20:
|
||||
for watch_uuid in watches_to_queue:
|
||||
worker_handler.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid}))
|
||||
return {'status': f'OK, queued {len(watches_to_queue)} watches for rechecking'}, 200
|
||||
else:
|
||||
# 20+ watches - queue in background thread to avoid blocking API response
|
||||
def queue_watches_background():
|
||||
"""Background thread to queue watches - discarded after completion."""
|
||||
try:
|
||||
for watch_uuid in watches_to_queue:
|
||||
worker_handler.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid}))
|
||||
logger.info(f"Background queueing complete for tag {tag['uuid']}: {len(watches_to_queue)} watches queued")
|
||||
except Exception as e:
|
||||
logger.error(f"Error in background queueing for tag {tag['uuid']}: {e}")
|
||||
|
||||
# Start background thread and return immediately
|
||||
thread = threading.Thread(target=queue_watches_background, daemon=True, name=f"QueueTag-{tag['uuid'][:8]}")
|
||||
thread.start()
|
||||
|
||||
return {'status': f'OK, queueing {len(watches_to_queue)} watches in background'}, 202
|
||||
|
||||
if request.args.get('muted', '') == 'muted':
|
||||
self.datastore.data['settings']['application']['tags'][uuid]['notification_muted'] = True
|
||||
|
||||
@@ -1,17 +1,22 @@
|
||||
import os
|
||||
import threading
|
||||
|
||||
from changedetectionio.validate_url import is_safe_valid_url
|
||||
from changedetectionio.favicon_utils import get_favicon_mime_type
|
||||
|
||||
from flask_expects_json import expects_json
|
||||
from changedetectionio import queuedWatchMetaData
|
||||
from changedetectionio import worker_handler
|
||||
from flask_restful import abort, Resource
|
||||
from flask import request, make_response, send_from_directory
|
||||
from . import auth
|
||||
from changedetectionio import queuedWatchMetaData, strtobool
|
||||
from changedetectionio import worker_handler
|
||||
from flask import request, make_response, send_from_directory
|
||||
from flask_expects_json import expects_json
|
||||
from flask_restful import abort, Resource
|
||||
from loguru import logger
|
||||
import copy
|
||||
|
||||
# Import schemas from __init__.py
|
||||
from . import schema, schema_create_watch, schema_update_watch, validate_openapi_request
|
||||
from ..notification import valid_notification_formats
|
||||
from ..notification.handler import newline_re
|
||||
|
||||
|
||||
def validate_time_between_check_required(json_data):
|
||||
@@ -61,8 +66,21 @@ class Watch(Resource):
|
||||
@validate_openapi_request('getWatch')
|
||||
def get(self, uuid):
|
||||
"""Get information about a single watch, recheck, pause, or mute."""
|
||||
import time
|
||||
from copy import deepcopy
|
||||
watch = deepcopy(self.datastore.data['watching'].get(uuid))
|
||||
watch = None
|
||||
# Retry up to 20 times if dict is being modified
|
||||
# With sleep(0), this is fast: ~200µs best case, ~20ms worst case under heavy load
|
||||
for attempt in range(20):
|
||||
try:
|
||||
watch = deepcopy(self.datastore.data['watching'].get(uuid))
|
||||
break
|
||||
except RuntimeError:
|
||||
# Dict changed during deepcopy, retry after yielding to scheduler
|
||||
# sleep(0) releases GIL and yields - no fixed delay, just lets other threads run
|
||||
if attempt < 19: # Don't yield on last attempt
|
||||
time.sleep(0) # Yield to scheduler (microseconds, not milliseconds)
|
||||
|
||||
if not watch:
|
||||
abort(404, message='No watch exists with the UUID of {}'.format(uuid))
|
||||
|
||||
@@ -113,19 +131,86 @@ class Watch(Resource):
|
||||
|
||||
if request.json.get('proxy'):
|
||||
plist = self.datastore.proxy_list
|
||||
if not request.json.get('proxy') in plist:
|
||||
return "Invalid proxy choice, currently supported proxies are '{}'".format(', '.join(plist)), 400
|
||||
if not plist or request.json.get('proxy') not in plist:
|
||||
proxy_list_str = ', '.join(plist) if plist else 'none configured'
|
||||
return f"Invalid proxy choice, currently supported proxies are '{proxy_list_str}'", 400
|
||||
|
||||
# Validate time_between_check when not using defaults
|
||||
validation_error = validate_time_between_check_required(request.json)
|
||||
if validation_error:
|
||||
return validation_error, 400
|
||||
|
||||
# XSS etc protection
|
||||
if request.json.get('url') and not is_safe_valid_url(request.json.get('url')):
|
||||
return "Invalid URL", 400
|
||||
# XSS etc protection - validate URL if it's being updated
|
||||
if 'url' in request.json:
|
||||
new_url = request.json.get('url')
|
||||
|
||||
watch.update(request.json)
|
||||
# URL must be a non-empty string
|
||||
if new_url is None:
|
||||
return "URL cannot be null", 400
|
||||
|
||||
if not isinstance(new_url, str):
|
||||
return "URL must be a string", 400
|
||||
|
||||
if not new_url.strip():
|
||||
return "URL cannot be empty or whitespace only", 400
|
||||
|
||||
if not is_safe_valid_url(new_url.strip()):
|
||||
return "Invalid or unsupported URL format. URL must use http://, https://, or ftp:// protocol", 400
|
||||
|
||||
# Handle processor-config-* fields separately (save to JSON, not datastore)
|
||||
from changedetectionio import processors
|
||||
processor_config_data = {}
|
||||
regular_data = {}
|
||||
|
||||
for key, value in request.json.items():
|
||||
if key.startswith('processor_config_'):
|
||||
config_key = key.replace('processor_config_', '')
|
||||
if value: # Only save non-empty values
|
||||
processor_config_data[config_key] = value
|
||||
else:
|
||||
regular_data[key] = value
|
||||
|
||||
# Update watch with regular (non-processor-config) fields
|
||||
watch.update(regular_data)
|
||||
|
||||
# Save processor config to JSON file if any config data exists
|
||||
if processor_config_data:
|
||||
try:
|
||||
processor_name = request.json.get('processor', watch.get('processor'))
|
||||
if processor_name:
|
||||
# Create a processor instance to access config methods
|
||||
from changedetectionio.processors import difference_detection_processor
|
||||
processor_instance = difference_detection_processor(self.datastore, uuid)
|
||||
# Use processor name as filename so each processor keeps its own config
|
||||
config_filename = f'{processor_name}.json'
|
||||
processor_instance.update_extra_watch_config(config_filename, processor_config_data)
|
||||
logger.debug(f"API: Saved processor config to {config_filename}: {processor_config_data}")
|
||||
|
||||
# Call optional edit_hook if processor has one
|
||||
try:
|
||||
import importlib
|
||||
edit_hook_module_name = f'changedetectionio.processors.{processor_name}.edit_hook'
|
||||
|
||||
try:
|
||||
edit_hook = importlib.import_module(edit_hook_module_name)
|
||||
logger.debug(f"API: Found edit_hook module for {processor_name}")
|
||||
|
||||
if hasattr(edit_hook, 'on_config_save'):
|
||||
logger.info(f"API: Calling edit_hook.on_config_save for {processor_name}")
|
||||
# Call hook and get updated config
|
||||
updated_config = edit_hook.on_config_save(watch, processor_config_data, self.datastore)
|
||||
# Save updated config back to file
|
||||
processor_instance.update_extra_watch_config(config_filename, updated_config)
|
||||
logger.info(f"API: Edit hook updated config: {updated_config}")
|
||||
else:
|
||||
logger.debug(f"API: Edit hook module found but no on_config_save function")
|
||||
except ModuleNotFoundError:
|
||||
logger.debug(f"API: No edit_hook module for processor {processor_name} (this is normal)")
|
||||
except Exception as hook_error:
|
||||
logger.error(f"API: Edit hook error (non-fatal): {hook_error}", exc_info=True)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"API: Failed to save processor config: {e}")
|
||||
|
||||
return "OK", 200
|
||||
|
||||
@@ -166,6 +251,10 @@ class WatchSingleHistory(Resource):
|
||||
if timestamp == 'latest':
|
||||
timestamp = list(watch.history.keys())[-1]
|
||||
|
||||
# Validate that the timestamp exists in history
|
||||
if timestamp not in watch.history:
|
||||
abort(404, message=f"No history snapshot found for timestamp '{timestamp}'")
|
||||
|
||||
if request.args.get('html'):
|
||||
content = watch.get_fetched_html(timestamp)
|
||||
if content:
|
||||
@@ -175,12 +264,130 @@ class WatchSingleHistory(Resource):
|
||||
response = make_response("No content found", 404)
|
||||
response.mimetype = "text/plain"
|
||||
else:
|
||||
content = watch.get_history_snapshot(timestamp)
|
||||
content = watch.get_history_snapshot(timestamp=timestamp)
|
||||
response = make_response(content, 200)
|
||||
response.mimetype = "text/plain"
|
||||
|
||||
return response
|
||||
|
||||
class WatchHistoryDiff(Resource):
|
||||
"""
|
||||
Generate diff between two historical snapshots.
|
||||
|
||||
Note: This API endpoint currently returns text-based diffs and works best
|
||||
with the text_json_diff processor. Future processor types (like image_diff,
|
||||
restock_diff) may want to implement their own specialized API endpoints
|
||||
for returning processor-specific data (e.g., price charts, image comparisons).
|
||||
|
||||
The web UI diff page (/diff/<uuid>) is processor-aware and delegates rendering
|
||||
to processors/{type}/difference.py::render() for processor-specific visualizations.
|
||||
"""
|
||||
def __init__(self, **kwargs):
|
||||
# datastore is a black box dependency
|
||||
self.datastore = kwargs['datastore']
|
||||
|
||||
@auth.check_token
|
||||
@validate_openapi_request('getWatchHistoryDiff')
|
||||
def get(self, uuid, from_timestamp, to_timestamp):
|
||||
"""Generate diff between two historical snapshots."""
|
||||
from changedetectionio import diff
|
||||
from changedetectionio.notification.handler import apply_service_tweaks
|
||||
|
||||
watch = self.datastore.data['watching'].get(uuid)
|
||||
if not watch:
|
||||
abort(404, message=f"No watch exists with the UUID of {uuid}")
|
||||
|
||||
if not len(watch.history):
|
||||
abort(404, message=f"Watch found but no history exists for the UUID {uuid}")
|
||||
|
||||
history_keys = list(watch.history.keys())
|
||||
|
||||
# Handle 'latest' keyword for to_timestamp
|
||||
if to_timestamp == 'latest':
|
||||
to_timestamp = history_keys[-1]
|
||||
|
||||
# Handle 'previous' keyword for from_timestamp (second-most-recent)
|
||||
if from_timestamp == 'previous':
|
||||
if len(history_keys) < 2:
|
||||
abort(404, message=f"Not enough history entries. Need at least 2 snapshots for 'previous'")
|
||||
from_timestamp = history_keys[-2]
|
||||
|
||||
# Validate timestamps exist
|
||||
if from_timestamp not in watch.history:
|
||||
abort(404, message=f"From timestamp {from_timestamp} not found in watch history")
|
||||
if to_timestamp not in watch.history:
|
||||
abort(404, message=f"To timestamp {to_timestamp} not found in watch history")
|
||||
|
||||
# Get the format parameter (default to 'text')
|
||||
output_format = request.args.get('format', 'text').lower()
|
||||
|
||||
# Validate format
|
||||
if output_format not in valid_notification_formats.keys():
|
||||
abort(400, message=f"Invalid format. Must be one of: {', '.join(valid_notification_formats.keys())}")
|
||||
|
||||
# Get the word_diff parameter (default to False - line-level mode)
|
||||
word_diff = strtobool(request.args.get('word_diff', 'false'))
|
||||
|
||||
# Get the no_markup parameter (default to False)
|
||||
no_markup = strtobool(request.args.get('no_markup', 'false'))
|
||||
|
||||
# Retrieve snapshot contents
|
||||
from_version_file_contents = watch.get_history_snapshot(from_timestamp)
|
||||
to_version_file_contents = watch.get_history_snapshot(to_timestamp)
|
||||
|
||||
# Get diff preferences from query parameters (matching UI preferences in DIFF_PREFERENCES_CONFIG)
|
||||
# Support both 'type' (UI parameter) and 'word_diff' (API parameter) for backward compatibility
|
||||
diff_type = request.args.get('type', 'diffLines')
|
||||
if diff_type == 'diffWords':
|
||||
word_diff = True
|
||||
|
||||
# Get boolean diff preferences with defaults from DIFF_PREFERENCES_CONFIG
|
||||
changes_only = strtobool(request.args.get('changesOnly', 'true'))
|
||||
ignore_whitespace = strtobool(request.args.get('ignoreWhitespace', 'false'))
|
||||
include_removed = strtobool(request.args.get('removed', 'true'))
|
||||
include_added = strtobool(request.args.get('added', 'true'))
|
||||
include_replaced = strtobool(request.args.get('replaced', 'true'))
|
||||
|
||||
# Generate the diff with all preferences
|
||||
content = diff.render_diff(
|
||||
previous_version_file_contents=from_version_file_contents,
|
||||
newest_version_file_contents=to_version_file_contents,
|
||||
ignore_junk=ignore_whitespace,
|
||||
include_equal=changes_only,
|
||||
include_removed=include_removed,
|
||||
include_added=include_added,
|
||||
include_replaced=include_replaced,
|
||||
word_diff=word_diff,
|
||||
)
|
||||
|
||||
# Skip formatting if no_markup is set
|
||||
if no_markup:
|
||||
mimetype = "text/plain"
|
||||
else:
|
||||
# Apply formatting based on the requested format
|
||||
if output_format == 'htmlcolor':
|
||||
from changedetectionio.notification.handler import apply_html_color_to_body
|
||||
content = apply_html_color_to_body(n_body=content)
|
||||
mimetype = "text/html"
|
||||
else:
|
||||
# Apply service tweaks for text/html formats
|
||||
# Pass empty URL and title as they're not used for the placeholder replacement we need
|
||||
_, content, _ = apply_service_tweaks(
|
||||
url='',
|
||||
n_body=content,
|
||||
n_title='',
|
||||
requested_output_format=output_format
|
||||
)
|
||||
mimetype = "text/html" if output_format == 'html' else "text/plain"
|
||||
|
||||
if 'html' in output_format:
|
||||
content = newline_re.sub('<br>\r\n', content)
|
||||
|
||||
response = make_response(content, 200)
|
||||
response.mimetype = mimetype
|
||||
return response
|
||||
|
||||
|
||||
class WatchFavicon(Resource):
|
||||
def __init__(self, **kwargs):
|
||||
# datastore is a black box dependency
|
||||
@@ -196,16 +403,9 @@ class WatchFavicon(Resource):
|
||||
|
||||
favicon_filename = watch.get_favicon_filename()
|
||||
if favicon_filename:
|
||||
try:
|
||||
import magic
|
||||
mime = magic.from_file(
|
||||
os.path.join(watch.watch_data_dir, favicon_filename),
|
||||
mime=True
|
||||
)
|
||||
except ImportError:
|
||||
# Fallback, no python-magic
|
||||
import mimetypes
|
||||
mime, encoding = mimetypes.guess_type(favicon_filename)
|
||||
# Use cached MIME type detection
|
||||
filepath = os.path.join(watch.watch_data_dir, favicon_filename)
|
||||
mime = get_favicon_mime_type(filepath)
|
||||
|
||||
response = make_response(send_from_directory(watch.watch_data_dir, favicon_filename))
|
||||
response.headers['Content-type'] = mime
|
||||
@@ -235,8 +435,9 @@ class CreateWatch(Resource):
|
||||
|
||||
if json_data.get('proxy'):
|
||||
plist = self.datastore.proxy_list
|
||||
if not json_data.get('proxy') in plist:
|
||||
return "Invalid proxy choice, currently supported proxies are '{}'".format(', '.join(plist)), 400
|
||||
if not plist or json_data.get('proxy') not in plist:
|
||||
proxy_list_str = ', '.join(plist) if plist else 'none configured'
|
||||
return f"Invalid proxy choice, currently supported proxies are '{proxy_list_str}'", 400
|
||||
|
||||
# Validate time_between_check when not using defaults
|
||||
validation_error = validate_time_between_check_required(json_data)
|
||||
@@ -255,7 +456,8 @@ class CreateWatch(Resource):
|
||||
|
||||
new_uuid = self.datastore.add_watch(url=url, extras=extras, tag=tags)
|
||||
if new_uuid:
|
||||
worker_handler.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid}))
|
||||
# Dont queue because the scheduler will check that it hasnt been checked before anyway
|
||||
# worker_handler.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid}))
|
||||
return {'uuid': new_uuid}, 201
|
||||
else:
|
||||
return "Invalid or unsupported URL", 400
|
||||
@@ -285,8 +487,58 @@ class CreateWatch(Resource):
|
||||
}
|
||||
|
||||
if request.args.get('recheck_all'):
|
||||
for uuid in self.datastore.data['watching'].keys():
|
||||
worker_handler.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
return {'status': "OK"}, 200
|
||||
# Collect all watches to queue
|
||||
watches_to_queue = self.datastore.data['watching'].keys()
|
||||
|
||||
# If less than 20 watches, queue synchronously for immediate feedback
|
||||
if len(watches_to_queue) < 20:
|
||||
# Get already queued/running UUIDs once (efficient)
|
||||
queued_uuids = set(self.update_q.get_queued_uuids())
|
||||
running_uuids = set(worker_handler.get_running_uuids())
|
||||
|
||||
# Filter out watches that are already queued or running
|
||||
watches_to_queue_filtered = [
|
||||
uuid for uuid in watches_to_queue
|
||||
if uuid not in queued_uuids and uuid not in running_uuids
|
||||
]
|
||||
|
||||
# Queue only the filtered watches
|
||||
for uuid in watches_to_queue_filtered:
|
||||
worker_handler.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
|
||||
# Provide feedback about skipped watches
|
||||
skipped_count = len(watches_to_queue) - len(watches_to_queue_filtered)
|
||||
if skipped_count > 0:
|
||||
return {'status': f'OK, queued {len(watches_to_queue_filtered)} watches for rechecking ({skipped_count} already queued or running)'}, 200
|
||||
else:
|
||||
return {'status': f'OK, queued {len(watches_to_queue_filtered)} watches for rechecking'}, 200
|
||||
else:
|
||||
# 20+ watches - queue in background thread to avoid blocking API response
|
||||
# Capture queued/running state before background thread
|
||||
queued_uuids = set(self.update_q.get_queued_uuids())
|
||||
running_uuids = set(worker_handler.get_running_uuids())
|
||||
|
||||
def queue_all_watches_background():
|
||||
"""Background thread to queue all watches - discarded after completion."""
|
||||
try:
|
||||
queued_count = 0
|
||||
skipped_count = 0
|
||||
for uuid in watches_to_queue:
|
||||
# Check if already queued or running (state captured at start)
|
||||
if uuid not in queued_uuids and uuid not in running_uuids:
|
||||
worker_handler.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
queued_count += 1
|
||||
else:
|
||||
skipped_count += 1
|
||||
|
||||
logger.info(f"Background queueing complete: {queued_count} watches queued, {skipped_count} skipped (already queued/running)")
|
||||
except Exception as e:
|
||||
logger.error(f"Error in background queueing all watches: {e}")
|
||||
|
||||
# Start background thread and return immediately
|
||||
thread = threading.Thread(target=queue_all_watches_background, daemon=True, name="QueueAllWatches-Background")
|
||||
thread.start()
|
||||
|
||||
return {'status': f'OK, queueing {len(watches_to_queue)} watches in background'}, 202
|
||||
|
||||
return list, 200
|
||||
@@ -41,7 +41,7 @@ def get_openapi_spec():
|
||||
# Possibly for pip3 packages
|
||||
spec_path = os.path.join(os.path.dirname(__file__), '../docs/api-spec.yaml')
|
||||
|
||||
with open(spec_path, 'r') as f:
|
||||
with open(spec_path, 'r', encoding='utf-8') as f:
|
||||
spec_dict = yaml.safe_load(f)
|
||||
_openapi_spec = OpenAPI.from_dict(spec_dict)
|
||||
return _openapi_spec
|
||||
@@ -51,6 +51,7 @@ def validate_openapi_request(operation_id):
|
||||
def decorator(f):
|
||||
@functools.wraps(f)
|
||||
def wrapper(*args, **kwargs):
|
||||
from werkzeug.exceptions import BadRequest
|
||||
try:
|
||||
# Skip OpenAPI validation for GET requests since they don't have request bodies
|
||||
if request.method.upper() != 'GET':
|
||||
@@ -61,7 +62,6 @@ def validate_openapi_request(operation_id):
|
||||
openapi_request = FlaskOpenAPIRequest(request)
|
||||
result = spec.unmarshal_request(openapi_request)
|
||||
if result.errors:
|
||||
from werkzeug.exceptions import BadRequest
|
||||
error_details = []
|
||||
for error in result.errors:
|
||||
error_details.append(str(error))
|
||||
@@ -78,7 +78,7 @@ def validate_openapi_request(operation_id):
|
||||
return decorator
|
||||
|
||||
# Import all API resources
|
||||
from .Watch import Watch, WatchHistory, WatchSingleHistory, CreateWatch, WatchFavicon
|
||||
from .Watch import Watch, WatchHistory, WatchSingleHistory, WatchHistoryDiff, CreateWatch, WatchFavicon
|
||||
from .Tags import Tags, Tag
|
||||
from .Import import Import
|
||||
from .SystemInfo import SystemInfo
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
from blinker import signal
|
||||
from .processors.exceptions import ProcessorException
|
||||
import changedetectionio.content_fetchers.exceptions as content_fetchers_exceptions
|
||||
from changedetectionio.processors.text_json_diff.processor import FilterNotFoundInResponse
|
||||
@@ -7,7 +8,7 @@ from changedetectionio.flask_app import watch_check_update
|
||||
import asyncio
|
||||
import importlib
|
||||
import os
|
||||
import queue
|
||||
import sys
|
||||
import time
|
||||
|
||||
from loguru import logger
|
||||
@@ -15,38 +16,67 @@ from loguru import logger
|
||||
# Async version of update_worker
|
||||
# Processes jobs from AsyncSignalPriorityQueue instead of threaded queue
|
||||
|
||||
async def async_update_worker(worker_id, q, notification_q, app, datastore):
|
||||
IN_PYTEST = "pytest" in sys.modules or "PYTEST_CURRENT_TEST" in os.environ
|
||||
DEFER_SLEEP_TIME_ALREADY_QUEUED = 0.3 if IN_PYTEST else 10.0
|
||||
|
||||
async def async_update_worker(worker_id, q, notification_q, app, datastore, executor=None):
|
||||
"""
|
||||
Async worker function that processes watch check jobs from the queue.
|
||||
|
||||
|
||||
Args:
|
||||
worker_id: Unique identifier for this worker
|
||||
q: AsyncSignalPriorityQueue containing jobs to process
|
||||
notification_q: Standard queue for notifications
|
||||
app: Flask application instance
|
||||
datastore: Application datastore
|
||||
executor: ThreadPoolExecutor for queue operations (optional)
|
||||
|
||||
Returns:
|
||||
"restart" if worker should restart, "shutdown" for clean exit
|
||||
"""
|
||||
# Set a descriptive name for this task
|
||||
task = asyncio.current_task()
|
||||
if task:
|
||||
task.set_name(f"async-worker-{worker_id}")
|
||||
|
||||
logger.info(f"Starting async worker {worker_id}")
|
||||
|
||||
|
||||
# Read restart policy from environment
|
||||
max_jobs = int(os.getenv("WORKER_MAX_JOBS", "10"))
|
||||
max_runtime_seconds = int(os.getenv("WORKER_MAX_RUNTIME", "3600")) # 1 hour default
|
||||
|
||||
jobs_processed = 0
|
||||
start_time = time.time()
|
||||
|
||||
logger.info(f"Starting async worker {worker_id} (max_jobs={max_jobs}, max_runtime={max_runtime_seconds}s)")
|
||||
|
||||
while not app.config.exit.is_set():
|
||||
update_handler = None
|
||||
watch = None
|
||||
|
||||
try:
|
||||
# Use native janus async interface - no threads needed!
|
||||
queued_item_data = await asyncio.wait_for(q.async_get(), timeout=1.0)
|
||||
|
||||
# Use sync interface via run_in_executor since each worker has its own event loop
|
||||
loop = asyncio.get_event_loop()
|
||||
queued_item_data = await asyncio.wait_for(
|
||||
loop.run_in_executor(executor, q.get, True, 1.0), # block=True, timeout=1.0
|
||||
timeout=1.5
|
||||
)
|
||||
|
||||
except asyncio.TimeoutError:
|
||||
# No jobs available, continue loop
|
||||
# No jobs available - check if we should restart based on time while idle
|
||||
runtime = time.time() - start_time
|
||||
if runtime >= max_runtime_seconds:
|
||||
logger.info(f"Worker {worker_id} idle and reached max runtime ({runtime:.0f}s), restarting")
|
||||
return "restart"
|
||||
continue
|
||||
except Exception as e:
|
||||
# Handle expected Empty exception from queue timeout
|
||||
import queue
|
||||
if isinstance(e, queue.Empty):
|
||||
# Queue is empty, normal behavior - just continue
|
||||
continue
|
||||
|
||||
# Unexpected exception - log as critical
|
||||
logger.critical(f"CRITICAL: Worker {worker_id} failed to get queue item: {type(e).__name__}: {e}")
|
||||
|
||||
|
||||
# Log queue health for debugging
|
||||
try:
|
||||
queue_size = q.qsize()
|
||||
@@ -54,16 +84,30 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore):
|
||||
logger.critical(f"CRITICAL: Worker {worker_id} queue health - size: {queue_size}, empty: {is_empty}")
|
||||
except Exception as health_e:
|
||||
logger.critical(f"CRITICAL: Worker {worker_id} queue health check failed: {health_e}")
|
||||
|
||||
|
||||
await asyncio.sleep(0.1)
|
||||
continue
|
||||
|
||||
|
||||
uuid = queued_item_data.item.get('uuid')
|
||||
fetch_start_time = round(time.time())
|
||||
|
||||
# Mark this UUID as being processed
|
||||
# RACE CONDITION FIX: Check if this UUID is already being processed by another worker
|
||||
from changedetectionio import worker_handler
|
||||
worker_handler.set_uuid_processing(uuid, processing=True)
|
||||
from changedetectionio.queuedWatchMetaData import PrioritizedItem
|
||||
if worker_handler.is_watch_running_by_another_worker(uuid, worker_id):
|
||||
logger.trace(f"Worker {worker_id} detected UUID {uuid} already being processed by another worker - deferring")
|
||||
# Sleep to avoid tight loop and give the other worker time to finish
|
||||
await asyncio.sleep(DEFER_SLEEP_TIME_ALREADY_QUEUED)
|
||||
|
||||
# Re-queue with lower priority so it gets checked again after current processing finishes
|
||||
deferred_priority = max(1000, queued_item_data.priority * 10)
|
||||
deferred_item = PrioritizedItem(priority=deferred_priority, item=queued_item_data.item)
|
||||
worker_handler.queue_item_async_safe(q, deferred_item, silent=True)
|
||||
logger.debug(f"Worker {worker_id} re-queued UUID {uuid} for subsequent check")
|
||||
continue
|
||||
|
||||
fetch_start_time = round(time.time())
|
||||
|
||||
# Mark this UUID as being processed by this worker
|
||||
worker_handler.set_uuid_processing(uuid, worker_id=worker_id, processing=True)
|
||||
|
||||
try:
|
||||
if uuid in list(datastore.data['watching'].keys()) and datastore.data['watching'][uuid].get('url'):
|
||||
@@ -87,9 +131,8 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore):
|
||||
processor = watch.get('processor', 'text_json_diff')
|
||||
|
||||
# Init a new 'difference_detection_processor'
|
||||
processor_module_name = f"changedetectionio.processors.{processor}.processor"
|
||||
try:
|
||||
processor_module = importlib.import_module(processor_module_name)
|
||||
processor_module = importlib.import_module(f"changedetectionio.processors.{processor}.processor")
|
||||
except ModuleNotFoundError as e:
|
||||
print(f"Processor module '{processor}' not found.")
|
||||
raise e
|
||||
@@ -97,11 +140,20 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore):
|
||||
update_handler = processor_module.perform_site_check(datastore=datastore,
|
||||
watch_uuid=uuid)
|
||||
|
||||
update_signal = signal('watch_small_status_comment')
|
||||
update_signal.send(watch_uuid=uuid, status="Fetching page..")
|
||||
|
||||
# All fetchers are now async, so call directly
|
||||
await update_handler.call_browser()
|
||||
|
||||
# Run change detection (this is synchronous)
|
||||
changed_detected, update_obj, contents = update_handler.run_changedetection(watch=watch)
|
||||
# Run change detection in executor to avoid blocking event loop
|
||||
# This includes CPU-intensive operations like HTML parsing (lxml/inscriptis)
|
||||
# which can take 2-10ms and cause GIL contention across workers
|
||||
loop = asyncio.get_event_loop()
|
||||
changed_detected, update_obj, contents = await loop.run_in_executor(
|
||||
executor,
|
||||
lambda: update_handler.run_changedetection(watch=watch)
|
||||
)
|
||||
|
||||
except PermissionError as e:
|
||||
logger.critical(f"File permission error updating file, watch: {uuid}")
|
||||
@@ -309,6 +361,7 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore):
|
||||
if not datastore.data['watching'].get(uuid):
|
||||
continue
|
||||
|
||||
logger.debug(f"Processing watch UUID: {uuid} - xpath_data length returned {len(update_handler.xpath_data) if update_handler.xpath_data else 'empty.'}")
|
||||
if process_changedetection_results:
|
||||
try:
|
||||
datastore.update_watch(uuid=uuid, update_obj=update_obj)
|
||||
@@ -326,7 +379,7 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore):
|
||||
fetch_start_time += 1
|
||||
await asyncio.sleep(1)
|
||||
|
||||
watch.save_history_text(contents=contents,
|
||||
watch.save_history_blob(contents=contents,
|
||||
timestamp=int(fetch_start_time),
|
||||
snapshot_id=update_obj.get('previous_md5', 'none'))
|
||||
|
||||
@@ -353,12 +406,15 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore):
|
||||
count = watch.get('check_count', 0) + 1
|
||||
|
||||
# Always record page title (used in notifications, and can change even when the content is the same)
|
||||
try:
|
||||
page_title = html_tools.extract_title(data=update_handler.fetcher.content)
|
||||
logger.debug(f"UUID: {uuid} Page <title> is '{page_title}'")
|
||||
datastore.update_watch(uuid=uuid, update_obj={'page_title': page_title})
|
||||
except Exception as e:
|
||||
logger.warning(f"UUID: {uuid} Exception when extracting <title> - {str(e)}")
|
||||
if update_obj.get('content-type') and 'html' in update_obj.get('content-type'):
|
||||
try:
|
||||
page_title = html_tools.extract_title(data=update_handler.fetcher.content)
|
||||
if page_title:
|
||||
page_title = page_title.strip()[:2000]
|
||||
logger.debug(f"UUID: {uuid} Page <title> is '{page_title}'")
|
||||
datastore.update_watch(uuid=uuid, update_obj={'page_title': page_title})
|
||||
except Exception as e:
|
||||
logger.warning(f"UUID: {uuid} Exception when extracting <title> - {str(e)}")
|
||||
|
||||
# Record server header
|
||||
try:
|
||||
@@ -394,8 +450,13 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore):
|
||||
# Always cleanup - this runs whether there was an exception or not
|
||||
if uuid:
|
||||
try:
|
||||
# Mark UUID as no longer being processed
|
||||
worker_handler.set_uuid_processing(uuid, processing=False)
|
||||
if update_handler and hasattr(update_handler, 'fetcher') and update_handler.fetcher:
|
||||
await update_handler.fetcher.quit(watch=watch)
|
||||
except Exception as e:
|
||||
logger.error(f"Exception while cleaning/quit after calling browser: {e}")
|
||||
try:
|
||||
# Mark UUID as no longer being processed by this worker
|
||||
worker_handler.set_uuid_processing(uuid, worker_id=worker_id, processing=False)
|
||||
|
||||
# Send completion signal
|
||||
if watch:
|
||||
@@ -424,10 +485,16 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore):
|
||||
# 3. GC can't collect the object anyway (still referenced by datastore)
|
||||
# 4. It would just cause confusion
|
||||
|
||||
# Force garbage collection after cleanup
|
||||
import gc
|
||||
gc.collect()
|
||||
|
||||
logger.debug(f"Worker {worker_id} completed watch {uuid} in {time.time()-fetch_start_time:.2f}s")
|
||||
except Exception as cleanup_error:
|
||||
logger.error(f"Worker {worker_id} error during cleanup: {cleanup_error}")
|
||||
|
||||
|
||||
del(uuid)
|
||||
|
||||
# Brief pause before continuing to avoid tight error loops (only on error)
|
||||
if 'e' in locals():
|
||||
await asyncio.sleep(1.0)
|
||||
@@ -435,6 +502,19 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore):
|
||||
# Small yield for normal completion
|
||||
await asyncio.sleep(0.01)
|
||||
|
||||
# Job completed - increment counter and check restart conditions
|
||||
jobs_processed += 1
|
||||
runtime = time.time() - start_time
|
||||
|
||||
# Check if we should restart (only when idle, between jobs)
|
||||
should_restart_jobs = jobs_processed >= max_jobs
|
||||
should_restart_time = runtime >= max_runtime_seconds
|
||||
|
||||
if should_restart_jobs or should_restart_time:
|
||||
reason = f"{jobs_processed} jobs" if should_restart_jobs else f"{runtime:.0f}s runtime"
|
||||
logger.info(f"Worker {worker_id} restarting after {reason} ({jobs_processed} jobs, {runtime:.0f}s runtime)")
|
||||
return "restart"
|
||||
|
||||
# Check if we should exit
|
||||
if app.config.exit.is_set():
|
||||
break
|
||||
@@ -442,10 +522,12 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore):
|
||||
# Check if we're in pytest environment - if so, be more gentle with logging
|
||||
import sys
|
||||
in_pytest = "pytest" in sys.modules or "PYTEST_CURRENT_TEST" in os.environ
|
||||
|
||||
|
||||
if not in_pytest:
|
||||
logger.info(f"Worker {worker_id} shutting down")
|
||||
|
||||
return "shutdown"
|
||||
|
||||
|
||||
def cleanup_error_artifacts(uuid, datastore):
|
||||
"""Helper function to clean up error artifacts"""
|
||||
|
||||
@@ -3,6 +3,7 @@ import glob
|
||||
import threading
|
||||
|
||||
from flask import Blueprint, render_template, send_from_directory, flash, url_for, redirect, abort
|
||||
from flask_babel import gettext
|
||||
import os
|
||||
|
||||
from changedetectionio.store import ChangeDetectionStore
|
||||
@@ -82,19 +83,24 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
@backups_blueprint.route("/request-backup", methods=['GET'])
|
||||
def request_backup():
|
||||
if any(thread.is_alive() for thread in backup_threads):
|
||||
flash("A backup is already running, check back in a few minutes", "error")
|
||||
flash(gettext("A backup is already running, check back in a few minutes"), "error")
|
||||
return redirect(url_for('backups.index'))
|
||||
|
||||
if len(find_backups()) > int(os.getenv("MAX_NUMBER_BACKUPS", 100)):
|
||||
flash("Maximum number of backups reached, please remove some", "error")
|
||||
flash(gettext("Maximum number of backups reached, please remove some"), "error")
|
||||
return redirect(url_for('backups.index'))
|
||||
|
||||
# Be sure we're written fresh
|
||||
datastore.sync_to_json()
|
||||
zip_thread = threading.Thread(target=create_backup, args=(datastore.datastore_path, datastore.data.get("watching")))
|
||||
zip_thread = threading.Thread(
|
||||
target=create_backup,
|
||||
args=(datastore.datastore_path, datastore.data.get("watching")),
|
||||
daemon=True,
|
||||
name="BackupCreator"
|
||||
)
|
||||
zip_thread.start()
|
||||
backup_threads.append(zip_thread)
|
||||
flash("Backup building in background, check back in a few minutes.")
|
||||
flash(gettext("Backup building in background, check back in a few minutes."))
|
||||
|
||||
return redirect(url_for('backups.index'))
|
||||
|
||||
@@ -157,7 +163,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
for backup in backups:
|
||||
os.unlink(backup)
|
||||
|
||||
flash("Backups were deleted.")
|
||||
flash(gettext("Backups were deleted."))
|
||||
|
||||
return redirect(url_for('backups.index'))
|
||||
|
||||
|
||||
@@ -3,31 +3,31 @@
|
||||
{% from '_helpers.html' import render_simple_field, render_field %}
|
||||
<div class="edit-form">
|
||||
<div class="box-wrap inner">
|
||||
<h4>Backups</h4>
|
||||
<h4>{{ _('Backups') }}</h4>
|
||||
{% if backup_running %}
|
||||
<p>
|
||||
<strong>A backup is running!</strong>
|
||||
<strong>{{ _('A backup is running!') }}</strong>
|
||||
</p>
|
||||
{% endif %}
|
||||
<p>
|
||||
Here you can download and request a new backup, when a backup is completed you will see it listed below.
|
||||
{{ _('Here you can download and request a new backup, when a backup is completed you will see it listed below.') }}
|
||||
</p>
|
||||
<br>
|
||||
{% if available_backups %}
|
||||
<ul>
|
||||
{% for backup in available_backups %}
|
||||
<li><a href="{{ url_for('backups.download_backup', filename=backup["filename"]) }}">{{ backup["filename"] }}</a> {{ backup["filesize"] }} Mb</li>
|
||||
<li><a href="{{ url_for('backups.download_backup', filename=backup["filename"]) }}">{{ backup["filename"] }}</a> {{ backup["filesize"] }} {{ _('Mb') }}</li>
|
||||
{% endfor %}
|
||||
</ul>
|
||||
{% else %}
|
||||
<p>
|
||||
<strong>No backups found.</strong>
|
||||
<strong>{{ _('No backups found.') }}</strong>
|
||||
</p>
|
||||
{% endif %}
|
||||
|
||||
<a class="pure-button pure-button-primary" href="{{ url_for('backups.request_backup') }}">Create backup</a>
|
||||
<a class="pure-button pure-button-primary" href="{{ url_for('backups.request_backup') }}">{{ _('Create backup') }}</a>
|
||||
{% if available_backups %}
|
||||
<a class="pure-button button-small button-error " href="{{ url_for('backups.remove_backups') }}">Remove backups</a>
|
||||
<a class="pure-button button-small button-error " href="{{ url_for('backups.remove_backups') }}">{{ _('Remove backups') }}</a>
|
||||
{% endif %}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -21,31 +21,154 @@ from changedetectionio.flask_app import login_optionally_required
|
||||
from loguru import logger
|
||||
|
||||
browsersteps_sessions = {}
|
||||
browsersteps_watch_to_session = {} # Maps watch_uuid -> browsersteps_session_id
|
||||
io_interface_context = None
|
||||
import json
|
||||
import hashlib
|
||||
from flask import Response
|
||||
import asyncio
|
||||
import threading
|
||||
import time
|
||||
|
||||
def run_async_in_browser_loop(coro):
|
||||
"""Run async coroutine using the existing async worker event loop"""
|
||||
from changedetectionio import worker_handler
|
||||
|
||||
# Use the existing async worker event loop instead of creating a new one
|
||||
if worker_handler.USE_ASYNC_WORKERS and worker_handler.async_loop and not worker_handler.async_loop.is_closed():
|
||||
logger.debug("Browser steps using existing async worker event loop")
|
||||
future = asyncio.run_coroutine_threadsafe(coro, worker_handler.async_loop)
|
||||
return future.result()
|
||||
else:
|
||||
# Fallback: create a new event loop (for sync workers or if async loop not available)
|
||||
logger.debug("Browser steps creating temporary event loop")
|
||||
loop = asyncio.new_event_loop()
|
||||
asyncio.set_event_loop(loop)
|
||||
# Dedicated event loop for ALL browser steps sessions
|
||||
_browser_steps_loop = None
|
||||
_browser_steps_thread = None
|
||||
_browser_steps_loop_lock = threading.Lock()
|
||||
|
||||
def _start_browser_steps_loop():
|
||||
"""Start a dedicated event loop for browser steps in its own thread"""
|
||||
global _browser_steps_loop
|
||||
|
||||
# Create and set the event loop for this thread
|
||||
loop = asyncio.new_event_loop()
|
||||
asyncio.set_event_loop(loop)
|
||||
_browser_steps_loop = loop
|
||||
|
||||
logger.debug("Browser steps event loop started")
|
||||
|
||||
try:
|
||||
# Run the loop forever - handles all browsersteps sessions
|
||||
loop.run_forever()
|
||||
except Exception as e:
|
||||
logger.error(f"Browser steps event loop error: {e}")
|
||||
finally:
|
||||
try:
|
||||
return loop.run_until_complete(coro)
|
||||
# Cancel all remaining tasks
|
||||
pending = asyncio.all_tasks(loop)
|
||||
for task in pending:
|
||||
task.cancel()
|
||||
|
||||
# Wait for tasks to finish cancellation
|
||||
if pending:
|
||||
loop.run_until_complete(asyncio.gather(*pending, return_exceptions=True))
|
||||
except Exception as e:
|
||||
logger.debug(f"Error during browser steps loop cleanup: {e}")
|
||||
finally:
|
||||
loop.close()
|
||||
logger.debug("Browser steps event loop closed")
|
||||
|
||||
def _ensure_browser_steps_loop():
    """Ensure the dedicated browser steps event loop thread is running.

    Starts the ``BrowserStepsEventLoop`` daemon thread when there is none yet
    or the previous one is no longer alive, then polls (for up to 5 seconds)
    until ``_browser_steps_loop`` has been set.

    Raises:
        RuntimeError: if ``_browser_steps_loop`` is still unset after the timeout.
    """
    global _browser_steps_loop, _browser_steps_thread

    with _browser_steps_loop_lock:
        if _browser_steps_thread is None or not _browser_steps_thread.is_alive():
            logger.debug("Starting browser steps event loop thread")

            # A previous thread may have exited and left its loop object in the
            # global. Clear it, otherwise the wait below would see a non-None
            # value immediately and return before the NEW thread has published
            # a fresh loop.
            _browser_steps_loop = None

            _browser_steps_thread = threading.Thread(
                target=_start_browser_steps_loop,
                daemon=True,
                name="BrowserStepsEventLoop"
            )
            _browser_steps_thread.start()

            # Wait for the loop to be ready
            timeout = 5.0
            start_time = time.time()
            while _browser_steps_loop is None:
                if time.time() - start_time > timeout:
                    raise RuntimeError("Browser steps event loop failed to start")
                time.sleep(0.01)

            logger.debug("Browser steps event loop thread started and ready")
|
||||
|
||||
def run_async_in_browser_loop(coro):
    """Execute ``coro`` on the dedicated browser steps event loop.

    Makes sure the loop thread is up, submits the coroutine to it from the
    calling thread and blocks until the coroutine's result is available.

    Raises:
        RuntimeError: if no usable (non-closed) browser steps loop exists.
    """
    _ensure_browser_steps_loop()

    # Guard clause: bail out when the loop is missing or already closed
    if not _browser_steps_loop or _browser_steps_loop.is_closed():
        raise RuntimeError("Browser steps event loop is not available")

    logger.debug("Browser steps using dedicated event loop")
    submitted = asyncio.run_coroutine_threadsafe(coro, _browser_steps_loop)
    return submitted.result()
|
||||
|
||||
def cleanup_expired_sessions():
    """Drop every browsersteps session whose stepper reports ``has_expired``.

    For each expired session the stepper's ``cleanup()`` coroutine is run on
    the browser steps loop (errors are logged, not raised), the session is
    removed from ``browsersteps_sessions`` and the first watch mapping that
    points at it is removed from ``browsersteps_watch_to_session``.
    """
    global browsersteps_sessions, browsersteps_watch_to_session

    # Collect ids up front so the dict is not modified while being iterated
    stale_ids = [
        sid
        for sid, data in browsersteps_sessions.items()
        if data.get('browserstepper') and data.get('browserstepper').has_expired
    ]

    not_found = object()
    for sid in stale_ids:
        logger.debug(f"Cleaning up expired browsersteps session {sid}")

        # Release the stepper's resources via its cleanup() coroutine
        stepper = browsersteps_sessions[sid].get('browserstepper')
        if stepper:
            try:
                run_async_in_browser_loop(stepper.cleanup())
            except Exception as cleanup_exc:
                logger.error(f"Error cleaning up session {sid}: {cleanup_exc}")

        # Forget the session itself
        del browsersteps_sessions[sid]

        # Forget the (first) watch that was mapped to this session
        owner = next(
            (w for w, mapped in list(browsersteps_watch_to_session.items()) if mapped == sid),
            not_found,
        )
        if owner is not not_found:
            del browsersteps_watch_to_session[owner]

    if stale_ids:
        logger.info(f"Cleaned up {len(stale_ids)} expired browsersteps session(s)")
|
||||
|
||||
def cleanup_session_for_watch(watch_uuid):
    """Tear down the browsersteps session mapped to ``watch_uuid``, if any.

    Runs the stepper's ``cleanup()`` coroutine (errors are logged, not
    raised), removes the session and its watch mapping from the module-level
    dicts, then calls ``cleanup_expired_sessions()``.
    Returns early when the watch has no mapped session.
    """
    global browsersteps_sessions, browsersteps_watch_to_session

    sid = browsersteps_watch_to_session.get(watch_uuid)
    if not sid:
        logger.debug(f"No browsersteps session found for watch {watch_uuid}")
        return

    logger.debug(f"Cleaning up browsersteps session {sid} for watch {watch_uuid}")

    data = browsersteps_sessions.get(sid)
    if data:
        stepper = data.get('browserstepper')
        if stepper:
            try:
                run_async_in_browser_loop(stepper.cleanup())
            except Exception as cleanup_exc:
                logger.error(f"Error cleaning up session {sid} for watch {watch_uuid}: {cleanup_exc}")

        # Session entry is dropped whether or not a stepper was attached
        del browsersteps_sessions[sid]

    # The watch -> session mapping goes away in every case
    del browsersteps_watch_to_session[watch_uuid]

    logger.debug(f"Cleaned up session for watch {watch_uuid}")

    # Use the opportunity to sweep any other sessions that have expired
    cleanup_expired_sessions()
|
||||
|
||||
def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
browser_steps_blueprint = Blueprint('browser_steps', __name__, template_folder="templates")
|
||||
@@ -123,6 +246,9 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
if not watch_uuid:
|
||||
return make_response('No Watch UUID specified', 500)
|
||||
|
||||
# Cleanup any existing session for this watch
|
||||
cleanup_session_for_watch(watch_uuid)
|
||||
|
||||
logger.debug("Starting connection with playwright")
|
||||
logger.debug("browser_steps.py connecting")
|
||||
|
||||
@@ -131,6 +257,10 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
browsersteps_sessions[browsersteps_session_id] = run_async_in_browser_loop(
|
||||
start_browsersteps_session(watch_uuid)
|
||||
)
|
||||
|
||||
# Store the mapping of watch_uuid -> browsersteps_session_id
|
||||
browsersteps_watch_to_session[watch_uuid] = browsersteps_session_id
|
||||
|
||||
except Exception as e:
|
||||
if 'ECONNREFUSED' in str(e):
|
||||
return make_response('Unable to start the Playwright Browser session, is sockpuppetbrowser running? Network configuration is OK?', 401)
|
||||
|
||||
@@ -439,7 +439,7 @@ class browsersteps_live_ui(steppable_browser_interface):
|
||||
logger.warning("Attempted to get current state after cleanup")
|
||||
return (None, None)
|
||||
|
||||
xpath_element_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text()
|
||||
xpath_element_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text(encoding="utf-8")
|
||||
|
||||
now = time.time()
|
||||
await self.page.wait_for_timeout(1 * 1000)
|
||||
|
||||
@@ -1,13 +1,8 @@
|
||||
from flask import Blueprint, request, redirect, url_for, flash, render_template
|
||||
from loguru import logger
|
||||
|
||||
from changedetectionio.store import ChangeDetectionStore
|
||||
from changedetectionio.auth_decorator import login_optionally_required
|
||||
from changedetectionio import worker_handler
|
||||
from changedetectionio.blueprint.imports.importer import (
|
||||
import_url_list,
|
||||
import_distill_io_json,
|
||||
import_xlsx_wachete,
|
||||
import_xlsx_custom
|
||||
)
|
||||
|
||||
def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMetaData):
|
||||
import_blueprint = Blueprint('imports', __name__, template_folder="templates")
|
||||
@@ -17,15 +12,26 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
def import_page():
|
||||
remaining_urls = []
|
||||
from changedetectionio import forms
|
||||
|
||||
#
|
||||
if request.method == 'POST':
|
||||
# from changedetectionio import worker_handler
|
||||
|
||||
from changedetectionio.blueprint.imports.importer import (
|
||||
import_url_list,
|
||||
import_distill_io_json,
|
||||
import_xlsx_wachete,
|
||||
import_xlsx_custom
|
||||
)
|
||||
|
||||
# URL List import
|
||||
if request.values.get('urls') and len(request.values.get('urls').strip()):
|
||||
# Import and push into the queue for immediate update check
|
||||
importer_handler = import_url_list()
|
||||
importer_handler.run(data=request.values.get('urls'), flash=flash, datastore=datastore, processor=request.values.get('processor', 'text_json_diff'))
|
||||
for uuid in importer_handler.new_uuids:
|
||||
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
logger.debug(f"Imported {len(importer_handler.new_uuids)} new UUIDs")
|
||||
# Don't add to the queue: the scheduler can see that they haven't been checked and will add them to the queue itself
|
||||
# for uuid in importer_handler.new_uuids:
|
||||
# worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
|
||||
if len(importer_handler.remaining_data) == 0:
|
||||
return redirect(url_for('watchlist.index'))
|
||||
@@ -37,8 +43,10 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
# Import and push into the queue for immediate update check
|
||||
d_importer = import_distill_io_json()
|
||||
d_importer.run(data=request.values.get('distill-io'), flash=flash, datastore=datastore)
|
||||
for uuid in d_importer.new_uuids:
|
||||
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
# Don't add to the queue: the scheduler can see that they haven't been checked and will add them to the queue itself
|
||||
# for uuid in importer_handler.new_uuids:
|
||||
# worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
|
||||
|
||||
# XLSX importer
|
||||
if request.files and request.files.get('xlsx_file'):
|
||||
@@ -60,8 +68,10 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
w_importer.import_profile = map
|
||||
w_importer.run(data=file, flash=flash, datastore=datastore)
|
||||
|
||||
for uuid in w_importer.new_uuids:
|
||||
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
# Don't add to the queue: the scheduler can see that they haven't been checked and will add them to the queue itself
|
||||
# for uuid in importer_handler.new_uuids:
|
||||
# worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
|
||||
|
||||
# Could be some remaining, or we could be on GET
|
||||
form = forms.importForm(formdata=request.form if request.method == 'POST' else None)
|
||||
|
||||
@@ -2,6 +2,7 @@ from abc import abstractmethod
|
||||
import time
|
||||
from wtforms import ValidationError
|
||||
from loguru import logger
|
||||
from flask_babel import gettext
|
||||
|
||||
from changedetectionio.forms import validate_url
|
||||
|
||||
@@ -41,7 +42,7 @@ class import_url_list(Importer):
|
||||
now = time.time()
|
||||
|
||||
if (len(urls) > 5000):
|
||||
flash("Importing 5,000 of the first URLs from your list, the rest can be imported again.")
|
||||
flash(gettext("Importing 5,000 of the first URLs from your list, the rest can be imported again."))
|
||||
|
||||
for url in urls:
|
||||
url = url.strip()
|
||||
@@ -74,7 +75,7 @@ class import_url_list(Importer):
|
||||
self.remaining_data = []
|
||||
self.remaining_data.append(url)
|
||||
|
||||
flash("{} Imported from list in {:.2f}s, {} Skipped.".format(good, time.time() - now, len(self.remaining_data)))
|
||||
flash(gettext("{} Imported from list in {:.2f}s, {} Skipped.").format(good, time.time() - now, len(self.remaining_data)))
|
||||
|
||||
|
||||
class import_distill_io_json(Importer):
|
||||
@@ -94,11 +95,11 @@ class import_distill_io_json(Importer):
|
||||
try:
|
||||
data = json.loads(data.strip())
|
||||
except json.decoder.JSONDecodeError:
|
||||
flash("Unable to read JSON file, was it broken?", 'error')
|
||||
flash(gettext("Unable to read JSON file, was it broken?"), 'error')
|
||||
return
|
||||
|
||||
if not data.get('data'):
|
||||
flash("JSON structure looks invalid, was it broken?", 'error')
|
||||
flash(gettext("JSON structure looks invalid, was it broken?"), 'error')
|
||||
return
|
||||
|
||||
for d in data.get('data'):
|
||||
@@ -135,7 +136,7 @@ class import_distill_io_json(Importer):
|
||||
self.new_uuids.append(new_uuid)
|
||||
good += 1
|
||||
|
||||
flash("{} Imported from Distill.io in {:.2f}s, {} Skipped.".format(len(self.new_uuids), time.time() - now, len(self.remaining_data)))
|
||||
flash(gettext("{} Imported from Distill.io in {:.2f}s, {} Skipped.").format(len(self.new_uuids), time.time() - now, len(self.remaining_data)))
|
||||
|
||||
|
||||
class import_xlsx_wachete(Importer):
|
||||
@@ -156,7 +157,7 @@ class import_xlsx_wachete(Importer):
|
||||
wb = load_workbook(data)
|
||||
except Exception as e:
|
||||
# @todo correct except
|
||||
flash("Unable to read export XLSX file, something wrong with the file?", 'error')
|
||||
flash(gettext("Unable to read export XLSX file, something wrong with the file?"), 'error')
|
||||
return
|
||||
|
||||
row_id = 2
|
||||
@@ -196,7 +197,7 @@ class import_xlsx_wachete(Importer):
|
||||
validate_url(data.get('url'))
|
||||
except ValidationError as e:
|
||||
logger.error(f">> Import URL error {data.get('url')} {str(e)}")
|
||||
flash(f"Error processing row number {row_id}, URL value was incorrect, row was skipped.", 'error')
|
||||
flash(gettext("Error processing row number {}, URL value was incorrect, row was skipped.").format(row_id), 'error')
|
||||
# Don't bother processing anything else on this row
|
||||
continue
|
||||
|
||||
@@ -210,12 +211,11 @@ class import_xlsx_wachete(Importer):
|
||||
good += 1
|
||||
except Exception as e:
|
||||
logger.error(e)
|
||||
flash(f"Error processing row number {row_id}, check all cell data types are correct, row was skipped.", 'error')
|
||||
flash(gettext("Error processing row number {}, check all cell data types are correct, row was skipped.").format(row_id), 'error')
|
||||
else:
|
||||
row_id += 1
|
||||
|
||||
flash(
|
||||
"{} imported from Wachete .xlsx in {:.2f}s".format(len(self.new_uuids), time.time() - now))
|
||||
flash(gettext("{} imported from Wachete .xlsx in {:.2f}s").format(len(self.new_uuids), time.time() - now))
|
||||
|
||||
|
||||
class import_xlsx_custom(Importer):
|
||||
@@ -236,7 +236,7 @@ class import_xlsx_custom(Importer):
|
||||
wb = load_workbook(data)
|
||||
except Exception as e:
|
||||
# @todo correct except
|
||||
flash("Unable to read export XLSX file, something wrong with the file?", 'error')
|
||||
flash(gettext("Unable to read export XLSX file, something wrong with the file?"), 'error')
|
||||
return
|
||||
|
||||
# @todo check there are at least 2 rows, same in the other method
|
||||
@@ -265,7 +265,7 @@ class import_xlsx_custom(Importer):
|
||||
validate_url(url)
|
||||
except ValidationError as e:
|
||||
logger.error(f">> Import URL error {url} {str(e)}")
|
||||
flash(f"Error processing row number {row_i}, URL value was incorrect, row was skipped.", 'error')
|
||||
flash(gettext("Error processing row number {}, URL value was incorrect, row was skipped.").format(row_i), 'error')
|
||||
# Don't bother processing anything else on this row
|
||||
url = None
|
||||
break
|
||||
@@ -294,9 +294,8 @@ class import_xlsx_custom(Importer):
|
||||
good += 1
|
||||
except Exception as e:
|
||||
logger.error(e)
|
||||
flash(f"Error processing row number {row_i}, check all cell data types are correct, row was skipped.", 'error')
|
||||
flash(gettext("Error processing row number {}, check all cell data types are correct, row was skipped.").format(row_i), 'error')
|
||||
else:
|
||||
row_i += 1
|
||||
|
||||
flash(
|
||||
"{} imported from custom .xlsx in {:.2f}s".format(len(self.new_uuids), time.time() - now))
|
||||
flash(gettext("{} imported from custom .xlsx in {:.2f}s").format(len(self.new_uuids), time.time() - now))
|
||||
@@ -6,9 +6,9 @@
|
||||
|
||||
<div class="tabs collapsable">
|
||||
<ul>
|
||||
<li class="tab" id=""><a href="#url-list">URL List</a></li>
|
||||
<li class="tab"><a href="#distill-io">Distill.io</a></li>
|
||||
<li class="tab"><a href="#xlsx">.XLSX & Wachete</a></li>
|
||||
<li class="tab" id=""><a href="#url-list">{{ _('URL List') }}</a></li>
|
||||
<li class="tab"><a href="#distill-io">{{ _('Distill.io') }}</a></li>
|
||||
<li class="tab"><a href="#xlsx">{{ _('.XLSX & Wachete') }}</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
@@ -17,11 +17,10 @@
|
||||
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}">
|
||||
<div class="tab-pane-inner" id="url-list">
|
||||
<div class="pure-control-group">
|
||||
Enter one URL per line, and optionally add tags for each URL after a space, delineated by comma
|
||||
(,):
|
||||
{{ _('Enter one URL per line, and optionally add tags for each URL after a space, delineated by comma (,):') }}
|
||||
<br>
|
||||
<p><strong>Example: </strong><code>https://example.com tag1, tag2, last tag</code></p>
|
||||
URLs which do not pass validation will stay in the textarea.
|
||||
<p><strong>{{ _('Example:') }} </strong><code>https://example.com tag1, tag2, last tag</code></p>
|
||||
{{ _('URLs which do not pass validation will stay in the textarea.') }}
|
||||
</div>
|
||||
{{ render_field(form.processor, class="processor") }}
|
||||
|
||||
@@ -42,12 +41,12 @@
|
||||
|
||||
|
||||
<div class="pure-control-group">
|
||||
Copy and Paste your Distill.io watch 'export' file, this should be a JSON file.<br>
|
||||
This is <i>experimental</i>, supported fields are <code>name</code>, <code>uri</code>, <code>tags</code>, <code>config:selections</code>, the rest (including <code>schedule</code>) are ignored.
|
||||
{{ _('Copy and Paste your Distill.io watch \'export\' file, this should be a JSON file.') }}<br>
|
||||
{{ _('This is') }} <i>{{ _('experimental') }}</i>, {{ _('supported fields are') }} <code>name</code>, <code>uri</code>, <code>tags</code>, <code>config:selections</code>, {{ _('the rest (including') }} <code>schedule</code>) {{ _('are ignored.') }}
|
||||
<br>
|
||||
<p>
|
||||
How to export? <a href="https://distill.io/docs/web-monitor/how-export-and-import-monitors/">https://distill.io/docs/web-monitor/how-export-and-import-monitors/</a><br>
|
||||
Be sure to set your default fetcher to Chrome if required.<br>
|
||||
{{ _('How to export?') }} <a href="https://distill.io/docs/web-monitor/how-export-and-import-monitors/">https://distill.io/docs/web-monitor/how-export-and-import-monitors/</a><br>
|
||||
{{ _('Be sure to set your default fetcher to Chrome if required.') }}<br>
|
||||
</p>
|
||||
</div>
|
||||
|
||||
@@ -89,32 +88,32 @@
|
||||
</fieldset>
|
||||
<div class="pure-control-group">
|
||||
<span class="pure-form-message-inline">
|
||||
Table of custom column and data types mapping for the <strong>Custom mapping</strong> File mapping type.
|
||||
{{ _('Table of custom column and data types mapping for the') }} <strong>{{ _('Custom mapping') }}</strong> {{ _('File mapping type.') }}
|
||||
</span>
|
||||
<table style="border: 1px solid #aaa; padding: 0.5rem; border-radius: 4px;">
|
||||
<tr>
|
||||
<td><strong>Column #</strong></td>
|
||||
<td><strong>{{ _('Column #') }}</strong></td>
|
||||
{% for n in range(4) %}
|
||||
<td><input type="number" name="custom_xlsx[col_{{n}}]" style="width: 4rem;" min="1"></td>
|
||||
{% endfor %}
|
||||
</tr>
|
||||
<tr>
|
||||
<td><strong>Type</strong></td>
|
||||
<td><strong>{{ _('Type') }}</strong></td>
|
||||
{% for n in range(4) %}
|
||||
<td><select name="custom_xlsx[col_type_{{n}}]">
|
||||
<option value="" style="color: #aaa"> -- none --</option>
|
||||
<option value="url">URL</option>
|
||||
<option value="title">Title</option>
|
||||
<option value="include_filters">CSS/xPath filter</option>
|
||||
<option value="tag">Group / Tag name(s)</option>
|
||||
<option value="interval_minutes">Recheck time (minutes)</option>
|
||||
<option value="" style="color: #aaa"> -- {{ _('none') }} --</option>
|
||||
<option value="url">{{ _('URL') }}</option>
|
||||
<option value="title">{{ _('Title') }}</option>
|
||||
<option value="include_filters">{{ _('CSS/xPath filter') }}</option>
|
||||
<option value="tag">{{ _('Group / Tag name(s)') }}</option>
|
||||
<option value="interval_minutes">{{ _('Recheck time (minutes)') }}</option>
|
||||
</select></td>
|
||||
{% endfor %}
|
||||
</tr>
|
||||
</table>
|
||||
</div>
|
||||
</div>
|
||||
<button type="submit" class="pure-button pure-input-1-2 pure-button-primary">Import</button>
|
||||
<button type="submit" class="pure-button pure-input-1-2 pure-button-primary">{{ _('Import') }}</button>
|
||||
</form>
|
||||
|
||||
</div>
|
||||
|
||||
@@ -1 +1,27 @@
|
||||
RSS_FORMAT_TYPES = [('plaintext', 'Plain text'), ('html', 'HTML Color')]
|
||||
from copy import deepcopy
|
||||
from loguru import logger
|
||||
|
||||
from changedetectionio.model import USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH
|
||||
from changedetectionio.notification import valid_notification_formats
|
||||
|
||||
RSS_CONTENT_FORMAT_DEFAULT = 'text'
|
||||
|
||||
# Some stuff not related
|
||||
RSS_FORMAT_TYPES = deepcopy(valid_notification_formats)
|
||||
if RSS_FORMAT_TYPES.get('markdown'):
|
||||
del RSS_FORMAT_TYPES['markdown']
|
||||
|
||||
if RSS_FORMAT_TYPES.get(USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH):
|
||||
del RSS_FORMAT_TYPES[USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH]
|
||||
|
||||
if not RSS_FORMAT_TYPES.get(RSS_CONTENT_FORMAT_DEFAULT):
|
||||
logger.critical(f"RSS_CONTENT_FORMAT_DEFAULT not in the acceptable list {RSS_CONTENT_FORMAT_DEFAULT}")
|
||||
|
||||
RSS_TEMPLATE_TYPE_OPTIONS = {'system_default': 'System default', 'notification_body': 'Notification body'}
|
||||
|
||||
# @note: We use <pre> because nearly all RSS readers render only HTML (Thunderbird, for example, can't display plain text alone)
|
||||
RSS_TEMPLATE_PLAINTEXT_DEFAULT = "<pre>{{watch_label}} had a change.\n\n{{diff}}\n</pre>"
|
||||
|
||||
# @todo add some [edit]/[history]/[goto] etc links
|
||||
# @todo need {{watch_edit_link}} + delete + history link token
|
||||
RSS_TEMPLATE_HTML_DEFAULT = "<html><body>\n<h4><a href=\"{{watch_url}}\">{{watch_label}}</a></h4>\n<p>{{diff}}</p>\n</body></html>\n"
|
||||
|
||||
156
changedetectionio/blueprint/rss/_util.py
Normal file
156
changedetectionio/blueprint/rss/_util.py
Normal file
@@ -0,0 +1,156 @@
|
||||
"""
|
||||
Utility functions for RSS feed generation.
|
||||
"""
|
||||
|
||||
from changedetectionio.notification.handler import process_notification
|
||||
from changedetectionio.notification_service import NotificationContextData, _check_cascading_vars
|
||||
from loguru import logger
|
||||
import datetime
|
||||
import pytz
|
||||
import re
|
||||
|
||||
|
||||
BAD_CHARS_REGEX = r'[\x00-\x08\x0B\x0C\x0E-\x1F]'
|
||||
|
||||
|
||||
def scan_invalid_chars_in_rss(content):
    """
    Report whether ``content`` contains a character matched by BAD_CHARS_REGEX.

    Only the first offending character is inspected; it is logged as a warning
    together with up to 20 characters of context on either side.
    Returns True when such a character exists, otherwise False.
    """
    first_hit = re.search(BAD_CHARS_REGEX, content)
    if first_hit is None:
        return False

    pos = first_hit.start()
    offending = content[pos]

    # Context window around the hit, with line breaks made visible
    window = content[max(0, pos - 20):min(len(content), pos + 21)]
    escaped = window.replace('\n', '\\n').replace('\r', '\\r')

    logger.warning(f"Invalid char 0x{ord(offending):02x} at pos {pos}: ...{escaped}...")
    return True
|
||||
|
||||
|
||||
def clean_entry_content(content):
    """
    Return ``content`` with every character matched by BAD_CHARS_REGEX stripped out.
    """
    return re.compile(BAD_CHARS_REGEX).sub('', content)
|
||||
|
||||
|
||||
def generate_watch_guid(watch, timestamp):
    """
    Build the GUID string for one RSS entry of a watch.

    Args:
        watch: Mapping-like watch object; its 'uuid' item is used
        timestamp: The timestamp of the specific change this entry represents

    Returns:
        The string "<uuid>/<timestamp>"
    """
    return "{}/{}".format(watch['uuid'], timestamp)
|
||||
|
||||
|
||||
def validate_rss_token(datastore, request):
    """
    Validate the RSS access token from the request.

    The ``token`` query argument must be present, non-empty and equal to the
    application's configured ``rss_access_token``. A missing or empty token on
    either side is always rejected, so an unconfigured application token can
    never be matched by a request that simply omits ``token``.

    Args:
        datastore: Object whose ``data['settings']['application']`` mapping
            may hold 'rss_access_token'
        request: Object whose ``args`` mapping may hold 'token'

    Returns:
        tuple: (is_valid, error_response) where error_response is None if valid,
        otherwise a (message, 403) tuple
    """
    import hmac

    app_rss_token = datastore.data['settings']['application'].get('rss_access_token')
    rss_url_token = request.args.get('token')

    denied = (False, ("Access denied, bad token", 403))

    # Without this guard, None == None (no token configured, none supplied)
    # would be treated as a successful match.
    if not app_rss_token or not rss_url_token:
        return denied

    # Constant-time comparison; encode to bytes so non-ASCII input is accepted
    supplied = str(rss_url_token).encode('utf-8')
    expected = str(app_rss_token).encode('utf-8')
    if not hmac.compare_digest(supplied, expected):
        return denied

    return True, None
|
||||
|
||||
|
||||
def get_rss_template(datastore, watch, rss_content_format, default_html, default_plaintext):
    """Pick the template used to render RSS entry content.

    Order of precedence: the cascaded 'notification_body' when the application
    setting 'rss_template_type' is 'notification_body'; then a non-blank
    'rss_template_override'; then ``default_plaintext`` when 'text' occurs in
    ``rss_content_format``; otherwise ``default_html``.
    """
    if datastore.data['settings']['application'].get('rss_template_type') == 'notification_body':
        return _check_cascading_vars(datastore=datastore, var_name='notification_body', watch=watch)

    custom_template = datastore.data['settings']['application'].get('rss_template_override')
    if custom_template and custom_template.strip():
        return custom_template

    return default_plaintext if 'text' in rss_content_format else default_html
|
||||
|
||||
|
||||
def get_watch_label(datastore, watch):
    """Return ``watch.label`` when 'use_page_title_in_list' is enabled in the UI settings or on the watch, else the watch's 'url'."""
    prefer_label = (
        datastore.data['settings']['application']['ui'].get('use_page_title_in_list')
        or watch.get('use_page_title_in_list')
    )
    return watch.label if prefer_label else watch.get('url')
|
||||
|
||||
|
||||
def add_watch_categories(fe, watch, datastore):
    """Call ``fe.category(term=...)`` once for each of the watch's tags that resolves to a tag with a non-empty title."""
    for tag_uuid in watch.get('tags', []):
        tag = datastore.data['settings']['application'].get('tags', {}).get(tag_uuid)
        # Skip unknown tag uuids and tags without a usable title
        if not tag or not tag.get('title'):
            continue
        fe.category(term=tag.get('title'))
|
||||
|
||||
|
||||
def build_notification_context(watch, timestamp_from, timestamp_to, watch_label,
                               n_body_template, rss_content_format):
    """
    Create the NotificationContextData used to render one RSS entry.

    The ``watch`` argument is accepted but not read here. ``notification_urls``
    is set to a single ``null://`` URL.
    """
    initial_values = {
        'notification_urls': ['null://just-sending-a-null-test-for-the-render-in-RSS'],
        'notification_body': n_body_template,
        'timestamp_to': timestamp_to,
        'timestamp_from': timestamp_from,
        'watch_label': watch_label,
        'notification_format': rss_content_format
    }
    return NotificationContextData(initial_data=initial_values)
|
||||
|
||||
|
||||
def render_notification(n_object, notification_service, watch, datastore,
                        date_index_from=None, date_index_to=None):
    """
    Run a notification context through the notification pipeline and return the
    first rendered result.

    ``date_index_from`` / ``date_index_to`` are forwarded to
    ``queue_notification_for_watch`` only when both are given.
    """
    # Only forward the history indices when the caller supplied both of them
    if date_index_from is None or date_index_to is None:
        index_kwargs = {}
    else:
        index_kwargs = {'date_index_from': date_index_from, 'date_index_to': date_index_to}

    queued = notification_service.queue_notification_for_watch(n_object=n_object, watch=watch, **index_kwargs)
    queued['watch_mime_type'] = None

    rendered = process_notification(n_object=queued, datastore=datastore)
    return rendered[0]
|
||||
|
||||
|
||||
def populate_feed_entry(fe, watch, content, guid, timestamp, link=None, title_suffix=None):
    """
    Populate a feed entry with content and metadata.

    Args:
        fe: The feed entry to fill in (link, title, content, guid and pubDate are set).
        watch: The watch object; its ``url`` is used for the entry title.
        content: Entry body; characters flagged by ``scan_invalid_chars_in_rss`` are
            removed with ``clean_entry_content`` before it is stored as CDATA.
        guid: GUID string for the entry (stored with ``permalink=False``).
        timestamp: Unix timestamp (anything ``int()`` accepts) of the change this
            entry represents.
        link: Optional link passed to ``fe.link()``; skipped when falsy.
        title_suffix: Optional text appended to the title as ``"<url> - <suffix>"``.
    """
    # The title is always derived from the watch URL here; a caller that wants a
    # different title has to call fe.title() again afterwards.
    watch_label = watch.get('url')

    # Set link
    if link:
        fe.link(link=link)

    # Set title
    if title_suffix:
        fe.title(title=f"{watch_label} - {title_suffix}")
    else:
        fe.title(title=watch_label)

    # Clean and set content
    if scan_invalid_chars_in_rss(content):
        content = clean_entry_content(content)
    fe.content(content=content, type='CDATA')

    # Set GUID
    fe.guid(guid, permalink=False)

    # Set pubDate using the timestamp of this specific change.
    # Convert the epoch value directly to an aware UTC datetime: fromtimestamp()
    # without tz returns *local* wall-clock time, and stamping that with
    # tzinfo=UTC afterwards shifts pubDate by the server's UTC offset.
    dt = datetime.datetime.fromtimestamp(int(timestamp), tz=datetime.timezone.utc)
    fe.pubDate(dt)
|
||||
|
||||
@@ -1,155 +1,26 @@
|
||||
|
||||
from changedetectionio.jinja2_custom import render as jinja_render
|
||||
from changedetectionio.notification.handler import apply_service_tweaks
|
||||
from changedetectionio.store import ChangeDetectionStore
|
||||
from feedgen.feed import FeedGenerator
|
||||
from flask import Blueprint, make_response, request, url_for, redirect
|
||||
from loguru import logger
|
||||
import datetime
|
||||
import pytz
|
||||
import re
|
||||
import time
|
||||
from flask import Blueprint
|
||||
|
||||
|
||||
BAD_CHARS_REGEX=r'[\x00-\x08\x0B\x0C\x0E-\x1F]'
|
||||
|
||||
# Anything that is not text/UTF-8 should be stripped before it breaks feedgen (such as binary data etc)
|
||||
def scan_invalid_chars_in_rss(content):
    """
    Report whether content contains a character matching BAD_CHARS_REGEX.

    When one is found, a warning is logged with the character's hex value, its
    position and up to 20 characters of context on each side. Only the first
    match is inspected.

    Returns:
        bool: True if a matching character was found, otherwise False.
    """
    first_bad = re.search(BAD_CHARS_REGEX, content)
    if first_bad is None:
        return False

    pos = first_bad.start()
    hex_value = f"0x{ord(content[pos]):02x}"

    # Grab context around the offending character, with line breaks made visible
    window = content[max(0, pos - 20):min(len(content), pos + 21)]
    context = window.replace('\n', '\\n').replace('\r', '\\r')
    logger.warning(f"Invalid char {hex_value} at pos {pos}: ...{context}...")

    # First match is enough
    return True
|
||||
|
||||
|
||||
def clean_entry_content(content):
    """Return content with every character matching BAD_CHARS_REGEX removed."""
    bad_chars = re.compile(BAD_CHARS_REGEX)
    return bad_chars.sub('', content)
|
||||
from . import tag as tag_routes
|
||||
from . import main_feed
|
||||
from . import single_watch
|
||||
|
||||
def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
"""
|
||||
Construct and configure the RSS blueprint with all routes.
|
||||
|
||||
Args:
|
||||
datastore: The ChangeDetectionStore instance
|
||||
|
||||
Returns:
|
||||
The configured Flask blueprint
|
||||
"""
|
||||
rss_blueprint = Blueprint('rss', __name__)
|
||||
|
||||
# Some RSS reader situations ended up with rss/ (forward slash after RSS) due
|
||||
# to some earlier blueprint rerouting work, it should goto feed.
|
||||
@rss_blueprint.route("/", methods=['GET'])
|
||||
def extraslash():
|
||||
return redirect(url_for('rss.feed'))
|
||||
|
||||
# Import the login decorator if needed
|
||||
# from changedetectionio.auth_decorator import login_optionally_required
|
||||
@rss_blueprint.route("", methods=['GET'])
|
||||
def feed():
|
||||
now = time.time()
|
||||
# Always requires token set
|
||||
app_rss_token = datastore.data['settings']['application'].get('rss_access_token')
|
||||
rss_url_token = request.args.get('token')
|
||||
if rss_url_token != app_rss_token:
|
||||
return "Access denied, bad token", 403
|
||||
|
||||
from changedetectionio import diff
|
||||
limit_tag = request.args.get('tag', '').lower().strip()
|
||||
# Be sure limit_tag is a uuid
|
||||
for uuid, tag in datastore.data['settings']['application'].get('tags', {}).items():
|
||||
if limit_tag == tag.get('title', '').lower().strip():
|
||||
limit_tag = uuid
|
||||
|
||||
# Sort by last_changed and add the uuid which is usually the key..
|
||||
sorted_watches = []
|
||||
|
||||
# @todo needs a .itemsWithTag() or something - then we can use that in Jinaj2 and throw this away
|
||||
for uuid, watch in datastore.data['watching'].items():
|
||||
# @todo tag notification_muted skip also (improve Watch model)
|
||||
if datastore.data['settings']['application'].get('rss_hide_muted_watches') and watch.get('notification_muted'):
|
||||
continue
|
||||
if limit_tag and not limit_tag in watch['tags']:
|
||||
continue
|
||||
watch['uuid'] = uuid
|
||||
sorted_watches.append(watch)
|
||||
|
||||
sorted_watches.sort(key=lambda x: x.last_changed, reverse=False)
|
||||
|
||||
fg = FeedGenerator()
|
||||
fg.title('changedetection.io')
|
||||
fg.description('Feed description')
|
||||
fg.link(href='https://changedetection.io')
|
||||
|
||||
html_colour_enable = False
|
||||
if datastore.data['settings']['application'].get('rss_content_format') == 'html':
|
||||
html_colour_enable = True
|
||||
|
||||
for watch in sorted_watches:
|
||||
|
||||
dates = list(watch.history.keys())
|
||||
# Re #521 - Don't bother processing this one if theres less than 2 snapshots, means we never had a change detected.
|
||||
if len(dates) < 2:
|
||||
continue
|
||||
|
||||
if not watch.viewed:
|
||||
# Re #239 - GUID needs to be individual for each event
|
||||
# @todo In the future make this a configurable link back (see work on BASE_URL https://github.com/dgtlmoon/changedetection.io/pull/228)
|
||||
guid = "{}/{}".format(watch['uuid'], watch.last_changed)
|
||||
fe = fg.add_entry()
|
||||
|
||||
# Include a link to the diff page, they will have to login here to see if password protection is enabled.
|
||||
# Description is the page you watch, link takes you to the diff JS UI page
|
||||
# Dict val base_url will get overriden with the env var if it is set.
|
||||
ext_base_url = datastore.data['settings']['application'].get('active_base_url')
|
||||
# @todo fix
|
||||
|
||||
# Because we are called via whatever web server, flask should figure out the right path (
|
||||
diff_link = {'href': url_for('ui.ui_views.diff_history_page', uuid=watch['uuid'], _external=True)}
|
||||
|
||||
fe.link(link=diff_link)
|
||||
|
||||
# Same logic as watch-overview.html
|
||||
if datastore.data['settings']['application']['ui'].get('use_page_title_in_list') or watch.get('use_page_title_in_list'):
|
||||
watch_label = watch.label
|
||||
else:
|
||||
watch_label = watch.get('url')
|
||||
|
||||
fe.title(title=watch_label)
|
||||
try:
|
||||
|
||||
html_diff = diff.render_diff(previous_version_file_contents=watch.get_history_snapshot(dates[-2]),
|
||||
newest_version_file_contents=watch.get_history_snapshot(dates[-1]),
|
||||
include_equal=False,
|
||||
line_feed_sep="<br>"
|
||||
)
|
||||
|
||||
|
||||
requested_output_format = 'htmlcolor' if html_colour_enable else 'html'
|
||||
html_diff = apply_service_tweaks(url='', n_body=html_diff, n_title=None, requested_output_format=requested_output_format)
|
||||
|
||||
except FileNotFoundError as e:
|
||||
html_diff = f"History snapshot file for watch {watch.get('uuid')}@{watch.last_changed} - '{watch.get('title')} not found."
|
||||
|
||||
# @todo Make this configurable and also consider html-colored markup
|
||||
# @todo User could decide if <link> goes to the diff page, or to the watch link
|
||||
rss_template = "<html><body>\n<h4><a href=\"{{watch_url}}\">{{watch_title}}</a></h4>\n<p>{{html_diff}}</p>\n</body></html>\n"
|
||||
|
||||
content = jinja_render(template_str=rss_template, watch_title=watch_label, html_diff=html_diff, watch_url=watch.link)
|
||||
|
||||
# Out of range chars could also break feedgen
|
||||
if scan_invalid_chars_in_rss(content):
|
||||
content = clean_entry_content(content)
|
||||
|
||||
fe.content(content=content, type='CDATA')
|
||||
fe.guid(guid, permalink=False)
|
||||
dt = datetime.datetime.fromtimestamp(int(watch.newest_history_key))
|
||||
dt = dt.replace(tzinfo=pytz.UTC)
|
||||
fe.pubDate(dt)
|
||||
|
||||
response = make_response(fg.rss_str())
|
||||
response.headers.set('Content-Type', 'application/rss+xml;charset=utf-8')
|
||||
logger.trace(f"RSS generated in {time.time() - now:.3f}s")
|
||||
return response
|
||||
# Register all route modules
|
||||
main_feed.construct_main_feed_routes(rss_blueprint, datastore)
|
||||
single_watch.construct_single_watch_routes(rss_blueprint, datastore)
|
||||
tag_routes.construct_tag_routes(rss_blueprint, datastore)
|
||||
|
||||
return rss_blueprint
|
||||
105
changedetectionio/blueprint/rss/main_feed.py
Normal file
105
changedetectionio/blueprint/rss/main_feed.py
Normal file
@@ -0,0 +1,105 @@
|
||||
from flask import make_response, request, url_for, redirect
|
||||
|
||||
|
||||
|
||||
def construct_main_feed_routes(rss_blueprint, datastore):
    """
    Register the main RSS feed routes on the blueprint.

    Two GET routes are added: ``"/"`` (redirects to the feed) and ``""`` (the feed
    itself, listing the latest change of every unviewed watch).

    Args:
        rss_blueprint: The Flask blueprint to add routes to
        datastore: The ChangeDetectionStore instance
    """

    # Some RSS reader situations ended up with rss/ (forward slash after RSS) due
    # to some earlier blueprint rerouting work, it should go to feed.
    @rss_blueprint.route("/", methods=['GET'])
    def extraslash():
        return redirect(url_for('rss.feed'))

    # Import the login decorator if needed
    # from changedetectionio.auth_decorator import login_optionally_required
    @rss_blueprint.route("", methods=['GET'])
    def feed():
        import time
        from feedgen.feed import FeedGenerator
        from loguru import logger

        from . import RSS_TEMPLATE_HTML_DEFAULT, RSS_TEMPLATE_PLAINTEXT_DEFAULT
        from ._util import (validate_rss_token, generate_watch_guid, get_rss_template,
                            get_watch_label, build_notification_context, render_notification,
                            populate_feed_entry, add_watch_categories)
        from ...notification_service import NotificationService

        started_at = time.time()

        # Validate token
        token_ok, denied_response = validate_rss_token(datastore, request)
        if not token_ok:
            return denied_response

        app_settings = datastore.data['settings']['application']
        rss_content_format = app_settings.get('rss_content_format')

        # The 'tag' argument may be a tag title; translate it to the tag's uuid
        limit_tag = request.args.get('tag', '').lower().strip()
        for tag_uuid, tag in app_settings.get('tags', {}).items():
            if limit_tag == tag.get('title', '').lower().strip():
                limit_tag = tag_uuid

        # @todo needs a .itemsWithTag() or something - then we can use that in Jinja2 and throw this away
        candidates = []
        for _watch_uuid, watch in datastore.data['watching'].items():
            # @todo tag notification_muted skip also (improve Watch model)
            if app_settings.get('rss_hide_muted_watches') and watch.get('notification_muted'):
                continue
            if limit_tag and limit_tag not in watch['tags']:
                continue
            candidates.append(watch)

        # Sort by last_changed
        sorted_watches = sorted(candidates, key=lambda w: w.last_changed)

        fg = FeedGenerator()
        fg.title('changedetection.io')
        fg.description('Feed description')
        fg.link(href='https://changedetection.io')
        notification_service = NotificationService(datastore=datastore, notification_q=False)

        for watch in sorted_watches:
            dates = list(watch.history.keys())
            # Re #521 - Don't bother processing this one if there's less than 2 snapshots, means we never had a change detected.
            if len(dates) < 2:
                continue

            # Already-viewed watches are left out of the feed
            if watch.viewed:
                continue

            # Re #239 - GUID needs to be individual for each event
            # @todo In the future make this a configurable link back (see work on BASE_URL https://github.com/dgtlmoon/changedetection.io/pull/228)
            watch_label = get_watch_label(datastore, watch)
            timestamp_to = dates[-1]
            timestamp_from = dates[-2]
            guid = generate_watch_guid(watch, timestamp_to)

            # Because we are called via whatever web server, flask should figure out the right path
            diff_link = {'href': url_for('ui.ui_diff.diff_history_page', uuid=watch['uuid'], _external=True)}

            # Get template and build notification context
            n_body_template = get_rss_template(datastore, watch, rss_content_format,
                                               RSS_TEMPLATE_HTML_DEFAULT, RSS_TEMPLATE_PLAINTEXT_DEFAULT)
            n_object = build_notification_context(watch, timestamp_from, timestamp_to,
                                                  watch_label, n_body_template, rss_content_format)

            # Render notification
            res = render_notification(n_object, notification_service, watch, datastore)

            # Create and populate feed entry
            fe = fg.add_entry()
            populate_feed_entry(fe, watch, res['body'], guid, timestamp_to, link=diff_link)
            fe.title(title=watch_label)  # Override title to not include suffix
            add_watch_categories(fe, watch, datastore)

        response = make_response(fg.rss_str())
        response.headers.set('Content-Type', 'application/rss+xml;charset=utf-8')
        logger.trace(f"RSS generated in {time.time() - started_at:.3f}s")
        return response
|
||||
112
changedetectionio/blueprint/rss/single_watch.py
Normal file
112
changedetectionio/blueprint/rss/single_watch.py
Normal file
@@ -0,0 +1,112 @@
|
||||
|
||||
|
||||
def construct_single_watch_routes(rss_blueprint, datastore):
    """
    Construct RSS feed routes for single watches.

    Args:
        rss_blueprint: The Flask blueprint to add routes to
        datastore: The ChangeDetectionStore instance
    """

    @rss_blueprint.route("/watch/<string:uuid>", methods=['GET'])
    def rss_single_watch(uuid):
        """
        Display the most recent changes for a single watch as RSS feed.

        Returns RSS XML with multiple entries showing diffs between consecutive snapshots.
        The number of entries is controlled by the rss_diff_length setting.

        Responds 403 on a bad token, 404 when the watch is unknown and 400 when the
        watch has fewer than 2 history snapshots.
        """
        # NOTE: the docstring must be the first statement of the function; placed
        # after the imports it is just a discarded string expression.
        import time

        from flask import make_response, request
        from feedgen.feed import FeedGenerator
        from loguru import logger

        from . import RSS_TEMPLATE_HTML_DEFAULT, RSS_TEMPLATE_PLAINTEXT_DEFAULT
        from ._util import (validate_rss_token, get_rss_template, get_watch_label,
                            build_notification_context, render_notification,
                            populate_feed_entry, add_watch_categories)
        from ...notification_service import NotificationService

        now = time.time()

        # Validate token
        is_valid, error = validate_rss_token(datastore, request)
        if not is_valid:
            return error

        rss_content_format = datastore.data['settings']['application'].get('rss_content_format')

        # Get the watch by UUID
        watch = datastore.data['watching'].get(uuid)
        if not watch:
            return f"Watch with UUID {uuid} not found", 404

        # Check if watch has at least 2 history snapshots
        dates = list(watch.history.keys())
        if len(dates) < 2:
            return f"Watch {uuid} does not have enough history snapshots to show changes (need at least 2)", 400

        # Get the number of diffs to include (default: 5)
        rss_diff_length = datastore.data['settings']['application'].get('rss_diff_length', 5)

        # Calculate how many diffs we can actually show (limited by available history)
        # We need at least 2 snapshots to create 1 diff; a non-positive setting means "all"
        max_possible_diffs = len(dates) - 1
        num_diffs = min(rss_diff_length, max_possible_diffs) if rss_diff_length > 0 else max_possible_diffs

        # Create RSS feed
        fg = FeedGenerator()

        # Set title: use "label (url)" if label differs from url, otherwise just url
        watch_url = watch.get('url', '')
        watch_label = get_watch_label(datastore, watch)

        if watch_label != watch_url:
            feed_title = f'changedetection.io - {watch_label} ({watch_url})'
        else:
            feed_title = f'changedetection.io - {watch_url}'

        fg.title(feed_title)
        fg.description('Changes')
        fg.link(href='https://changedetection.io')

        notification_service = NotificationService(datastore=datastore, notification_q=False)

        # The template depends only on the datastore, the watch and the content
        # format, so resolve it once instead of on every loop iteration.
        n_body_template = get_rss_template(datastore, watch, rss_content_format,
                                           RSS_TEMPLATE_HTML_DEFAULT, RSS_TEMPLATE_PLAINTEXT_DEFAULT)

        # Loop through history and create RSS entries for each diff
        # Add entries in reverse order because feedgen reverses them
        # This way, the newest change appears first in the final RSS
        for i in range(num_diffs - 1, -1, -1):
            # Calculate indices for this diff (working backwards from newest)
            # i=0: compare dates[-2] to dates[-1] (most recent change)
            # i=1: compare dates[-3] to dates[-2] (previous change)
            # etc.
            date_index_to = -(i + 1)
            date_index_from = -(i + 2)
            timestamp_to = dates[date_index_to]
            timestamp_from = dates[date_index_from]

            # Build notification context for this pair of snapshots
            n_object = build_notification_context(watch, timestamp_from, timestamp_to,
                                                  watch_label, n_body_template, rss_content_format)

            # Render notification with date indices
            res = render_notification(n_object, notification_service, watch, datastore,
                                      date_index_from, date_index_to)

            # Create and populate feed entry
            guid = f"{uuid}/{timestamp_to}"
            fe = fg.add_entry()
            title_suffix = f"Change @ {res['original_context']['change_datetime']}"
            populate_feed_entry(fe, watch, res.get('body', ''), guid, timestamp_to,
                                link={'href': watch.get('url')}, title_suffix=title_suffix)
            add_watch_categories(fe, watch, datastore)

        response = make_response(fg.rss_str())
        response.headers.set('Content-Type', 'application/rss+xml;charset=utf-8')
        logger.debug(f"RSS Single watch built in {time.time()-now:.2f}s")

        return response
|
||||
95
changedetectionio/blueprint/rss/tag.py
Normal file
95
changedetectionio/blueprint/rss/tag.py
Normal file
@@ -0,0 +1,95 @@
|
||||
def construct_tag_routes(rss_blueprint, datastore):
    """
    Construct RSS feed routes for tags.

    Args:
        rss_blueprint: The Flask blueprint to add routes to
        datastore: The ChangeDetectionStore instance
    """

    @rss_blueprint.route("/tag/<string:tag_uuid>", methods=['GET'])
    def rss_tag_feed(tag_uuid):
        """
        Display an RSS feed for all unviewed watches that belong to a specific tag.

        Returns RSS XML with entries for each unviewed watch with sufficient history.
        Responds 403 on a bad token and 404 when the tag is unknown.
        """
        # NOTE: the docstring must be the first statement of the function; placed
        # after the imports it is just a discarded string expression.
        from flask import make_response, request, url_for
        from feedgen.feed import FeedGenerator

        from . import RSS_TEMPLATE_HTML_DEFAULT, RSS_TEMPLATE_PLAINTEXT_DEFAULT
        from ._util import (validate_rss_token, generate_watch_guid, get_rss_template,
                            get_watch_label, build_notification_context, render_notification,
                            populate_feed_entry, add_watch_categories)
        from ...notification_service import NotificationService

        # Validate token
        is_valid, error = validate_rss_token(datastore, request)
        if not is_valid:
            return error

        rss_content_format = datastore.data['settings']['application'].get('rss_content_format')

        # Verify tag exists
        tag = datastore.data['settings']['application'].get('tags', {}).get(tag_uuid)
        if not tag:
            return f"Tag with UUID {tag_uuid} not found", 404

        tag_title = tag.get('title', 'Unknown Tag')

        # Create RSS feed
        fg = FeedGenerator()
        fg.title(f'changedetection.io - {tag_title}')
        fg.description(f'Changes for watches tagged with {tag_title}')
        fg.link(href='https://changedetection.io')
        notification_service = NotificationService(datastore=datastore, notification_q=False)

        hide_muted = datastore.data['settings']['application'].get('rss_hide_muted_watches')

        # Collect all watches with this tag first, so they can be ordered before
        # any entry is added to the feed.
        tagged_watches = []
        for uuid, watch in datastore.data['watching'].items():
            # Skip if watch doesn't have this tag
            if tag_uuid not in watch.get('tags', []):
                continue

            # Skip muted watches if configured
            if hide_muted and watch.get('notification_muted'):
                continue

            tagged_watches.append((uuid, watch))

        # Order by last_changed (ascending), the same ordering the main feed route
        # applies before adding entries.
        # @todo still needs a limit on the number of entries
        tagged_watches.sort(key=lambda item: item[1].last_changed)

        for uuid, watch in tagged_watches:
            # Check if watch has at least 2 history snapshots
            dates = list(watch.history.keys())
            if len(dates) < 2:
                continue

            # Only include unviewed watches
            if watch.viewed:
                continue

            # Include a link to the diff page (use uuid from loop, don't modify watch dict)
            diff_link = {'href': url_for('ui.ui_diff.diff_history_page', uuid=uuid, _external=True)}

            # Get watch label
            watch_label = get_watch_label(datastore, watch)

            # The two most recent snapshots make up the change for this entry
            timestamp_to = dates[-1]
            timestamp_from = dates[-2]

            # Generate GUID for this entry
            guid = generate_watch_guid(watch, timestamp_to)

            # Get template and build notification context
            n_body_template = get_rss_template(datastore, watch, rss_content_format,
                                               RSS_TEMPLATE_HTML_DEFAULT, RSS_TEMPLATE_PLAINTEXT_DEFAULT)
            n_object = build_notification_context(watch, timestamp_from, timestamp_to,
                                                  watch_label, n_body_template, rss_content_format)

            # Render notification
            res = render_notification(n_object, notification_service, watch, datastore)

            # Create and populate feed entry
            fe = fg.add_entry()
            title_suffix = f"Change @ {res['original_context']['change_datetime']}"
            populate_feed_entry(fe, watch, res['body'], guid, timestamp_to, link=diff_link, title_suffix=title_suffix)
            add_watch_categories(fe, watch, datastore)

        response = make_response(fg.rss_str())
        response.headers.set('Content-Type', 'application/rss+xml;charset=utf-8')
        return response
|
||||
@@ -5,6 +5,7 @@ from zoneinfo import ZoneInfo, available_timezones
|
||||
import secrets
|
||||
import flask_login
|
||||
from flask import Blueprint, render_template, request, redirect, url_for, flash
|
||||
from flask_babel import gettext
|
||||
|
||||
from changedetectionio.store import ChangeDetectionStore
|
||||
from changedetectionio.auth_decorator import login_optionally_required
|
||||
@@ -17,6 +18,12 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
@login_optionally_required
|
||||
def settings_page():
|
||||
from changedetectionio import forms
|
||||
from changedetectionio.pluggy_interface import (
|
||||
get_plugin_settings_tabs,
|
||||
load_plugin_settings,
|
||||
save_plugin_settings
|
||||
)
|
||||
|
||||
|
||||
default = deepcopy(datastore.data['settings'])
|
||||
if datastore.proxy_list is not None:
|
||||
@@ -54,7 +61,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
# SALTED_PASS means the password is "locked" to what we set in the Env var
|
||||
if not os.getenv("SALTED_PASS", False):
|
||||
datastore.remove_password()
|
||||
flash("Password protection removed.", 'notice')
|
||||
flash(gettext("Password protection removed."), 'notice')
|
||||
flask_login.logout_user()
|
||||
return redirect(url_for('settings.settings_page'))
|
||||
|
||||
@@ -71,14 +78,20 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
# Handle dynamic worker count adjustment
|
||||
old_worker_count = datastore.data['settings']['requests'].get('workers', 1)
|
||||
new_worker_count = form.data['requests'].get('workers', 1)
|
||||
|
||||
|
||||
datastore.data['settings']['requests'].update(form.data['requests'])
|
||||
|
||||
|
||||
# Adjust worker count if it changed
|
||||
if new_worker_count != old_worker_count:
|
||||
from changedetectionio import worker_handler
|
||||
from changedetectionio.flask_app import update_q, notification_q, app, datastore as ds
|
||||
|
||||
|
||||
# Check CPU core availability and warn if worker count is high
|
||||
cpu_count = os.cpu_count()
|
||||
if cpu_count and new_worker_count >= (cpu_count * 0.9):
|
||||
flash(gettext("Warning: Worker count ({}) is close to or exceeds available CPU cores ({})").format(
|
||||
new_worker_count, cpu_count), 'warning')
|
||||
|
||||
result = worker_handler.adjust_async_worker_count(
|
||||
new_count=new_worker_count,
|
||||
update_q=update_q,
|
||||
@@ -86,32 +99,68 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
app=app,
|
||||
datastore=ds
|
||||
)
|
||||
|
||||
|
||||
if result['status'] == 'success':
|
||||
flash(f"Worker count adjusted: {result['message']}", 'notice')
|
||||
flash(gettext("Worker count adjusted: {}").format(result['message']), 'notice')
|
||||
elif result['status'] == 'not_supported':
|
||||
flash("Dynamic worker adjustment not supported for sync workers", 'warning')
|
||||
flash(gettext("Dynamic worker adjustment not supported for sync workers"), 'warning')
|
||||
elif result['status'] == 'error':
|
||||
flash(f"Error adjusting workers: {result['message']}", 'error')
|
||||
flash(gettext("Error adjusting workers: {}").format(result['message']), 'error')
|
||||
|
||||
if not os.getenv("SALTED_PASS", False) and len(form.application.form.password.encrypted_password):
|
||||
datastore.data['settings']['application']['password'] = form.application.form.password.encrypted_password
|
||||
datastore.needs_write_urgent = True
|
||||
flash("Password protection enabled.", 'notice')
|
||||
flash(gettext("Password protection enabled."), 'notice')
|
||||
flask_login.logout_user()
|
||||
return redirect(url_for('watchlist.index'))
|
||||
|
||||
datastore.needs_write_urgent = True
|
||||
flash("Settings updated.")
|
||||
|
||||
# Also save plugin settings from the same form submission
|
||||
plugin_tabs_list = get_plugin_settings_tabs()
|
||||
for tab in plugin_tabs_list:
|
||||
plugin_id = tab['plugin_id']
|
||||
form_class = tab['form_class']
|
||||
|
||||
# Instantiate plugin form with POST data
|
||||
plugin_form = form_class(formdata=request.form)
|
||||
|
||||
# Save plugin settings (validation is optional for plugins)
|
||||
if plugin_form.data:
|
||||
save_plugin_settings(datastore.datastore_path, plugin_id, plugin_form.data)
|
||||
|
||||
flash(gettext("Settings updated."))
|
||||
|
||||
else:
|
||||
flash("An error occurred, please see below.", "error")
|
||||
flash(gettext("An error occurred, please see below."), "error")
|
||||
|
||||
# Convert to ISO 8601 format, all date/time relative events stored as UTC time
|
||||
utc_time = datetime.now(ZoneInfo("UTC")).isoformat()
|
||||
|
||||
# Get active plugins
|
||||
from changedetectionio.pluggy_interface import get_active_plugins
|
||||
import sys
|
||||
active_plugins = get_active_plugins()
|
||||
python_version = f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}"
|
||||
|
||||
# Get plugin settings tabs and instantiate forms
|
||||
plugin_tabs = get_plugin_settings_tabs()
|
||||
plugin_forms = {}
|
||||
|
||||
for tab in plugin_tabs:
|
||||
plugin_id = tab['plugin_id']
|
||||
form_class = tab['form_class']
|
||||
|
||||
# Load existing settings
|
||||
settings = load_plugin_settings(datastore.datastore_path, plugin_id)
|
||||
|
||||
# Instantiate the form with existing settings
|
||||
plugin_forms[plugin_id] = form_class(data=settings)
|
||||
|
||||
output = render_template("settings.html",
|
||||
active_plugins=active_plugins,
|
||||
api_key=datastore.data['settings']['application'].get('api_access_token'),
|
||||
python_version=python_version,
|
||||
available_timezones=sorted(available_timezones()),
|
||||
emailprefix=os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False),
|
||||
extra_notification_token_placeholder_info=datastore.get_unique_notification_token_placeholders_available(),
|
||||
@@ -121,6 +170,8 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
settings_application=datastore.data['settings']['application'],
|
||||
timezone_default_config=datastore.data['settings']['application'].get('scheduler_timezone_default'),
|
||||
utc_time=utc_time,
|
||||
plugin_tabs=plugin_tabs,
|
||||
plugin_forms=plugin_forms,
|
||||
)
|
||||
|
||||
return output
|
||||
@@ -131,7 +182,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
secret = secrets.token_hex(16)
|
||||
datastore.data['settings']['application']['api_access_token'] = secret
|
||||
datastore.needs_write_urgent = True
|
||||
flash("API Key was regenerated.")
|
||||
flash(gettext("API Key was regenerated."))
|
||||
return redirect(url_for('settings.settings_page')+'#api')
|
||||
|
||||
@settings_blueprint.route("/notification-logs", methods=['GET'])
|
||||
@@ -142,4 +193,32 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
logs=notification_debug_log if len(notification_debug_log) else ["Notification logs are empty - no notifications sent yet."])
|
||||
return output
|
||||
|
||||
@settings_blueprint.route("/toggle-all-paused", methods=['GET'])
@login_optionally_required
def toggle_all_paused():
    """Flip the application-wide 'all_paused' setting, flash the new state and return to the watch list."""
    app_settings = datastore.data['settings']['application']

    # A missing setting counts as "not paused"
    now_paused = not app_settings.get('all_paused', False)
    app_settings['all_paused'] = now_paused
    datastore.needs_write_urgent = True

    if now_paused:
        message = gettext("Automatic scheduling paused - checks will not be queued.")
    else:
        message = gettext("Automatic scheduling resumed - checks will be queued normally.")
    flash(message, 'notice')

    return redirect(url_for('watchlist.index'))
|
||||
|
||||
@settings_blueprint.route("/toggle-all-muted", methods=['GET'])
@login_optionally_required
def toggle_all_muted():
    """Flip the application-wide 'all_muted' setting, flash the new state and return to the watch list."""
    app_settings = datastore.data['settings']['application']

    # A missing setting counts as "not muted"
    now_muted = not app_settings.get('all_muted', False)
    app_settings['all_muted'] = now_muted
    datastore.needs_write_urgent = True

    if now_muted:
        message = gettext("All notifications muted.")
    else:
        message = gettext("All notifications unmuted.")
    flash(message, 'notice')

    return redirect(url_for('watchlist.index'))
|
||||
|
||||
return settings_blueprint
|
||||
@@ -4,7 +4,7 @@
|
||||
<div class="edit-form">
|
||||
<div class="inner">
|
||||
|
||||
<h4 style="margin-top: 0px;">Notification debug log</h4>
|
||||
<h4 style="margin-top: 0px;">{{ _('Notification debug log') }}</h4>
|
||||
<div id="notification-error-log">
|
||||
<ul style="font-size: 80%; margin:0px; padding: 0 0 0 7px">
|
||||
{% for log in logs|reverse %}
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
{% block content %}
|
||||
{% from '_helpers.html' import render_field, render_checkbox_field, render_button, render_time_schedule_form, render_ternary_field, render_fieldlist_with_inline_errors %}
|
||||
{% from '_common_fields.html' import render_common_settings_form %}
|
||||
{% from '_common_fields.html' import render_common_settings_form, show_token_placeholders %}
|
||||
<script>
|
||||
const notification_base_url="{{url_for('ui.ui_notification.ajax_callback_send_notification_test', mode="global-settings")}}";
|
||||
{% if emailprefix %}
|
||||
@@ -18,14 +18,22 @@
|
||||
<div class="edit-form">
|
||||
<div class="tabs collapsable">
|
||||
<ul>
|
||||
<li class="tab" id=""><a href="#general">General</a></li>
|
||||
<li class="tab"><a href="#notifications">Notifications</a></li>
|
||||
<li class="tab"><a href="#fetching">Fetching</a></li>
|
||||
<li class="tab"><a href="#filters">Global Filters</a></li>
|
||||
<li class="tab"><a href="#ui-options">UI Options</a></li>
|
||||
<li class="tab"><a href="#api">API</a></li>
|
||||
<li class="tab"><a href="#timedate">Time & Date</a></li>
|
||||
<li class="tab"><a href="#proxies">CAPTCHA & Proxies</a></li>
|
||||
<li class="tab" id=""><a href="#general">{{ _('General') }}</a></li>
|
||||
<li class="tab"><a href="#notifications">{{ _('Notifications') }}</a></li>
|
||||
<li class="tab"><a href="#fetching">{{ _('Fetching') }}</a></li>
|
||||
<li class="tab"><a href="#filters">{{ _('Global Filters') }}</a></li>
|
||||
<li class="tab"><a href="#ui-options">{{ _('UI Options') }}</a></li>
|
||||
<li class="tab"><a href="#api">{{ _('API') }}</a></li>
|
||||
<li class="tab"><a href="#rss">{{ _('RSS') }}</a></li>
|
||||
<li class="tab"><a href="{{ url_for('backups.index') }}" class="pure-menu-link">{{ _('Backups') }}</a></li>
|
||||
<li class="tab"><a href="#timedate">{{ _('Time & Date') }}</a></li>
|
||||
<li class="tab"><a href="#proxies">{{ _('CAPTCHA & Proxies') }}</a></li>
|
||||
{% if plugin_tabs %}
|
||||
{% for tab in plugin_tabs %}
|
||||
<li class="tab"><a href="#plugin-{{ tab.plugin_id }}">{{ tab.tab_label }}</a></li>
|
||||
{% endfor %}
|
||||
{% endif %}
|
||||
<li class="tab"><a href="#info">{{ _('Info') }}</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
<div class="box-wrap inner">
|
||||
@@ -35,23 +43,20 @@
|
||||
<fieldset>
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.requests.form.time_between_check, class="time-check-widget") }}
|
||||
<span class="pure-form-message-inline">Default recheck time for all watches, current system minimum is <i>{{min_system_recheck_seconds}}</i> seconds (<a href="https://github.com/dgtlmoon/changedetection.io/wiki/Misc-system-settings#enviroment-variables">more info</a>).</span>
|
||||
|
||||
<span class="pure-form-message-inline">{{ _('Default recheck time for all watches, current system minimum is') }} <i>{{min_system_recheck_seconds}}</i> {{ _('seconds') }} (<a href="https://github.com/dgtlmoon/changedetection.io/wiki/Misc-system-settings#enviroment-variables">{{ _('more info') }}</a>).</span>
|
||||
<div id="time-between-check-schedule">
|
||||
<!-- Start Time and End Time -->
|
||||
<!-- Start Time and End Time {{ timezone_default_config }} -->
|
||||
<div id="limit-between-time">
|
||||
{{ render_time_schedule_form(form.requests, available_timezones, timezone_default_config) }}
|
||||
{{ render_time_schedule_form(form.requests, available_timezones, timezone_default_config) }}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.requests.form.jitter_seconds, class="jitter_seconds") }}
|
||||
<span class="pure-form-message-inline">Example - 3 seconds random jitter could trigger up to 3 seconds earlier or up to 3 seconds later</span>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.application.form.filter_failure_notification_threshold_attempts, class="filter_failure_notification_threshold_attempts") }}
|
||||
<span class="pure-form-message-inline">After this many consecutive times that the CSS/xPath filter is missing, send a notification
|
||||
<span class="pure-form-message-inline">{{ _('After this many consecutive times that the CSS/xPath filter is missing, send a notification') }}
|
||||
<br>
|
||||
Set to <strong>0</strong> to disable
|
||||
{{ _('Set to') }} <strong>0</strong> {{ _('to disable') }}
|
||||
</span>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
@@ -60,49 +65,33 @@
|
||||
{{ render_button(form.application.form.removepassword_button) }}
|
||||
{% else %}
|
||||
{{ render_field(form.application.form.password) }}
|
||||
<span class="pure-form-message-inline">Password protection for your changedetection.io application.</span>
|
||||
<span class="pure-form-message-inline">{{ _('Password protection for your changedetection.io application.') }}</span>
|
||||
{% endif %}
|
||||
{% else %}
|
||||
<span class="pure-form-message-inline">Password is locked.</span>
|
||||
<span class="pure-form-message-inline">{{ _('Password is locked.') }}</span>
|
||||
{% endif %}
|
||||
</div>
|
||||
|
||||
<div class="pure-control-group">
|
||||
{{ render_checkbox_field(form.application.form.shared_diff_access, class="shared_diff_access") }}
|
||||
<span class="pure-form-message-inline">Allow access to view watch diff page when password is enabled (Good for sharing the diff page)
|
||||
</span>
|
||||
<span class="pure-form-message-inline">{{ _('Allow access to the watch change history page when password is enabled (Good for sharing the diff page)') }}</span>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_checkbox_field(form.application.form.empty_pages_are_a_change) }}
|
||||
<span class="pure-form-message-inline">When a request returns no content, or the HTML does not contain any text, is this considered a change?</span>
|
||||
</div>
|
||||
<div class="grey-form-border">
|
||||
<div class="pure-control-group">
|
||||
{{ render_checkbox_field(form.application.form.rss_hide_muted_watches) }}
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.application.form.rss_content_format) }}
|
||||
<span class="pure-form-message-inline">Love RSS? Does your reader support HTML? Set it here</span>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_checkbox_field(form.application.form.rss_reader_mode) }}
|
||||
<span class="pure-form-message-inline">Transforms RSS/RDF feed watches into beautiful text only</span>
|
||||
</div>
|
||||
<span class="pure-form-message-inline">{{ _('When a request returns no content, or the HTML does not contain any text, is this considered a change?') }}</span>
|
||||
</div>
|
||||
</fieldset>
|
||||
</div>
|
||||
|
||||
<div class="tab-pane-inner" id="notifications">
|
||||
<fieldset>
|
||||
<div class="field-group">
|
||||
{{ render_common_settings_form(form.application.form, emailprefix, settings_application, extra_notification_token_placeholder_info) }}
|
||||
</div>
|
||||
{{ render_common_settings_form(form.application.form, emailprefix, settings_application, extra_notification_token_placeholder_info) }}
|
||||
</fieldset>
|
||||
<div class="pure-control-group" id="notification-base-url">
|
||||
{{ render_field(form.application.form.base_url, class="m-d") }}
|
||||
<span class="pure-form-message-inline">
|
||||
Base URL used for the <code>{{ '{{ base_url }}' }}</code> token in notification links.<br>
|
||||
Default value is the system environment variable '<code>BASE_URL</code>' - <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Configurable-BASE_URL-setting">read more here</a>.
|
||||
{{ _('Base URL used for the') }} <code>{{ '{{ base_url }}' }}</code> {{ _('token in notification links.') }}<br>
|
||||
{{ _('Default value is the system environment variable') }} '<code>BASE_URL</code>' - <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Configurable-BASE_URL-setting">{{ _('read more here') }}</a>.
|
||||
</span>
|
||||
</div>
|
||||
</div>
|
||||
@@ -111,15 +100,15 @@
|
||||
<div class="pure-control-group inline-radio">
|
||||
{{ render_field(form.application.form.fetch_backend, class="fetch-backend") }}
|
||||
<span class="pure-form-message-inline">
|
||||
<p>Use the <strong>Basic</strong> method (default) where your watched sites don't need Javascript to render.</p>
|
||||
<p>The <strong>Chrome/Javascript</strong> method requires a network connection to a running WebDriver+Chrome server, set by the ENV var 'WEBDRIVER_URL'. </p>
|
||||
<p>{{ _('Use the') }} <strong>{{ _('Basic') }}</strong> {{ _('method (default) where your watched sites don\'t need Javascript to render.') }}</p>
|
||||
<p>{{ _('The') }} <strong>{{ _('Chrome/Javascript') }}</strong> {{ _('method requires a network connection to a running WebDriver+Chrome server, set by the ENV var') }} 'WEBDRIVER_URL'. </p>
|
||||
</span>
|
||||
</div>
|
||||
<fieldset class="pure-group" id="webdriver-override-options" data-visible-for="application-fetch_backend=html_webdriver">
|
||||
<div class="pure-form-message-inline">
|
||||
<strong>If you're having trouble waiting for the page to be fully rendered (text missing etc), try increasing the 'wait' time here.</strong>
|
||||
<strong>{{ _('If you\'re having trouble waiting for the page to be fully rendered (text missing etc), try increasing the \'wait\' time here.') }}</strong>
|
||||
<br>
|
||||
This will wait <i>n</i> seconds before extracting the text.
|
||||
{{ _('This will wait') }} <i>n</i> {{ _('seconds before extracting the text.') }}
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.application.form.webdriver_delay) }}
|
||||
@@ -128,23 +117,27 @@
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.requests.form.workers) }}
|
||||
{% set worker_info = get_worker_status_info() %}
|
||||
<span class="pure-form-message-inline">Number of concurrent workers to process watches. More workers = faster processing but higher memory usage.<br>
|
||||
Currently running: <strong>{{ worker_info.count }}</strong> operational {{ worker_info.type }} workers{% if worker_info.active_workers > 0 %} ({{ worker_info.active_workers }} actively processing){% endif %}.</span>
|
||||
<span class="pure-form-message-inline">{{ _('Number of concurrent workers to process watches. More workers = faster processing but higher memory usage.') }}<br>
|
||||
{{ _('Currently running:') }} <strong>{{ worker_info.count }}</strong> {{ _('operational') }} {{ worker_info.type }} {{ _('workers') }}{% if worker_info.active_workers > 0 %} ({{ worker_info.active_workers }} {{ _('actively processing') }}){% endif %}.</span>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.requests.form.jitter_seconds, class="jitter_seconds") }}
|
||||
<span class="pure-form-message-inline">{{ _('Example - 3 seconds random jitter could trigger up to 3 seconds earlier or up to 3 seconds later') }}</span>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.requests.form.timeout) }}
|
||||
<span class="pure-form-message-inline">For regular plain requests (not chrome based), maximum number of seconds until timeout, 1-999.<br>
|
||||
<span class="pure-form-message-inline">{{ _('For regular plain requests (not chrome based), maximum number of seconds until timeout, 1-999.') }}</span><br>
|
||||
</div>
|
||||
<div class="pure-control-group inline-radio">
|
||||
{{ render_field(form.requests.form.default_ua) }}
|
||||
<span class="pure-form-message-inline">
|
||||
Applied to all requests.<br><br>
|
||||
Note: Simply changing the User-Agent often does not defeat anti-robot technologies, it's important to consider <a href="https://changedetection.io/tutorial/what-are-main-types-anti-robot-mechanisms">all of the ways that the browser is detected</a>.
|
||||
{{ _('Applied to all requests.') }}<br><br>
|
||||
{{ _('Note: Simply changing the User-Agent often does not defeat anti-robot technologies, it\'s important to consider') }} <a href="https://changedetection.io/tutorial/what-are-main-types-anti-robot-mechanisms">{{ _('all of the ways that the browser is detected') }}</a>.
|
||||
</span>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
<br>
|
||||
Tip: <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration#brightdata-proxy-support">Connect using Bright Data and Oxylabs Proxies, find out more here.</a>
|
||||
{{ _('Tip:') }} <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration#brightdata-proxy-support">{{ _('Connect using Bright Data and Oxylabs Proxies, find out more here.') }}</a>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
@@ -153,15 +146,15 @@
|
||||
|
||||
<fieldset class="pure-group">
|
||||
{{ render_checkbox_field(form.application.form.ignore_whitespace) }}
|
||||
<span class="pure-form-message-inline">Ignore whitespace, tabs and new-lines/line-feeds when considering if a change was detected.<br>
|
||||
<i>Note:</i> Changing this will change the status of your existing watches, possibly trigger alerts etc.
|
||||
<span class="pure-form-message-inline">{{ _('Ignore whitespace, tabs and new-lines/line-feeds when considering if a change was detected.') }}<br>
|
||||
<i>{{ _('Note:') }}</i> {{ _('Changing this will change the status of your existing watches, possibly trigger alerts etc.') }}
|
||||
</span>
|
||||
</fieldset>
|
||||
<fieldset class="pure-group">
|
||||
{{ render_checkbox_field(form.application.form.render_anchor_tag_content) }}
|
||||
<span class="pure-form-message-inline">Render anchor tag content, default disabled, when enabled renders links as <code>(link text)[https://somesite.com]</code>
|
||||
<span class="pure-form-message-inline">{{ _('Render anchor tag content, default disabled, when enabled renders links as') }} <code>(link text)[https://somesite.com]</code>
|
||||
<br>
|
||||
<i>Note:</i> Changing this could affect the content of your existing watches, possibly trigger alerts etc.
|
||||
<i>{{ _('Note:') }}</i> {{ _('Changing this could affect the content of your existing watches, possibly trigger alerts etc.') }}
|
||||
</span>
|
||||
</fieldset>
|
||||
<fieldset class="pure-group">
|
||||
@@ -172,9 +165,9 @@ nav
|
||||
//*[contains(text(), 'Advertisement')]") }}
|
||||
<span class="pure-form-message-inline">
|
||||
<ul>
|
||||
<li> Remove HTML element(s) by CSS and XPath selectors before text conversion. </li>
|
||||
<li> Don't paste HTML here, use only CSS and XPath selectors </li>
|
||||
<li> Add multiple elements, CSS or XPath selectors per line to ignore multiple parts of the HTML. </li>
|
||||
<li> {{ _('Remove HTML element(s) by CSS and XPath selectors before text conversion.') }} </li>
|
||||
<li> {{ _('Don\'t paste HTML here, use only CSS and XPath selectors') }} </li>
|
||||
<li> {{ _('Add multiple elements, CSS or XPath selectors per line to ignore multiple parts of the HTML.') }} </li>
|
||||
</ul>
|
||||
</span>
|
||||
</fieldset>
|
||||
@@ -182,88 +175,118 @@ nav
|
||||
{{ render_field(form.application.form.global_ignore_text, rows=5, placeholder="Some text to ignore in a line
|
||||
/some.regex\d{2}/ for case-INsensitive regex
|
||||
") }}
|
||||
<span class="pure-form-message-inline">Note: This is applied globally in addition to the per-watch rules.</span><br>
|
||||
<span class="pure-form-message-inline">{{ _('Note: This is applied globally in addition to the per-watch rules.') }}</span><br>
|
||||
<span class="pure-form-message-inline">
|
||||
<ul>
|
||||
<li>Matching text will be <strong>ignored</strong> in the text snapshot (you can still see it but it wont trigger a change)</li>
|
||||
<li>Note: This is applied globally in addition to the per-watch rules.</li>
|
||||
<li>Each line processed separately, any line matching will be ignored (removed before creating the checksum)</li>
|
||||
<li>Regular Expression support, wrap the entire line in forward slash <code>/regex/</code></li>
|
||||
<li>Changing this will affect the comparison checksum which may trigger an alert</li>
|
||||
<li>{{ _('Matching text will be') }} <strong>{{ _('ignored') }}</strong> {{ _('in the text snapshot (you can still see it but it wont trigger a change)') }}</li>
|
||||
<li>{{ _('Note: This is applied globally in addition to the per-watch rules.') }}</li>
|
||||
<li>{{ _('Each line processed separately, any line matching will be ignored (removed before creating the checksum)') }}</li>
|
||||
<li>{{ _('Regular Expression support, wrap the entire line in forward slash') }} <code>/regex/</code></li>
|
||||
<li>{{ _('Changing this will affect the comparison checksum which may trigger an alert') }}</li>
|
||||
</ul>
|
||||
</span>
|
||||
</fieldset>
|
||||
<fieldset class="pure-group">
|
||||
{{ render_checkbox_field(form.application.form.strip_ignored_lines) }}
|
||||
<span class="pure-form-message-inline">Remove any text that appears in the "Ignore text" from the output (otherwise its just ignored for change-detection)<br>
|
||||
<i>Note:</i> Changing this will change the status of your existing watches, possibly trigger alerts etc.
|
||||
<span class="pure-form-message-inline">{{ _('Remove any text that appears in the "Ignore text" from the output (otherwise its just ignored for change-detection)') }}<br>
|
||||
<i>{{ _('Note:') }}</i> {{ _('Changing this will change the status of your existing watches, possibly trigger alerts etc.') }}
|
||||
</span>
|
||||
</fieldset>
|
||||
</div>
|
||||
|
||||
<div class="tab-pane-inner" id="api">
|
||||
<h4>API Access</h4>
|
||||
<p>Drive your changedetection.io via API, More about <a href="https://changedetection.io/docs/api_v1/index.html">API access and examples here</a>.</p>
|
||||
<h4>{{ _('API Access') }}</h4>
|
||||
<p>{{ _('Drive your changedetection.io via API, More about') }} <a href="https://changedetection.io/docs/api_v1/index.html">{{ _('API access and examples here') }}</a>.</p>
|
||||
|
||||
<div class="pure-control-group">
|
||||
{{ render_checkbox_field(form.application.form.api_access_token_enabled) }}
|
||||
<div class="pure-form-message-inline">Restrict API access limit by using <code>x-api-key</code> header - required for the Chrome Extension to work</div><br>
|
||||
<div class="pure-form-message-inline"><br>API Key <span id="api-key">{{api_key}}</span>
|
||||
<span style="display:none;" id="api-key-copy" >copy</span>
|
||||
<div class="pure-form-message-inline">{{ _('Restrict API access limit by using') }} <code>x-api-key</code> {{ _('header - required for the Chrome Extension to work') }}</div><br>
|
||||
<div class="pure-form-message-inline"><br>{{ _('API Key') }} <span id="api-key">{{api_key}}</span>
|
||||
<span style="display:none;" id="api-key-copy" >{{ _('copy') }}</span>
|
||||
</div>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
<a href="{{url_for('settings.settings_reset_api_key')}}" class="pure-button button-small button-cancel">Regenerate API key</a>
|
||||
<a href="{{url_for('settings.settings_reset_api_key')}}" class="pure-button button-small button-cancel">{{ _('Regenerate API key') }}</a>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
<h4>Chrome Extension</h4>
|
||||
<p>Easily add any web-page to your changedetection.io installation from within Chrome.</p>
|
||||
<strong>Step 1</strong> Install the extension, <strong>Step 2</strong> Navigate to this page,
|
||||
<strong>Step 3</strong> Open the extension from the toolbar and click "<i>Sync API Access</i>"
|
||||
<h4>{{ _('Chrome Extension') }}</h4>
|
||||
<p>{{ _('Easily add any web-page to your changedetection.io installation from within Chrome.') }}</p>
|
||||
<strong>{{ _('Step 1') }}</strong> {{ _('Install the extension,') }} <strong>{{ _('Step 2') }}</strong> {{ _('Navigate to this page,') }}
|
||||
<strong>{{ _('Step 3') }}</strong> {{ _('Open the extension from the toolbar and click') }} "<i>{{ _('Sync API Access') }}</i>"
|
||||
<p>
|
||||
<a id="chrome-extension-link"
|
||||
title="Try our new Chrome Extension!"
|
||||
title="{{ _('Try our new Chrome Extension!') }}"
|
||||
href="https://chromewebstore.google.com/detail/changedetectionio-website/kefcfmgmlhmankjmnbijimhofdjekbop">
|
||||
<img alt="Chrome store icon" src="{{ url_for('static_content', group='images', filename='google-chrome-icon.png') }}" alt="Chrome">
|
||||
Chrome Webstore
|
||||
<img alt="{{ _('Chrome store icon') }}" src="{{ url_for('static_content', group='images', filename='google-chrome-icon.png') }}" >
|
||||
{{ _('Chrome Webstore') }}
|
||||
</a>
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
<div class="tab-pane-inner" id="timedate">
|
||||
<div class="tab-pane-inner" id="rss">
|
||||
<div class="pure-control-group">
|
||||
Ensure the settings below are correct, they are used to manage the time schedule for checking your web page watches.
|
||||
{{ render_checkbox_field(form.application.form.rss_hide_muted_watches) }}
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
<p><strong>UTC Time & Date from Server:</strong> <span id="utc-time" >{{ utc_time }}</span></p>
|
||||
<p><strong>Local Time & Date in Browser:</strong> <span class="local-time" data-utc="{{ utc_time }}"></span></p>
|
||||
<p>
|
||||
{{ render_field(form.application.form.rss_diff_length) }}
|
||||
<span class="pure-form-message-inline">{{ _('Maximum number of history snapshots to include in the watch specific RSS feed.') }}</span>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_checkbox_field(form.application.form.rss_reader_mode) }}
|
||||
<span class="pure-form-message-inline">{{ _('For watching other RSS feeds - When watching RSS/Atom feeds, convert them into clean text for better change detection.') }}</span>
|
||||
</div>
|
||||
<div class="pure-control-group grey-form-border">
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.application.form.rss_content_format) }}
|
||||
<span class="pure-form-message-inline">{{ _('Does your reader support HTML? Set it here') }}</span>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.application.form.rss_template_type) }}
|
||||
<span class="pure-form-message-inline">{{ _('\'System default\' for the same template for all items, or re-use your "Notification Body" as the template.') }}</span>
|
||||
</div>
|
||||
<div>
|
||||
{{ render_field(form.application.form.rss_template_override) }}
|
||||
{{ show_token_placeholders(extra_notification_token_placeholder_info=extra_notification_token_placeholder_info, suffix="-rss") }}
|
||||
</div>
|
||||
</div>
|
||||
<br>
|
||||
|
||||
|
||||
</div>
|
||||
<div class="tab-pane-inner" id="timedate">
|
||||
<div class="pure-control-group">
|
||||
{{ _('Ensure the settings below are correct, they are used to manage the time schedule for checking your web page watches.') }}
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
<p><strong>{{ _('UTC Time & Date from Server:') }}</strong> <span id="utc-time" >{{ utc_time }}</span></p>
|
||||
<p><strong>{{ _('Local Time & Date in Browser:') }}</strong> <span class="local-time" data-utc="{{ utc_time }}"></span></p>
|
||||
<div>
|
||||
{{ render_field(form.application.form.scheduler_timezone_default) }}
|
||||
<datalist id="timezones" style="display: none;">
|
||||
{%- for timezone in available_timezones -%}<option value="{{ timezone }}">{{ timezone }}</option>{%- endfor -%}
|
||||
</datalist>
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="tab-pane-inner" id="ui-options">
|
||||
<div class="pure-control-group">
|
||||
{{ render_checkbox_field(form.application.form.ui.form.open_diff_in_new_tab, class="open_diff_in_new_tab") }}
|
||||
<span class="pure-form-message-inline">Enable this setting to open the diff page in a new tab. If disabled, the diff page will open in the current tab.</span>
|
||||
<span class="pure-form-message-inline">{{ _('Enable this setting to open the diff page in a new tab. If disabled, the diff page will open in the current tab.') }}</span>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_checkbox_field(form.application.form.ui.form.socket_io_enabled, class="socket_io_enabled") }}
|
||||
<span class="pure-form-message-inline">Realtime UI Updates Enabled - (Restart required if this is changed)</span>
|
||||
<span class="pure-form-message-inline">{{ _('Realtime UI Updates Enabled - (Restart required if this is changed)') }}</span>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_checkbox_field(form.application.form.ui.form.favicons_enabled, class="") }}
|
||||
<span class="pure-form-message-inline">Enable or Disable Favicons next to the watch list</span>
|
||||
<span class="pure-form-message-inline">{{ _('Enable or Disable Favicons next to the watch list') }}</span>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_checkbox_field(form.application.form.ui.use_page_title_in_list) }}
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.application.form.pager_size) }}
|
||||
<span class="pure-form-message-inline">Number of items per page in the watch overview list, 0 to disable.</span>
|
||||
<span class="pure-form-message-inline">{{ _('Number of items per page in the watch overview list, 0 to disable.') }}</span>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
@@ -311,18 +334,18 @@ nav
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<p><strong>Tip</strong>: "Residential" and "Mobile" proxy type can be more successfull than "Data Center" for blocked websites.
|
||||
<p><strong>{{ _('Tip') }}</strong>: {{ _('"Residential" and "Mobile" proxy type can be more successfull than "Data Center" for blocked websites.') }}</p>
|
||||
|
||||
<div class="pure-control-group" id="extra-proxies-setting">
|
||||
{{ render_fieldlist_with_inline_errors(form.requests.form.extra_proxies) }}
|
||||
<span class="pure-form-message-inline">"Name" will be used for selecting the proxy in the Watch Edit settings</span><br>
|
||||
<span class="pure-form-message-inline">SOCKS5 proxies with authentication are only supported with 'plain requests' fetcher, for other fetchers you should whitelist the IP access instead</span>
|
||||
<span class="pure-form-message-inline">{{ _('"Name" will be used for selecting the proxy in the Watch Edit settings') }}</span><br>
|
||||
<span class="pure-form-message-inline">{{ _('SOCKS5 proxies with authentication are only supported with \'plain requests\' fetcher, for other fetchers you should whitelist the IP access instead') }}</span>
|
||||
{% if form.requests.proxy %}
|
||||
<div>
|
||||
<br>
|
||||
<div class="inline-radio">
|
||||
{{ render_field(form.requests.form.proxy, class="fetch-backend-proxy") }}
|
||||
<span class="pure-form-message-inline">Choose a default proxy for all watches</span>
|
||||
<span class="pure-form-message-inline">{{ _('Choose a default proxy for all watches') }}</span>
|
||||
</div>
|
||||
</div>
|
||||
{% endif %}
|
||||
@@ -334,13 +357,51 @@ nav
|
||||
</p>
|
||||
{{ render_fieldlist_with_inline_errors(form.requests.form.extra_browsers) }}
|
||||
</div>
|
||||
|
||||
</div>
|
||||
{% if plugin_tabs %}
|
||||
{% for tab in plugin_tabs %}
|
||||
<div class="tab-pane-inner" id="plugin-{{ tab.plugin_id }}">
|
||||
{% set plugin_form = plugin_forms[tab.plugin_id] %}
|
||||
{% if tab.template_path %}
|
||||
{# Plugin provides custom template - include it directly (no separate form) #}
|
||||
{% include tab.template_path with context %}
|
||||
{% else %}
|
||||
{# Default form rendering - fields only, no submit button #}
|
||||
<fieldset>
|
||||
{% for field in plugin_form %}
|
||||
{% if field.type != 'CSRFToken' and field.type != 'SubmitField' %}
|
||||
<div class="pure-control-group">
|
||||
{% if field.type == 'BooleanField' %}
|
||||
{{ render_checkbox_field(field) }}
|
||||
{% else %}
|
||||
{{ render_field(field) }}
|
||||
{% endif %}
|
||||
</div>
|
||||
{% endif %}
|
||||
{% endfor %}
|
||||
</fieldset>
|
||||
{% endif %}
|
||||
</div>
|
||||
{% endfor %}
|
||||
{% endif %}
|
||||
<div class="tab-pane-inner" id="info">
|
||||
<p><strong>{{ _('Python version:') }}</strong> {{ python_version }}</p>
|
||||
<p><strong>{{ _('Plugins active:') }}</strong></p>
|
||||
{% if active_plugins %}
|
||||
<ul>
|
||||
{% for plugin in active_plugins %}
|
||||
<li><strong>{{ plugin.name }}</strong> - {{ plugin.description }}</li>
|
||||
{% endfor %}
|
||||
</ul>
|
||||
{% else %}
|
||||
<p>{{ _('No plugins active') }}</p>
|
||||
{% endif %}
|
||||
</div>
|
||||
<div id="actions">
|
||||
<div class="pure-control-group">
|
||||
{{ render_button(form.save_button) }}
|
||||
<a href="{{url_for('watchlist.index')}}" class="pure-button button-cancel">Back</a>
|
||||
<a href="{{url_for('ui.clear_all_history')}}" class="pure-button button-error">Clear Snapshot History</a>
|
||||
<a href="{{url_for('watchlist.index')}}" class="pure-button button-cancel">{{ _('Back') }}</a>
|
||||
<a href="{{url_for('ui.clear_all_history')}}" class="pure-button button-error">{{ _('Clear Snapshot History') }}</a>
|
||||
</div>
|
||||
</div>
|
||||
</form>
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
import threading
|
||||
from flask import Blueprint, request, render_template, flash, url_for, redirect
|
||||
|
||||
from flask_babel import gettext
|
||||
from loguru import logger
|
||||
|
||||
from changedetectionio.store import ChangeDetectionStore
|
||||
from changedetectionio.flask_app import login_optionally_required
|
||||
@@ -21,9 +23,10 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
tag_count = Counter(tag for watch in datastore.data['watching'].values() if watch.get('tags') for tag in watch['tags'])
|
||||
|
||||
output = render_template("groups-overview.html",
|
||||
app_rss_token=datastore.data['settings']['application'].get('rss_access_token'),
|
||||
available_tags=sorted_tags,
|
||||
form=add_form,
|
||||
tag_count=tag_count
|
||||
tag_count=tag_count,
|
||||
)
|
||||
|
||||
return output
|
||||
@@ -42,11 +45,11 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
title = request.form.get('name').strip()
|
||||
|
||||
if datastore.tag_exists_by_name(title):
|
||||
flash(f'The tag "{title}" already exists', "error")
|
||||
flash(gettext('The tag "{}" already exists').format(title), "error")
|
||||
return redirect(url_for('tags.tags_overview_page'))
|
||||
|
||||
datastore.add_tag(title)
|
||||
flash("Tag added")
|
||||
flash(gettext("Tag added"))
|
||||
|
||||
|
||||
return redirect(url_for('tags.tags_overview_page'))
|
||||
@@ -61,39 +64,73 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
@tags_blueprint.route("/delete/<string:uuid>", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def delete(uuid):
|
||||
removed = 0
|
||||
# Delete the tag, and any tag reference
|
||||
# Delete the tag from settings immediately
|
||||
if datastore.data['settings']['application']['tags'].get(uuid):
|
||||
del datastore.data['settings']['application']['tags'][uuid]
|
||||
|
||||
for watch_uuid, watch in datastore.data['watching'].items():
|
||||
if watch.get('tags') and uuid in watch['tags']:
|
||||
removed += 1
|
||||
watch['tags'].remove(uuid)
|
||||
# Remove tag from all watches in background thread to avoid blocking
|
||||
def remove_tag_background(tag_uuid):
|
||||
"""Background thread to remove tag from watches - discarded after completion."""
|
||||
removed_count = 0
|
||||
try:
|
||||
for watch_uuid, watch in datastore.data['watching'].items():
|
||||
if watch.get('tags') and tag_uuid in watch['tags']:
|
||||
watch['tags'].remove(tag_uuid)
|
||||
removed_count += 1
|
||||
logger.info(f"Background: Tag {tag_uuid} removed from {removed_count} watches")
|
||||
except Exception as e:
|
||||
logger.error(f"Error removing tag from watches: {e}")
|
||||
|
||||
flash(f"Tag deleted and removed from {removed} watches")
|
||||
# Start daemon thread
|
||||
threading.Thread(target=remove_tag_background, args=(uuid,), daemon=True).start()
|
||||
|
||||
flash(gettext("Tag deleted, removing from watches in background"))
|
||||
return redirect(url_for('tags.tags_overview_page'))
|
||||
|
||||
@tags_blueprint.route("/unlink/<string:uuid>", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def unlink(uuid):
|
||||
unlinked = 0
|
||||
for watch_uuid, watch in datastore.data['watching'].items():
|
||||
if watch.get('tags') and uuid in watch['tags']:
|
||||
unlinked += 1
|
||||
watch['tags'].remove(uuid)
|
||||
# Unlink tag from all watches in background thread to avoid blocking
|
||||
def unlink_tag_background(tag_uuid):
|
||||
"""Background thread to unlink tag from watches - discarded after completion."""
|
||||
unlinked_count = 0
|
||||
try:
|
||||
for watch_uuid, watch in datastore.data['watching'].items():
|
||||
if watch.get('tags') and tag_uuid in watch['tags']:
|
||||
watch['tags'].remove(tag_uuid)
|
||||
unlinked_count += 1
|
||||
logger.info(f"Background: Tag {tag_uuid} unlinked from {unlinked_count} watches")
|
||||
except Exception as e:
|
||||
logger.error(f"Error unlinking tag from watches: {e}")
|
||||
|
||||
flash(f"Tag unlinked removed from {unlinked} watches")
|
||||
# Start daemon thread
|
||||
threading.Thread(target=unlink_tag_background, args=(uuid,), daemon=True).start()
|
||||
|
||||
flash(gettext("Unlinking tag from watches in background"))
|
||||
return redirect(url_for('tags.tags_overview_page'))
|
||||
|
||||
@tags_blueprint.route("/delete_all", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def delete_all():
|
||||
for watch_uuid, watch in datastore.data['watching'].items():
|
||||
watch['tags'] = []
|
||||
# Clear all tags from settings immediately
|
||||
datastore.data['settings']['application']['tags'] = {}
|
||||
|
||||
flash(f"All tags deleted")
|
||||
# Clear tags from all watches in background thread to avoid blocking
|
||||
def clear_all_tags_background():
|
||||
"""Background thread to clear tags from all watches - discarded after completion."""
|
||||
cleared_count = 0
|
||||
try:
|
||||
for watch_uuid, watch in datastore.data['watching'].items():
|
||||
watch['tags'] = []
|
||||
cleared_count += 1
|
||||
logger.info(f"Background: Cleared tags from {cleared_count} watches")
|
||||
except Exception as e:
|
||||
logger.error(f"Error clearing tags from watches: {e}")
|
||||
|
||||
# Start daemon thread
|
||||
threading.Thread(target=clear_all_tags_background, daemon=True).start()
|
||||
|
||||
flash(gettext("All tags deleted, clearing from watches in background"))
|
||||
return redirect(url_for('tags.tags_overview_page'))
|
||||
|
||||
@tags_blueprint.route("/edit/<string:uuid>", methods=['GET'])
|
||||
@@ -105,7 +142,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
|
||||
default = datastore.data['settings']['application']['tags'].get(uuid)
|
||||
if not default:
|
||||
flash("Tag not found", "error")
|
||||
flash(gettext("Tag not found"), "error")
|
||||
return redirect(url_for('watchlist.index'))
|
||||
|
||||
form = group_restock_settings_form(
|
||||
@@ -149,9 +186,9 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
included_content = template.render(**template_args)
|
||||
|
||||
output = render_template("edit-tag.html",
|
||||
settings_application=datastore.data['settings']['application'],
|
||||
extra_tab_content=form.extra_tab_content() if form.extra_tab_content() else None,
|
||||
extra_form_content=included_content,
|
||||
extra_tab_content=form.extra_tab_content() if form.extra_tab_content() else None,
|
||||
settings_application=datastore.data['settings']['application'],
|
||||
**template_args
|
||||
)
|
||||
|
||||
@@ -180,7 +217,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
datastore.data['settings']['application']['tags'][uuid].update(form.data)
|
||||
datastore.data['settings']['application']['tags'][uuid]['processor'] = 'restock_diff'
|
||||
datastore.needs_write_urgent = True
|
||||
flash("Updated")
|
||||
flash(gettext("Updated"))
|
||||
|
||||
return redirect(url_for('tags.tags_overview_page'))
|
||||
|
||||
|
||||
@@ -24,12 +24,12 @@
|
||||
|
||||
<div class="tabs collapsable">
|
||||
<ul>
|
||||
<li class="tab" id=""><a href="#general">General</a></li>
|
||||
<li class="tab"><a href="#filters-and-triggers">Filters & Triggers</a></li>
|
||||
<li class="tab" id=""><a href="#general">{{ _('General') }}</a></li>
|
||||
<li class="tab"><a href="#filters-and-triggers">{{ _('Filters & Triggers') }}</a></li>
|
||||
{% if extra_tab_content %}
|
||||
<li class="tab"><a href="#extras_tab">{{ extra_tab_content }}</a></li>
|
||||
{% endif %}
|
||||
<li class="tab"><a href="#notifications">Notifications</a></li>
|
||||
<li class="tab"><a href="#notifications">{{ _('Notifications') }}</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
@@ -47,10 +47,10 @@
|
||||
</div>
|
||||
|
||||
<div class="tab-pane-inner" id="filters-and-triggers">
|
||||
<p>These settings are <strong><i>added</i></strong> to any existing watch configurations.</p>
|
||||
<p>{{ _('These settings are') }} <strong><i>{{ _('added') }}</i></strong> {{ _('to any existing watch configurations.') }}</p>
|
||||
{% include "edit/include_subtract.html" %}
|
||||
<div class="text-filtering border-fieldset">
|
||||
<h3>Text filtering</h3>
|
||||
<h3>{{ _('Text filtering') }}</h3>
|
||||
{% include "edit/text-options.html" %}
|
||||
</div>
|
||||
</div>
|
||||
@@ -70,18 +70,18 @@
|
||||
<div class="pure-control-group inline-radio">
|
||||
{{ render_checkbox_field(form.notification_screenshot) }}
|
||||
<span class="pure-form-message-inline">
|
||||
<strong>Use with caution!</strong> This will easily fill up your email storage quota or flood other storages.
|
||||
<strong>{{ _('Use with caution!') }}</strong> {{ _('This will easily fill up your email storage quota or flood other storages.') }}
|
||||
</span>
|
||||
</div>
|
||||
{% endif %}
|
||||
<div class="field-group" id="notification-field-group">
|
||||
{% if has_default_notification_urls %}
|
||||
<div class="inline-warning">
|
||||
<img class="inline-warning-icon" src="{{url_for('static_content', group='images', filename='notice.svg')}}" alt="Look out!" title="Lookout!" >
|
||||
There are <a href="{{ url_for('settings.settings_page')}}#notifications">system-wide notification URLs enabled</a>, this form will override notification settings for this watch only ‐ an empty Notification URL list here will still send notifications.
|
||||
<img class="inline-warning-icon" src="{{url_for('static_content', group='images', filename='notice.svg')}}" alt="{{ _('Look out!') }}" title="{{ _('Lookout!') }}" >
|
||||
{{ _('There are') }} <a href="{{ url_for('settings.settings_page')}}#notifications">{{ _('system-wide notification URLs enabled') }}</a>, {{ _('this form will override notification settings for this watch only') }} ‐ {{ _('an empty Notification URL list here will still send notifications.') }}
|
||||
</div>
|
||||
{% endif %}
|
||||
<a href="#notifications" id="notification-setting-reset-to-default" class="pure-button button-xsmall" style="right: 20px; top: 20px; position: absolute; background-color: #5f42dd; border-radius: 4px; font-size: 70%; color: #fff">Use system defaults</a>
|
||||
<a href="#notifications" id="notification-setting-reset-to-default" class="pure-button button-xsmall" style="right: 20px; top: 20px; position: absolute; background-color: #5f42dd; border-radius: 4px; font-size: 70%; color: #fff">{{ _('Use system defaults') }}</a>
|
||||
|
||||
{{ render_common_settings_form(form, emailprefix, settings_application, extra_notification_token_placeholder_info) }}
|
||||
</div>
|
||||
|
||||
@@ -2,22 +2,23 @@
|
||||
{% block content %}
|
||||
{% from '_helpers.html' import render_simple_field, render_field %}
|
||||
<script src="{{url_for('static_content', group='js', filename='jquery-3.6.0.min.js')}}"></script>
|
||||
<script src="{{url_for('static_content', group='js', filename='modal.js')}}"></script>
|
||||
|
||||
<div class="box">
|
||||
<form class="pure-form" action="{{ url_for('tags.form_tag_add') }}" method="POST" id="new-watch-form">
|
||||
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}" >
|
||||
<fieldset>
|
||||
<legend>Add a new organisational tag</legend>
|
||||
<legend>{{ _('Add a new organisational tag') }}</legend>
|
||||
<div id="watch-add-wrapper-zone">
|
||||
<div>
|
||||
{{ render_simple_field(form.name, placeholder="Watch group / tag") }}
|
||||
{{ render_simple_field(form.name, placeholder=_("Watch group / tag")) }}
|
||||
</div>
|
||||
<div>
|
||||
{{ render_simple_field(form.save_button, title="Save" ) }}
|
||||
{{ render_simple_field(form.save_button, title=_("Save") ) }}
|
||||
</div>
|
||||
</div>
|
||||
<br>
|
||||
<div style="color: #fff;">Groups allows you to manage filters and notifications for multiple watches under a single organisational tag.</div>
|
||||
<div style="color: #fff;">{{ _('Groups allows you to manage filters and notifications for multiple watches under a single organisational tag.') }}</div>
|
||||
</fieldset>
|
||||
</form>
|
||||
<!-- @todo maybe some overview matrix, 'tick' with which has notification, filter rules etc -->
|
||||
@@ -27,8 +28,8 @@
|
||||
<thead>
|
||||
<tr>
|
||||
<th></th>
|
||||
<th># Watches</th>
|
||||
<th>Tag / Label name</th>
|
||||
<th>{{ _('# Watches') }}</th>
|
||||
<th>{{ _('Tag / Label name') }}</th>
|
||||
<th></th>
|
||||
</tr>
|
||||
</thead>
|
||||
@@ -38,7 +39,7 @@
|
||||
--->
|
||||
{% if not available_tags|length %}
|
||||
<tr>
|
||||
<td colspan="3">No website organisational tags/groups configured</td>
|
||||
<td colspan="3">{{ _('No website organisational tags/groups configured') }}</td>
|
||||
</tr>
|
||||
{% endif %}
|
||||
{% for uuid, tag in available_tags %}
|
||||
@@ -49,9 +50,25 @@
|
||||
<td>{{ "{:,}".format(tag_count[uuid]) if uuid in tag_count else 0 }}</td>
|
||||
<td class="title-col inline"> <a href="{{url_for('watchlist.index', tag=uuid) }}">{{ tag.title }}</a></td>
|
||||
<td>
|
||||
<a class="pure-button pure-button-primary" href="{{ url_for('tags.form_tag_edit', uuid=uuid) }}">Edit</a>
|
||||
<a class="pure-button pure-button-primary" href="{{ url_for('tags.delete', uuid=uuid) }}" title="Deletes and removes tag">Delete</a>
|
||||
<a class="pure-button pure-button-primary" href="{{ url_for('tags.unlink', uuid=uuid) }}" title="Keep the tag but unlink any watches">Unlink</a>
|
||||
<a class="pure-button pure-button-primary" href="{{ url_for('tags.form_tag_edit', uuid=uuid) }}">{{ _('Edit') }}</a>
|
||||
<a href="{{ url_for('ui.form_watch_checknow', tag=uuid) }}" class="pure-button pure-button-primary" >{{ _('Recheck') }}</a>
|
||||
<a class="pure-button button-error"
|
||||
href="{{ url_for('tags.delete', uuid=uuid) }}"
|
||||
data-requires-confirm
|
||||
data-confirm-type="danger"
|
||||
data-confirm-title="{{ _('Delete Group?') }}"
|
||||
data-confirm-message="{{ _('<p>Are you sure you want to delete group <strong>%(title)s</strong>?</p><p>This action cannot be undone.</p>', title=tag.title) }}"
|
||||
data-confirm-button="{{ _('Delete') }}"
|
||||
title="{{ _('Deletes and removes tag') }}">{{ _('Delete') }}</a>
|
||||
<a class="pure-button button-warning"
|
||||
href="{{ url_for('tags.unlink', uuid=uuid) }}"
|
||||
data-requires-confirm
|
||||
data-confirm-type="warning"
|
||||
data-confirm-title="{{ _('Unlink Group?') }}"
|
||||
data-confirm-message="{{ _('<p>Are you sure you want to unlink all watches from group <strong>%(title)s</strong>?</p><p>The tag will be kept but watches will be removed from it.</p>', title=tag.title) }}"
|
||||
data-confirm-button="{{ _('Unlink') }}"
|
||||
title="{{ _('Keep the tag but unlink any watches') }}">{{ _('Unlink') }}</a>
|
||||
<a href="{{ url_for('rss.rss_tag_feed', tag_uuid=uuid, token=app_rss_token)}}"><img alt="{{ _('RSS Feed for this watch') }}" style="padding-left: 1em;" src="{{url_for('static_content', group='images', filename='generic_feed-icon.svg')}}" height="15"></a>
|
||||
</td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
|
||||
@@ -1,11 +1,14 @@
|
||||
import time
|
||||
from flask import Blueprint, request, redirect, url_for, flash, render_template, session
|
||||
import threading
|
||||
from flask import Blueprint, request, redirect, url_for, flash, render_template, session, current_app
|
||||
from flask_babel import gettext
|
||||
from loguru import logger
|
||||
|
||||
from changedetectionio.store import ChangeDetectionStore
|
||||
from changedetectionio.blueprint.ui.edit import construct_blueprint as construct_edit_blueprint
|
||||
from changedetectionio.blueprint.ui.notification import construct_blueprint as construct_notification_blueprint
|
||||
from changedetectionio.blueprint.ui.views import construct_blueprint as construct_views_blueprint
|
||||
from changedetectionio.blueprint.ui import diff, preview
|
||||
|
||||
def _handle_operations(op, uuids, datastore, worker_handler, update_q, queuedWatchMetaData, watch_check_update, extra_data=None, emit_flash=True):
|
||||
from flask import request, flash
|
||||
@@ -15,42 +18,42 @@ def _handle_operations(op, uuids, datastore, worker_handler, update_q, queuedWat
|
||||
if datastore.data['watching'].get(uuid):
|
||||
datastore.delete(uuid)
|
||||
if emit_flash:
|
||||
flash(f"{len(uuids)} watches deleted")
|
||||
flash(gettext("{} watches deleted").format(len(uuids)))
|
||||
|
||||
elif op == 'pause':
|
||||
for uuid in uuids:
|
||||
if datastore.data['watching'].get(uuid):
|
||||
datastore.data['watching'][uuid]['paused'] = True
|
||||
if emit_flash:
|
||||
flash(f"{len(uuids)} watches paused")
|
||||
flash(gettext("{} watches paused").format(len(uuids)))
|
||||
|
||||
elif op == 'unpause':
|
||||
for uuid in uuids:
|
||||
if datastore.data['watching'].get(uuid):
|
||||
datastore.data['watching'][uuid.strip()]['paused'] = False
|
||||
if emit_flash:
|
||||
flash(f"{len(uuids)} watches unpaused")
|
||||
flash(gettext("{} watches unpaused").format(len(uuids)))
|
||||
|
||||
elif (op == 'mark-viewed'):
|
||||
for uuid in uuids:
|
||||
if datastore.data['watching'].get(uuid):
|
||||
datastore.set_last_viewed(uuid, int(time.time()))
|
||||
if emit_flash:
|
||||
flash(f"{len(uuids)} watches updated")
|
||||
flash(gettext("{} watches updated").format(len(uuids)))
|
||||
|
||||
elif (op == 'mute'):
|
||||
for uuid in uuids:
|
||||
if datastore.data['watching'].get(uuid):
|
||||
datastore.data['watching'][uuid]['notification_muted'] = True
|
||||
if emit_flash:
|
||||
flash(f"{len(uuids)} watches muted")
|
||||
flash(gettext("{} watches muted").format(len(uuids)))
|
||||
|
||||
elif (op == 'unmute'):
|
||||
for uuid in uuids:
|
||||
if datastore.data['watching'].get(uuid):
|
||||
datastore.data['watching'][uuid]['notification_muted'] = False
|
||||
if emit_flash:
|
||||
flash(f"{len(uuids)} watches un-muted")
|
||||
flash(gettext("{} watches un-muted").format(len(uuids)))
|
||||
|
||||
elif (op == 'recheck'):
|
||||
for uuid in uuids:
|
||||
@@ -58,21 +61,21 @@ def _handle_operations(op, uuids, datastore, worker_handler, update_q, queuedWat
|
||||
# Recheck and require a full reprocessing
|
||||
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
if emit_flash:
|
||||
flash(f"{len(uuids)} watches queued for rechecking")
|
||||
flash(gettext("{} watches queued for rechecking").format(len(uuids)))
|
||||
|
||||
elif (op == 'clear-errors'):
|
||||
for uuid in uuids:
|
||||
if datastore.data['watching'].get(uuid):
|
||||
datastore.data['watching'][uuid]["last_error"] = False
|
||||
if emit_flash:
|
||||
flash(f"{len(uuids)} watches errors cleared")
|
||||
flash(gettext("{} watches errors cleared").format(len(uuids)))
|
||||
|
||||
elif (op == 'clear-history'):
|
||||
for uuid in uuids:
|
||||
if datastore.data['watching'].get(uuid):
|
||||
datastore.clear_watch_history(uuid)
|
||||
if emit_flash:
|
||||
flash(f"{len(uuids)} watches cleared/reset.")
|
||||
flash(gettext("{} watches cleared/reset.").format(len(uuids)))
|
||||
|
||||
elif (op == 'notification-default'):
|
||||
from changedetectionio.notification import (
|
||||
@@ -85,7 +88,7 @@ def _handle_operations(op, uuids, datastore, worker_handler, update_q, queuedWat
|
||||
datastore.data['watching'][uuid]['notification_urls'] = []
|
||||
datastore.data['watching'][uuid]['notification_format'] = USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH
|
||||
if emit_flash:
|
||||
flash(f"{len(uuids)} watches set to use default notification settings")
|
||||
flash(gettext("{} watches set to use default notification settings").format(len(uuids)))
|
||||
|
||||
elif (op == 'assign-tag'):
|
||||
op_extradata = extra_data
|
||||
@@ -100,7 +103,7 @@ def _handle_operations(op, uuids, datastore, worker_handler, update_q, queuedWat
|
||||
|
||||
datastore.data['watching'][uuid]['tags'].append(tag_uuid)
|
||||
if emit_flash:
|
||||
flash(f"{len(uuids)} watches were tagged")
|
||||
flash(gettext("{} watches were tagged").format(len(uuids)))
|
||||
|
||||
if uuids:
|
||||
for uuid in uuids:
|
||||
@@ -121,6 +124,13 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_handle
|
||||
views_blueprint = construct_views_blueprint(datastore, update_q, queuedWatchMetaData, watch_check_update)
|
||||
ui_blueprint.register_blueprint(views_blueprint)
|
||||
|
||||
# Register diff and preview blueprints
|
||||
diff_blueprint = diff.construct_blueprint(datastore)
|
||||
ui_blueprint.register_blueprint(diff_blueprint)
|
||||
|
||||
preview_blueprint = preview.construct_blueprint(datastore)
|
||||
ui_blueprint.register_blueprint(preview_blueprint)
|
||||
|
||||
# Import the login decorator
|
||||
from changedetectionio.auth_decorator import login_optionally_required
|
||||
|
||||
@@ -130,9 +140,9 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_handle
|
||||
try:
|
||||
datastore.clear_watch_history(uuid)
|
||||
except KeyError:
|
||||
flash('Watch not found', 'error')
|
||||
flash(gettext('Watch not found'), 'error')
|
||||
else:
|
||||
flash("Cleared snapshot history for watch {}".format(uuid))
|
||||
flash(gettext("Cleared snapshot history for watch {}").format(uuid))
|
||||
return redirect(url_for('watchlist.index'))
|
||||
|
||||
@ui_blueprint.route("/clear_history", methods=['GET', 'POST'])
|
||||
@@ -142,11 +152,26 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_handle
|
||||
confirmtext = request.form.get('confirmtext')
|
||||
|
||||
if confirmtext == 'clear':
|
||||
for uuid in datastore.data['watching'].keys():
|
||||
datastore.clear_watch_history(uuid)
|
||||
flash("Cleared snapshot history for all watches")
|
||||
# Run in background thread to avoid blocking
|
||||
def clear_history_background():
|
||||
# Capture UUIDs first to avoid race conditions
|
||||
watch_uuids = list(datastore.data['watching'].keys())
|
||||
logger.info(f"Background: Clearing history for {len(watch_uuids)} watches")
|
||||
|
||||
for uuid in watch_uuids:
|
||||
try:
|
||||
datastore.clear_watch_history(uuid)
|
||||
except Exception as e:
|
||||
logger.error(f"Error clearing history for watch {uuid}: {e}")
|
||||
|
||||
logger.info("Background: Completed clearing history")
|
||||
|
||||
# Start daemon thread
|
||||
threading.Thread(target=clear_history_background, daemon=True).start()
|
||||
|
||||
flash(gettext("History clearing started in background"))
|
||||
else:
|
||||
flash('Incorrect confirmation text.', 'error')
|
||||
flash(gettext('Incorrect confirmation text.'), 'error')
|
||||
|
||||
return redirect(url_for('watchlist.index'))
|
||||
|
||||
@@ -160,18 +185,32 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_handle
|
||||
# Save the current newest history as the most recently viewed
|
||||
with_errors = request.args.get('with_errors') == "1"
|
||||
tag_limit = request.args.get('tag')
|
||||
logger.debug(f"Limiting to tag {tag_limit}")
|
||||
now = int(time.time())
|
||||
for watch_uuid, watch in datastore.data['watching'].items():
|
||||
if with_errors and not watch.get('last_error'):
|
||||
continue
|
||||
|
||||
if tag_limit and ( not watch.get('tags') or tag_limit not in watch['tags'] ):
|
||||
logger.debug(f"Skipping watch {watch_uuid}")
|
||||
continue
|
||||
# Mark watches as viewed in background thread to avoid blocking
|
||||
def mark_viewed_background():
|
||||
"""Background thread to mark watches as viewed - discarded after completion."""
|
||||
marked_count = 0
|
||||
try:
|
||||
for watch_uuid, watch in datastore.data['watching'].items():
|
||||
if with_errors and not watch.get('last_error'):
|
||||
continue
|
||||
|
||||
datastore.set_last_viewed(watch_uuid, now)
|
||||
if tag_limit and (not watch.get('tags') or tag_limit not in watch['tags']):
|
||||
continue
|
||||
|
||||
datastore.set_last_viewed(watch_uuid, now)
|
||||
marked_count += 1
|
||||
|
||||
logger.info(f"Background marking complete: {marked_count} watches marked as viewed")
|
||||
except Exception as e:
|
||||
logger.error(f"Error in background mark as viewed: {e}")
|
||||
|
||||
# Start background thread and return immediately
|
||||
thread = threading.Thread(target=mark_viewed_background, daemon=True)
|
||||
thread.start()
|
||||
|
||||
flash(gettext("Marking watches as viewed in background..."))
|
||||
return redirect(url_for('watchlist.index', tag=tag_limit))
|
||||
|
||||
@ui_blueprint.route("/delete", methods=['GET'])
|
||||
@@ -180,14 +219,14 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_handle
|
||||
uuid = request.args.get('uuid')
|
||||
|
||||
if uuid != 'all' and not uuid in datastore.data['watching'].keys():
|
||||
flash('The watch by UUID {} does not exist.'.format(uuid), 'error')
|
||||
flash(gettext('The watch by UUID {} does not exist.').format(uuid), 'error')
|
||||
return redirect(url_for('watchlist.index'))
|
||||
|
||||
# More for testing, possible to return the first/only
|
||||
if uuid == 'first':
|
||||
uuid = list(datastore.data['watching'].keys()).pop()
|
||||
datastore.delete(uuid)
|
||||
flash('Deleted.')
|
||||
flash(gettext('Deleted.'))
|
||||
|
||||
return redirect(url_for('watchlist.index'))
|
||||
|
||||
@@ -204,7 +243,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_handle
|
||||
if not datastore.data['watching'].get(uuid).get('paused'):
|
||||
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=5, item={'uuid': new_uuid}))
|
||||
|
||||
flash('Cloned, you are editing the new watch.')
|
||||
flash(gettext('Cloned, you are editing the new watch.'))
|
||||
|
||||
return redirect(url_for("ui.ui_edit.edit_page", uuid=new_uuid))
|
||||
|
||||
@@ -216,40 +255,83 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_handle
|
||||
uuid = request.args.get('uuid')
|
||||
with_errors = request.args.get('with_errors') == "1"
|
||||
|
||||
i = 0
|
||||
|
||||
running_uuids = worker_handler.get_running_uuids()
|
||||
|
||||
if uuid:
|
||||
if uuid not in running_uuids:
|
||||
# Single watch - check if already queued or running
|
||||
if worker_handler.is_watch_running(uuid) or uuid in update_q.get_queued_uuids():
|
||||
flash(gettext("Watch is already queued or being checked."))
|
||||
else:
|
||||
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
i += 1
|
||||
|
||||
flash(gettext("Queued 1 watch for rechecking."))
|
||||
else:
|
||||
# Recheck all, including muted
|
||||
# Get most overdue first
|
||||
# Multiple watches - first count how many need to be queued
|
||||
watches_to_queue = []
|
||||
for k in sorted(datastore.data['watching'].items(), key=lambda item: item[1].get('last_checked', 0)):
|
||||
watch_uuid = k[0]
|
||||
watch = k[1]
|
||||
if not watch['paused']:
|
||||
if watch_uuid not in running_uuids:
|
||||
if with_errors and not watch.get('last_error'):
|
||||
continue
|
||||
if not watch['paused'] and watch_uuid:
|
||||
if with_errors and not watch.get('last_error'):
|
||||
continue
|
||||
if tag != None and tag not in watch['tags']:
|
||||
continue
|
||||
watches_to_queue.append(watch_uuid)
|
||||
|
||||
if tag != None and tag not in watch['tags']:
|
||||
continue
|
||||
# If less than 20 watches, queue synchronously for immediate feedback
|
||||
if len(watches_to_queue) < 20:
|
||||
# Get already queued/running UUIDs once (efficient)
|
||||
queued_uuids = set(update_q.get_queued_uuids())
|
||||
running_uuids = set(worker_handler.get_running_uuids())
|
||||
|
||||
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid}))
|
||||
i += 1
|
||||
# Filter out watches that are already queued or running
|
||||
watches_to_queue_filtered = []
|
||||
for watch_uuid in watches_to_queue:
|
||||
if watch_uuid not in queued_uuids and watch_uuid not in running_uuids:
|
||||
watches_to_queue_filtered.append(watch_uuid)
|
||||
|
||||
if i == 1:
|
||||
flash("Queued 1 watch for rechecking.")
|
||||
if i > 1:
|
||||
flash(f"Queued {i} watches for rechecking.")
|
||||
if i == 0:
|
||||
flash("No watches available to recheck.")
|
||||
# Queue only the filtered watches
|
||||
for watch_uuid in watches_to_queue_filtered:
|
||||
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid}))
|
||||
|
||||
return redirect(url_for('watchlist.index'))
|
||||
# Provide feedback about skipped watches
|
||||
skipped_count = len(watches_to_queue) - len(watches_to_queue_filtered)
|
||||
if skipped_count > 0:
|
||||
flash(gettext("Queued {} watches for rechecking ({} already queued or running).").format(
|
||||
len(watches_to_queue_filtered), skipped_count))
|
||||
else:
|
||||
if len(watches_to_queue_filtered) == 1:
|
||||
flash(gettext("Queued 1 watch for rechecking."))
|
||||
else:
|
||||
flash(gettext("Queued {} watches for rechecking.").format(len(watches_to_queue_filtered)))
|
||||
else:
|
||||
# 20+ watches - queue in background thread to avoid blocking HTTP response
|
||||
# Capture queued/running state before background thread
|
||||
queued_uuids = set(update_q.get_queued_uuids())
|
||||
running_uuids = set(worker_handler.get_running_uuids())
|
||||
|
||||
def queue_watches_background():
|
||||
"""Background thread to queue watches - discarded after completion."""
|
||||
try:
|
||||
queued_count = 0
|
||||
skipped_count = 0
|
||||
for watch_uuid in watches_to_queue:
|
||||
# Check if already queued or running (state captured at start)
|
||||
if watch_uuid not in queued_uuids and watch_uuid not in running_uuids:
|
||||
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid}))
|
||||
queued_count += 1
|
||||
else:
|
||||
skipped_count += 1
|
||||
|
||||
logger.info(f"Background queueing complete: {queued_count} watches queued, {skipped_count} skipped (already queued/running)")
|
||||
except Exception as e:
|
||||
logger.error(f"Error in background queueing: {e}")
|
||||
|
||||
# Start background thread and return immediately
|
||||
thread = threading.Thread(target=queue_watches_background, daemon=True, name="QueueWatches-Background")
|
||||
thread.start()
|
||||
|
||||
# Return immediately with approximate message
|
||||
flash(gettext("Queueing watches for rechecking in background..."))
|
||||
|
||||
return redirect(url_for('watchlist.index', **({'tag': tag} if tag else {})))
|
||||
|
||||
@ui_blueprint.route("/form/checkbox-operations", methods=['POST'])
|
||||
@login_optionally_required
|
||||
@@ -318,8 +400,29 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_handle
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error sharing -{str(e)}")
|
||||
flash(f"Could not share, something went wrong while communicating with the share server - {str(e)}", 'error')
|
||||
flash(gettext("Could not share, something went wrong while communicating with the share server - {}").format(str(e)), 'error')
|
||||
|
||||
return redirect(url_for('watchlist.index'))
|
||||
|
||||
@ui_blueprint.route("/language/auto-detect", methods=['GET'])
|
||||
def delete_locale_language_session_var_if_it_exists():
|
||||
"""Clear the session locale preference to auto-detect from browser Accept-Language header"""
|
||||
if 'locale' in session:
|
||||
session.pop('locale', None)
|
||||
# Refresh Flask-Babel to clear cached locale
|
||||
from flask_babel import refresh
|
||||
refresh()
|
||||
flash(gettext("Language set to auto-detect from browser"))
|
||||
|
||||
# Check if there's a redirect parameter to return to the same page
|
||||
redirect_url = request.args.get('redirect')
|
||||
|
||||
# If redirect is provided and safe, use it
|
||||
from changedetectionio.is_safe_url import is_safe_url
|
||||
if redirect_url and is_safe_url(redirect_url, current_app):
|
||||
return redirect(redirect_url)
|
||||
|
||||
# Otherwise redirect to watchlist
|
||||
return redirect(url_for('watchlist.index'))
|
||||
|
||||
return ui_blueprint
|
||||
317
changedetectionio/blueprint/ui/diff.py
Normal file
317
changedetectionio/blueprint/ui/diff.py
Normal file
@@ -0,0 +1,317 @@
|
||||
from flask import Blueprint, request, redirect, url_for, flash, render_template, make_response, send_from_directory
|
||||
from flask_babel import gettext
|
||||
|
||||
import re
|
||||
import importlib
|
||||
from loguru import logger
|
||||
from markupsafe import Markup
|
||||
|
||||
from changedetectionio.diff import (
|
||||
REMOVED_STYLE, ADDED_STYLE, REMOVED_INNER_STYLE, ADDED_INNER_STYLE,
|
||||
REMOVED_PLACEMARKER_OPEN, REMOVED_PLACEMARKER_CLOSED,
|
||||
ADDED_PLACEMARKER_OPEN, ADDED_PLACEMARKER_CLOSED,
|
||||
CHANGED_PLACEMARKER_OPEN, CHANGED_PLACEMARKER_CLOSED,
|
||||
CHANGED_INTO_PLACEMARKER_OPEN, CHANGED_INTO_PLACEMARKER_CLOSED
|
||||
)
|
||||
from changedetectionio.store import ChangeDetectionStore
|
||||
from changedetectionio.auth_decorator import login_optionally_required
|
||||
|
||||
|
||||
def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
diff_blueprint = Blueprint('ui_diff', __name__, template_folder="../ui/templates")
|
||||
|
||||
@diff_blueprint.app_template_filter('diff_unescape_difference_spans')
|
||||
def diff_unescape_difference_spans(content):
|
||||
"""Emulate Jinja2's auto-escape, then selectively unescape our diff spans."""
|
||||
from markupsafe import escape
|
||||
|
||||
if not content:
|
||||
return Markup('')
|
||||
|
||||
# Step 1: Escape everything like Jinja2 would (this makes it XSS-safe)
|
||||
escaped_content = escape(str(content))
|
||||
|
||||
# Step 2: Unescape only our exact diff spans generated by apply_html_color_to_body()
|
||||
# Pattern matches the exact structure:
|
||||
# <span style="{STYLE}" role="{ROLE}" aria-label="{LABEL}" title="{TITLE}">
|
||||
|
||||
# Unescape outer span opening tags with full attributes (role, aria-label, title)
|
||||
# Matches removed/added/changed/changed_into spans
|
||||
result = re.sub(
|
||||
rf'<span style="({re.escape(REMOVED_STYLE)}|{re.escape(ADDED_STYLE)})" '
|
||||
rf'role="(deletion|insertion|note)" '
|
||||
rf'aria-label="([^&]+?)" '
|
||||
rf'title="([^&]+?)">',
|
||||
r'<span style="\1" role="\2" aria-label="\3" title="\4">',
|
||||
str(escaped_content),
|
||||
flags=re.IGNORECASE
|
||||
)
|
||||
|
||||
# Unescape inner span opening tags (without additional attributes)
|
||||
# This matches the darker background styles for changed parts within lines
|
||||
result = re.sub(
|
||||
rf'<span style="({re.escape(REMOVED_INNER_STYLE)}|{re.escape(ADDED_INNER_STYLE)})">',
|
||||
r'<span style="\1">',
|
||||
result,
|
||||
flags=re.IGNORECASE
|
||||
)
|
||||
|
||||
# Unescape closing tags (but only as many as we opened)
|
||||
open_count = result.count('<span style=')
|
||||
close_count = str(escaped_content).count('</span>')
|
||||
|
||||
# Replace up to the number of spans we opened
|
||||
for _ in range(min(open_count, close_count)):
|
||||
result = result.replace('</span>', '</span>', 1)
|
||||
|
||||
return Markup(result)
|
||||
|
||||
@diff_blueprint.route("/diff/<string:uuid>", methods=['GET'])
@login_optionally_required
def diff_history_page(uuid):
    """
    Render the history/diff page for a watch.

    This route is processor-aware: it delegates rendering to the processor's
    difference.py module, allowing different processor types to provide
    custom visualizations:
    - text_json_diff: Text/HTML diff with syntax highlighting
    - restock_diff: Could show price charts and stock history
    - image_diff: Could show image comparison slider/overlay

    Each processor implements processors/{type}/difference.py::render()
    If a processor doesn't have a difference module (or the module has no
    render()), falls back to text_json_diff.

    :param uuid: UUID of the watch, or the literal 'first' (testing aid).
    :return: Whatever the selected render() returns, or a redirect to the
        watch list when the watch is unknown or has fewer than 2 snapshots.
    """

    # More for testing, possible to return the first/only
    if uuid == 'first':
        known_uuids = list(datastore.data['watching'].keys())
        # With no watches at all, keep 'first' so the lookup below fails into
        # the normal "bad link" handling instead of raising IndexError.
        if known_uuids:
            uuid = known_uuids[-1]

    try:
        watch = datastore.data['watching'][uuid]
    except KeyError:
        flash(gettext("No history found for the specified link, bad link?"), "error")
        return redirect(url_for('watchlist.index'))

    dates = list(watch.history.keys())
    if len(dates) < 2:
        flash(gettext("Not enough history (2 snapshots required) to show difference page for this watch."), "error")
        return redirect(url_for('watchlist.index'))

    # Get the processor type for this watch
    processor_name = watch.get('processor', 'text_json_diff')

    # Same arguments are handed to the processor's render() and to the fallback
    render_kwargs = dict(
        watch=watch,
        datastore=datastore,
        request=request,
        url_for=url_for,
        render_template=render_template,
        flash=flash,
        redirect=redirect
    )

    try:
        # Try to import the processor's difference module
        processor_module = importlib.import_module(f'changedetectionio.processors.{processor_name}.difference')

        # Call the processor's render() function
        if hasattr(processor_module, 'render'):
            return processor_module.render(**render_kwargs)
    except ImportError as e:
        # ModuleNotFoundError is a subclass of ImportError, so this covers both
        logger.warning(f"Processor {processor_name} does not have a difference module, falling back to text_json_diff: {e}")

    # Fallback: if processor doesn't have difference module, use text_json_diff as default
    from changedetectionio.processors.text_json_diff.difference import render as default_render
    return default_render(**render_kwargs)
|
||||
|
||||
@diff_blueprint.route("/diff/<string:uuid>/extract", methods=['GET'])
@login_optionally_required
def diff_history_page_extract_GET(uuid):
    """
    Render the data extraction form for a watch.

    This route is processor-aware: it delegates to the processor's
    extract.py module, allowing different processor types to provide
    custom extraction interfaces.

    Each processor implements processors/{type}/extract.py::render_form()
    If a processor doesn't have an extract module (or the module has no
    render_form()), the base changedetectionio.processors.extract.render_form()
    is used instead.

    :param uuid: UUID of the watch, or the literal 'first' (testing aid).
    :return: Whatever the selected render_form() returns, or a redirect to
        the watch list when the watch is unknown.
    """
    # More for testing, possible to return the first/only
    if uuid == 'first':
        known_uuids = list(datastore.data['watching'].keys())
        # With no watches at all, keep 'first' so the lookup below fails into
        # the normal "bad link" handling instead of raising IndexError.
        if known_uuids:
            uuid = known_uuids[-1]

    try:
        watch = datastore.data['watching'][uuid]
    except KeyError:
        flash(gettext("No history found for the specified link, bad link?"), "error")
        return redirect(url_for('watchlist.index'))

    # Get the processor type for this watch
    processor_name = watch.get('processor', 'text_json_diff')

    # Same arguments are handed to the processor's render_form() and to the fallback
    form_kwargs = dict(
        watch=watch,
        datastore=datastore,
        request=request,
        url_for=url_for,
        render_template=render_template,
        flash=flash,
        redirect=redirect
    )

    try:
        # Try to import the processor's extract module
        processor_module = importlib.import_module(f'changedetectionio.processors.{processor_name}.extract')

        # Call the processor's render_form() function
        if hasattr(processor_module, 'render_form'):
            return processor_module.render_form(**form_kwargs)
    except ImportError as e:
        # ModuleNotFoundError is a subclass of ImportError, so this covers both
        logger.warning(f"Processor {processor_name} does not have an extract module, falling back to base extractor: {e}")

    # Fallback: if processor doesn't have extract module, use base processors.extract as default
    from changedetectionio.processors.extract import render_form as default_render_form
    return default_render_form(**form_kwargs)
|
||||
|
||||
@diff_blueprint.route("/diff/<string:uuid>/extract", methods=['POST'])
@login_optionally_required
def diff_history_page_extract_POST(uuid):
    """
    Process the data extraction request.

    This route is processor-aware: it delegates to the processor's
    extract.py module, allowing different processor types to provide
    custom extraction logic.

    Each processor implements processors/{type}/extract.py::process_extraction()
    If a processor doesn't have an extract module (or the module has no
    process_extraction()), the base
    changedetectionio.processors.extract.process_extraction() is used instead.

    :param uuid: UUID of the watch, or the literal 'first' (testing aid).
    :return: Whatever the selected process_extraction() returns, or a
        redirect to the watch list when the watch is unknown.
    """
    # More for testing, possible to return the first/only
    if uuid == 'first':
        known_uuids = list(datastore.data['watching'].keys())
        # With no watches at all, keep 'first' so the lookup below fails into
        # the normal "bad link" handling instead of raising IndexError.
        if known_uuids:
            uuid = known_uuids[-1]

    try:
        watch = datastore.data['watching'][uuid]
    except KeyError:
        flash(gettext("No history found for the specified link, bad link?"), "error")
        return redirect(url_for('watchlist.index'))

    # Get the processor type for this watch
    processor_name = watch.get('processor', 'text_json_diff')

    # Same arguments are handed to the processor's process_extraction() and to the fallback
    extraction_kwargs = dict(
        watch=watch,
        datastore=datastore,
        request=request,
        url_for=url_for,
        make_response=make_response,
        send_from_directory=send_from_directory,
        flash=flash,
        redirect=redirect
    )

    try:
        # Try to import the processor's extract module
        processor_module = importlib.import_module(f'changedetectionio.processors.{processor_name}.extract')

        # Call the processor's process_extraction() function
        if hasattr(processor_module, 'process_extraction'):
            return processor_module.process_extraction(**extraction_kwargs)
    except ImportError as e:
        # ModuleNotFoundError is a subclass of ImportError, so this covers both
        logger.warning(f"Processor {processor_name} does not have an extract module, falling back to base extractor: {e}")

    # Fallback: if processor doesn't have extract module, use base processors.extract as default
    from changedetectionio.processors.extract import process_extraction as default_process_extraction
    return default_process_extraction(**extraction_kwargs)
|
||||
|
||||
@diff_blueprint.route("/diff/<string:uuid>/processor-asset/<string:asset_name>", methods=['GET'])
@login_optionally_required
def processor_asset(uuid, asset_name):
    """
    Serve processor-specific binary assets (images, files, etc.).

    This route is processor-aware: it delegates to the processor's
    difference.py module, allowing different processor types to serve
    custom assets without embedding them as base64 in templates.

    This solves memory issues with large binary data (e.g., screenshots)
    by streaming them as separate HTTP responses instead of embedding
    in the HTML template.

    Each processor implements processors/{type}/difference.py::get_asset()
    which returns (binary_data, content_type, cache_control_header),
    or None when the asset does not exist.

    Example URLs:
    - /diff/{uuid}/processor-asset/before
    - /diff/{uuid}/processor-asset/after
    - /diff/{uuid}/processor-asset/rendered_diff

    :param uuid: UUID of the watch, or the literal 'first' (testing aid).
    :param asset_name: Processor-defined name of the asset to serve.
    :return: A response carrying the asset bytes, or a redirect to the watch
        list when the watch is unknown. Aborts with 404 when the processor
        has no difference module, no get_asset(), or no such asset.
    """
    # Imported once here; abort() is needed on three separate 404 paths below
    from flask import abort

    # More for testing, possible to return the first/only
    if uuid == 'first':
        known_uuids = list(datastore.data['watching'].keys())
        # With no watches at all, keep 'first' so the lookup below fails into
        # the normal "bad link" handling instead of raising IndexError.
        if known_uuids:
            uuid = known_uuids[-1]

    try:
        watch = datastore.data['watching'][uuid]
    except KeyError:
        flash(gettext("No history found for the specified link, bad link?"), "error")
        return redirect(url_for('watchlist.index'))

    # Get the processor type for this watch
    processor_name = watch.get('processor', 'text_json_diff')

    try:
        # Try to import the processor's difference module
        processor_module = importlib.import_module(f'changedetectionio.processors.{processor_name}.difference')

        # Call the processor's get_asset() function
        if hasattr(processor_module, 'get_asset'):
            result = processor_module.get_asset(
                asset_name=asset_name,
                watch=watch,
                datastore=datastore,
                request=request
            )

            if result is None:
                abort(404, description=f"Asset '{asset_name}' not found")

            binary_data, content_type, cache_control = result

            response = make_response(binary_data)
            response.headers['Content-Type'] = content_type
            if cache_control:
                response.headers['Cache-Control'] = cache_control
            return response
        else:
            logger.warning(f"Processor {processor_name} does not implement get_asset()")
            abort(404, description=f"Processor '{processor_name}' does not support assets")

    except ImportError as e:
        # ModuleNotFoundError is a subclass of ImportError, so this covers both
        logger.warning(f"Processor {processor_name} does not have a difference module: {e}")
        abort(404, description=f"Processor '{processor_name}' not found")
|
||||
|
||||
return diff_blueprint
|
||||
@@ -1,8 +1,8 @@
|
||||
import time
|
||||
from copy import deepcopy
|
||||
import os
|
||||
import importlib.resources
|
||||
from flask import Blueprint, request, redirect, url_for, flash, render_template, make_response, send_from_directory, abort
|
||||
from flask import Blueprint, request, redirect, url_for, flash, render_template, abort
|
||||
from flask_babel import gettext
|
||||
from loguru import logger
|
||||
from jinja2 import Environment, FileSystemLoader
|
||||
|
||||
@@ -32,14 +32,14 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
|
||||
# More for testing, possible to return the first/only
|
||||
if not datastore.data['watching'].keys():
|
||||
flash("No watches to edit", "error")
|
||||
flash(gettext("No watches to edit"), "error")
|
||||
return redirect(url_for('watchlist.index'))
|
||||
|
||||
if uuid == 'first':
|
||||
uuid = list(datastore.data['watching'].keys()).pop()
|
||||
|
||||
if not uuid in datastore.data['watching']:
|
||||
flash("No watch with the UUID %s found." % (uuid), "error")
|
||||
flash(gettext("No watch with the UUID {} found.").format(uuid), "error")
|
||||
return redirect(url_for('watchlist.index'))
|
||||
|
||||
switch_processor = request.args.get('switch_processor')
|
||||
@@ -47,12 +47,18 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
for p in processors.available_processors():
|
||||
if p[0] == switch_processor:
|
||||
datastore.data['watching'][uuid]['processor'] = switch_processor
|
||||
flash(f"Switched to mode - {p[1]}.")
|
||||
flash(gettext("Switched to mode - {}.").format(p[1]))
|
||||
datastore.clear_watch_history(uuid)
|
||||
redirect(url_for('ui_edit.edit_page', uuid=uuid))
|
||||
|
||||
# be sure we update with a copy instead of accidently editing the live object by reference
|
||||
default = deepcopy(datastore.data['watching'][uuid])
|
||||
default = None
|
||||
while not default:
|
||||
try:
|
||||
default = deepcopy(datastore.data['watching'][uuid])
|
||||
except RuntimeError as e:
|
||||
# Dictionary changed
|
||||
continue
|
||||
|
||||
# Defaults for proxy choice
|
||||
if datastore.proxy_list is not None: # When enabled
|
||||
@@ -66,7 +72,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
processor_name = datastore.data['watching'][uuid].get('processor', '')
|
||||
processor_classes = next((tpl for tpl in processors.find_processors() if tpl[1] == processor_name), None)
|
||||
if not processor_classes:
|
||||
flash(f"Cannot load the edit form for processor/plugin '{processor_classes[1]}', plugin missing?", 'error')
|
||||
flash(gettext("Cannot load the edit form for processor/plugin '{}', plugin missing?").format(processor_classes[1]), 'error')
|
||||
return redirect(url_for('watchlist.index'))
|
||||
|
||||
parent_module = processors.get_parent_module(processor_classes[0])
|
||||
@@ -96,6 +102,26 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
form.datastore = datastore
|
||||
form.watch = default
|
||||
|
||||
# Load processor-specific config from JSON file for GET requests
|
||||
if request.method == 'GET' and processor_name:
|
||||
try:
|
||||
from changedetectionio.processors.base import difference_detection_processor
|
||||
# Create a processor instance to access config methods
|
||||
processor_instance = difference_detection_processor(datastore, uuid)
|
||||
# Use processor name as filename so each processor keeps its own config
|
||||
config_filename = f'{processor_name}.json'
|
||||
processor_config = processor_instance.get_extra_watch_config(config_filename)
|
||||
|
||||
if processor_config:
|
||||
# Populate processor-config-* fields from JSON
|
||||
for config_key, config_value in processor_config.items():
|
||||
field_name = f'processor_config_{config_key}'
|
||||
if hasattr(form, field_name):
|
||||
getattr(form, field_name).data = config_value
|
||||
logger.debug(f"Loaded processor config from {config_filename}: {field_name} = {config_value}")
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to load processor config: {e}")
|
||||
|
||||
for p in datastore.extra_browsers:
|
||||
form.fetch_backend.choices.append(p)
|
||||
|
||||
@@ -114,11 +140,6 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
|
||||
if request.method == 'POST' and form.validate():
|
||||
|
||||
# If they changed processor, it makes sense to reset it.
|
||||
if datastore.data['watching'][uuid].get('processor') != form.data.get('processor'):
|
||||
datastore.data['watching'][uuid].clear_watch()
|
||||
flash("Reset watch history due to change of processor")
|
||||
|
||||
extra_update_obj = {
|
||||
'consecutive_filter_failures': 0,
|
||||
'last_error' : False
|
||||
@@ -129,7 +150,60 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
|
||||
extra_update_obj['time_between_check'] = form.time_between_check.data
|
||||
|
||||
# Ignore text
|
||||
# Handle processor-config-* fields separately (save to JSON, not datastore)
|
||||
processor_config_data = {}
|
||||
fields_to_remove = []
|
||||
for field_name, field_value in form.data.items():
|
||||
if field_name.startswith('processor_config_'):
|
||||
config_key = field_name.replace('processor_config_', '')
|
||||
if field_value: # Only save non-empty values
|
||||
processor_config_data[config_key] = field_value
|
||||
fields_to_remove.append(field_name)
|
||||
|
||||
# Save processor config to JSON file if any config data exists
|
||||
if processor_config_data:
|
||||
try:
|
||||
processor_name = form.data.get('processor')
|
||||
# Create a processor instance to access config methods
|
||||
processor_instance = processors.difference_detection_processor(datastore, uuid)
|
||||
# Use processor name as filename so each processor keeps its own config
|
||||
config_filename = f'{processor_name}.json'
|
||||
processor_instance.update_extra_watch_config(config_filename, processor_config_data)
|
||||
logger.debug(f"Saved processor config to {config_filename}: {processor_config_data}")
|
||||
|
||||
# Call optional edit_hook if processor has one
|
||||
try:
|
||||
# Try to import the edit_hook module from the processor package
|
||||
import importlib
|
||||
edit_hook_module_name = f'changedetectionio.processors.{processor_name}.edit_hook'
|
||||
|
||||
try:
|
||||
edit_hook = importlib.import_module(edit_hook_module_name)
|
||||
logger.debug(f"Found edit_hook module for {processor_name}")
|
||||
|
||||
if hasattr(edit_hook, 'on_config_save'):
|
||||
logger.info(f"Calling edit_hook.on_config_save for {processor_name}")
|
||||
watch_obj = datastore.data['watching'][uuid]
|
||||
# Call hook and get updated config
|
||||
updated_config = edit_hook.on_config_save(watch_obj, processor_config_data, datastore)
|
||||
# Save updated config back to file
|
||||
processor_instance.update_extra_watch_config(config_filename, updated_config)
|
||||
logger.info(f"Edit hook updated config: {updated_config}")
|
||||
else:
|
||||
logger.debug(f"Edit hook module found but no on_config_save function")
|
||||
except ModuleNotFoundError:
|
||||
logger.debug(f"No edit_hook module for processor {processor_name} (this is normal)")
|
||||
except Exception as hook_error:
|
||||
logger.error(f"Edit hook error (non-fatal): {hook_error}", exc_info=True)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to save processor config: {e}")
|
||||
|
||||
# Remove processor-config-* fields from form.data before updating datastore
|
||||
for field_name in fields_to_remove:
|
||||
form.data.pop(field_name, None)
|
||||
|
||||
# Ignore text
|
||||
form_ignore_text = form.ignore_text.data
|
||||
datastore.data['watching'][uuid]['ignore_text'] = form_ignore_text
|
||||
|
||||
@@ -168,7 +242,14 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
# Recast it if need be to right data Watch handler
|
||||
watch_class = processors.get_custom_watch_obj_for_processor(form.data.get('processor'))
|
||||
datastore.data['watching'][uuid] = watch_class(datastore_path=datastore.datastore_path, default=datastore.data['watching'][uuid])
|
||||
flash("Updated watch - unpaused!" if request.args.get('unpause_on_save') else "Updated watch.")
|
||||
flash(gettext("Updated watch - unpaused!") if request.args.get('unpause_on_save') else gettext("Updated watch."))
|
||||
|
||||
# Cleanup any browsersteps session for this watch
|
||||
try:
|
||||
from changedetectionio.blueprint.browser_steps import cleanup_session_for_watch
|
||||
cleanup_session_for_watch(uuid)
|
||||
except Exception as e:
|
||||
logger.debug(f"Error cleaning up browsersteps session: {e}")
|
||||
|
||||
# Re #286 - We wait for syncing new data to disk in another thread every 60 seconds
|
||||
# But in the case something is added we should save straight away
|
||||
@@ -206,13 +287,13 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
|
||||
# Diff page [edit] link should go back to diff page
|
||||
if request.args.get("next") and request.args.get("next") == 'diff':
|
||||
return redirect(url_for('ui.ui_views.diff_history_page', uuid=uuid))
|
||||
return redirect(url_for('ui.ui_diff.diff_history_page', uuid=uuid))
|
||||
|
||||
return redirect(url_for('watchlist.index', tag=request.args.get("tag",'')))
|
||||
|
||||
else:
|
||||
if request.method == 'POST' and not form.validate():
|
||||
flash("An error occurred, please see below.", "error")
|
||||
flash(gettext("An error occurred, please see below."), "error")
|
||||
|
||||
# JQ is difficult to install on windows and must be manually added (outside requirements.txt)
|
||||
jq_support = True
|
||||
@@ -223,26 +304,25 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
|
||||
watch = datastore.data['watching'].get(uuid)
|
||||
|
||||
# if system or watch is configured to need a chrome type browser
|
||||
system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver'
|
||||
watch_needs_selenium_or_playwright = False
|
||||
if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver' or watch.get('fetch_backend', '').startswith('extra_browser_'):
|
||||
watch_needs_selenium_or_playwright = True
|
||||
|
||||
|
||||
from zoneinfo import available_timezones
|
||||
|
||||
# Only works reliably with Playwright
|
||||
|
||||
# Import the global plugin system
|
||||
from changedetectionio.pluggy_interface import collect_ui_edit_stats_extras
|
||||
|
||||
from changedetectionio.pluggy_interface import collect_ui_edit_stats_extras, get_fetcher_capabilities
|
||||
|
||||
# Get fetcher capabilities instead of hardcoded logic
|
||||
capabilities = get_fetcher_capabilities(watch, datastore)
|
||||
app_rss_token = datastore.data['settings']['application'].get('rss_access_token'),
|
||||
|
||||
c = [f"processor-{watch.get('processor')}"]
|
||||
if worker_handler.is_watch_running(uuid):
|
||||
c.append('checking-now')
|
||||
|
||||
template_args = {
|
||||
'available_processors': processors.available_processors(),
|
||||
'available_timezones': sorted(available_timezones()),
|
||||
'browser_steps_config': browser_step_ui_config,
|
||||
'emailprefix': os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False),
|
||||
'extra_classes': 'checking-now' if worker_handler.is_watch_running(uuid) else '',
|
||||
'extra_classes': ' '.join(c),
|
||||
'extra_notification_token_placeholder_info': datastore.get_unique_notification_token_placeholders_available(),
|
||||
'extra_processor_config': form.extra_tab_content(),
|
||||
'extra_title': f" - Edit - {watch.label}",
|
||||
@@ -252,16 +332,19 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
'has_special_tag_options': _watch_has_tag_options_set(watch=watch),
|
||||
'jq_support': jq_support,
|
||||
'playwright_enabled': os.getenv('PLAYWRIGHT_DRIVER_URL', False),
|
||||
'app_rss_token': app_rss_token,
|
||||
'rss_uuid_feed' : {
|
||||
'label': watch.label,
|
||||
'url': url_for('rss.rss_single_watch', uuid=watch['uuid'], token=app_rss_token)
|
||||
},
|
||||
'settings_application': datastore.data['settings']['application'],
|
||||
'system_has_playwright_configured': os.getenv('PLAYWRIGHT_DRIVER_URL'),
|
||||
'system_has_webdriver_configured': os.getenv('WEBDRIVER_URL'),
|
||||
'ui_edit_stats_extras': collect_ui_edit_stats_extras(watch),
|
||||
'visual_selector_data_ready': datastore.visualselector_data_is_ready(watch_uuid=uuid),
|
||||
'timezone_default_config': datastore.data['settings']['application'].get('scheduler_timezone_default'),
|
||||
'using_global_webdriver_wait': not default['webdriver_delay'],
|
||||
'uuid': uuid,
|
||||
'watch': watch,
|
||||
'watch_needs_selenium_or_playwright': watch_needs_selenium_or_playwright,
|
||||
'capabilities': capabilities
|
||||
}
|
||||
|
||||
included_content = None
|
||||
@@ -335,6 +418,6 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
s = re.sub(r'[0-9]+', r'\\d+', s)
|
||||
datastore.data["watching"][uuid]['ignore_text'].append('/' + s + '/')
|
||||
|
||||
return f"<a href={url_for('ui.ui_views.preview_page', uuid=uuid)}>Click to preview</a>"
|
||||
return f"<a href={url_for('ui.ui_preview.preview_page', uuid=uuid)}>Click to preview</a>"
|
||||
|
||||
return edit_blueprint
|
||||
@@ -2,7 +2,6 @@ from flask import Blueprint, request, make_response
|
||||
import random
|
||||
from loguru import logger
|
||||
|
||||
from changedetectionio.notification_service import NotificationContextData, set_basic_notification_vars
|
||||
from changedetectionio.store import ChangeDetectionStore
|
||||
from changedetectionio.auth_decorator import login_optionally_required
|
||||
|
||||
@@ -15,7 +14,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
@notification_blueprint.route("/notification/send-test/", methods=['POST'])
|
||||
@login_optionally_required
|
||||
def ajax_callback_send_notification_test(watch_uuid=None):
|
||||
|
||||
from changedetectionio.notification_service import NotificationContextData, set_basic_notification_vars
|
||||
# Watch_uuid could be unset in the case it`s used in tag editor, global settings
|
||||
import apprise
|
||||
from changedetectionio.notification.handler import process_notification
|
||||
@@ -97,45 +96,29 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
n_object['as_async'] = False
|
||||
|
||||
# Same like in notification service, should be refactored
|
||||
dates = []
|
||||
dates = list(watch.history.keys())
|
||||
trigger_text = ''
|
||||
snapshot_contents = ''
|
||||
if watch:
|
||||
watch_history = watch.history
|
||||
dates = list(watch_history.keys())
|
||||
trigger_text = watch.get('trigger_text', [])
|
||||
# Add text that was triggered
|
||||
if len(dates):
|
||||
snapshot_contents = watch.get_history_snapshot(dates[-1])
|
||||
else:
|
||||
snapshot_contents = "No snapshot/history available, the watch should fetch atleast once."
|
||||
|
||||
if len(trigger_text):
|
||||
from . import html_tools
|
||||
triggered_text = html_tools.get_triggered_text(content=snapshot_contents, trigger_text=trigger_text)
|
||||
if triggered_text:
|
||||
triggered_text = '\n'.join(triggered_text)
|
||||
|
||||
# Could be called as a 'test notification' with only 1 snapshot available
|
||||
prev_snapshot = "Example text: example test\nExample text: change detection is cool\nExample text: some more examples\n"
|
||||
current_snapshot = "Example text: example test\nExample text: change detection is fantastic\nExample text: even more examples\nExample text: a lot more examples"
|
||||
|
||||
|
||||
|
||||
if len(dates) > 1:
|
||||
prev_snapshot = watch.get_history_snapshot(dates[-2])
|
||||
current_snapshot = watch.get_history_snapshot(dates[-1])
|
||||
prev_snapshot = watch.get_history_snapshot(timestamp=dates[-2])
|
||||
current_snapshot = watch.get_history_snapshot(timestamp=dates[-1])
|
||||
|
||||
n_object.update(set_basic_notification_vars(snapshot_contents=snapshot_contents,
|
||||
current_snapshot=current_snapshot,
|
||||
n_object.update(set_basic_notification_vars(current_snapshot=current_snapshot,
|
||||
prev_snapshot=prev_snapshot,
|
||||
watch=watch,
|
||||
triggered_text=trigger_text))
|
||||
triggered_text=trigger_text,
|
||||
timestamp_changed=dates[-1] if dates else None))
|
||||
|
||||
|
||||
sent_obj = process_notification(n_object, datastore)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(e)
|
||||
e_str = str(e)
|
||||
# Remove this text which is not important and floods the container
|
||||
e_str = e_str.replace(
|
||||
|
||||
201
changedetectionio/blueprint/ui/preview.py
Normal file
201
changedetectionio/blueprint/ui/preview.py
Normal file
@@ -0,0 +1,201 @@
|
||||
from flask import Blueprint, request, url_for, flash, render_template, redirect
|
||||
from flask_babel import gettext
|
||||
import time
|
||||
from loguru import logger
|
||||
|
||||
from changedetectionio.store import ChangeDetectionStore
|
||||
from changedetectionio.auth_decorator import login_optionally_required
|
||||
from changedetectionio import html_tools
|
||||
|
||||
def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
preview_blueprint = Blueprint('ui_preview', __name__, template_folder="../ui/templates")
|
||||
|
||||
@preview_blueprint.route("/preview/<string:uuid>", methods=['GET'])
@login_optionally_required
def preview_page(uuid):
    """
    Render the preview page for a watch.

    This route is processor-aware: it delegates rendering to the processor's
    preview.py module, allowing different processor types to provide
    custom visualizations:
    - text_json_diff: Text preview with syntax highlighting
    - image_ssim_diff: Image preview with proper rendering
    - restock_diff: Could show latest price/stock data

    Each processor implements processors/{type}/preview.py::render()
    If a processor doesn't have a preview module (or the module has no
    render()), falls back to the default text preview built here.

    :param uuid: UUID of the watch, or the literal 'first' (testing aid).
    :return: Output of the processor's render(), the rendered preview.html
        template, or a redirect to the watch list when the watch is unknown.
    """
    # More for testing, possible to return the first/only
    if uuid == 'first':
        known_uuids = list(datastore.data['watching'].keys())
        # With no watches at all, keep 'first' so the lookup below fails into
        # the normal "bad link" handling instead of raising IndexError.
        if known_uuids:
            uuid = known_uuids[-1]

    try:
        watch = datastore.data['watching'][uuid]
    except KeyError:
        flash(gettext("No history found for the specified link, bad link?"), "error")
        return redirect(url_for('watchlist.index'))

    # Get the processor type for this watch
    processor_name = watch.get('processor', 'text_json_diff')

    try:
        # Try to import the processor's preview module
        import importlib
        processor_module = importlib.import_module(f'changedetectionio.processors.{processor_name}.preview')

        # Call the processor's render() function
        if hasattr(processor_module, 'render'):
            return processor_module.render(
                watch=watch,
                datastore=datastore,
                request=request,
                url_for=url_for,
                render_template=render_template,
                flash=flash,
                redirect=redirect
            )
    except ImportError as e:
        # ModuleNotFoundError is a subclass of ImportError, so this covers both
        logger.debug(f"Processor {processor_name} does not have a preview module, using default preview: {e}")

    # Fallback: if processor doesn't have preview module, use default text preview
    content = []
    versions = []
    timestamp = None

    system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver'
    extra_stylesheets = [url_for('static_content', group='styles', filename='diff.css')]

    # `or ''` guards against a stored fetch_backend of None before .startswith()
    fetch_backend = watch.get('fetch_backend') or ''
    is_html_webdriver = (
        (fetch_backend == 'system' and system_uses_webdriver)
        or fetch_backend == 'html_webdriver'
        or fetch_backend.startswith('extra_browser_')
    )

    triggered_line_numbers = []
    ignored_line_numbers = []
    blocked_line_numbers = []

    if watch.history_n == 0 and (watch.get_error_text() or watch.get_error_snapshot()):
        flash(gettext("Preview unavailable - No fetch/check completed or triggers not reached"), "error")
    else:
        # So prepare the latest preview or not
        versions = list(watch.history.keys())

        # A watch with no snapshots and no recorded error also lands here;
        # skip loading so versions[-1] cannot raise IndexError.
        if versions:
            preferred_version = request.args.get('version')
            timestamp = versions[-1]
            if preferred_version and preferred_version in versions:
                timestamp = preferred_version

            try:
                content = watch.get_history_snapshot(timestamp=timestamp)

                triggered_line_numbers = html_tools.strip_ignore_text(content=content,
                                                                      wordlist=watch.get('trigger_text'),
                                                                      mode='line numbers'
                                                                      )
                ignored_line_numbers = html_tools.strip_ignore_text(content=content,
                                                                    wordlist=watch.get('ignore_text'),
                                                                    mode='line numbers'
                                                                    )
                blocked_line_numbers = html_tools.strip_ignore_text(content=content,
                                                                    wordlist=watch.get("text_should_not_be_present"),
                                                                    mode='line numbers'
                                                                    )
            except Exception as e:
                logger.warning(f"Preview for watch {uuid} could not be prepared for timestamp {timestamp}: {e}")
                # Rebuild the list rather than append: `content` may already hold
                # the snapshot text if the failure happened after it was read.
                content = [{'line': f"File doesnt exist or unable to read timestamp {timestamp}", 'classes': ''}]

    from changedetectionio.pluggy_interface import get_fetcher_capabilities
    capabilities = get_fetcher_capabilities(watch, datastore)

    output = render_template("preview.html",
                             capabilities=capabilities,
                             content=content,
                             current_diff_url=watch['url'],
                             current_version=timestamp,
                             extra_stylesheets=extra_stylesheets,
                             extra_title=f" - Diff - {watch.label} @ {timestamp}",
                             highlight_ignored_line_numbers=ignored_line_numbers,
                             highlight_triggered_line_numbers=triggered_line_numbers,
                             highlight_blocked_line_numbers=blocked_line_numbers,
                             history_n=watch.history_n,
                             is_html_webdriver=is_html_webdriver,
                             last_error=watch['last_error'],
                             last_error_screenshot=watch.get_error_snapshot(),
                             last_error_text=watch.get_error_text(),
                             screenshot=watch.get_screenshot(),
                             uuid=uuid,
                             versions=versions,
                             watch=watch,
                             )

    return output
|
||||
|
||||
@preview_blueprint.route("/preview/<string:uuid>/processor-asset/<string:asset_name>", methods=['GET'])
@login_optionally_required
def processor_asset(uuid, asset_name):
    """
    Serve processor-specific binary assets for preview (images, files, etc.).

    This route is processor-aware: it delegates to the processor's
    preview.py module, allowing different processor types to serve
    custom assets without embedding them as base64 in templates.

    Each processor implements processors/{type}/preview.py::get_asset(),
    called here with asset_name, watch, datastore and request keyword
    arguments. It returns either None (asset unknown) or a
    (binary_data, content_type, cache_control_header) tuple.

    Example URLs:
    - /preview/{uuid}/processor-asset/screenshot?version=123456789

    :param uuid: UUID of the watch, or the literal 'first' (testing shortcut).
    :param asset_name: Name of the asset, interpreted by the processor.
    :return: A response carrying the asset bytes, or a redirect to the
             watch list when the watch is unknown.
    :raises: HTTP 404 (via abort) when the processor has no preview module,
             does not implement get_asset(), or does not know the asset.
    """
    import importlib
    from flask import abort, make_response

    # More for testing: 'first' resolves to the last key of the watch mapping.
    if uuid == 'first':
        watch_uuids = list(datastore.data['watching'].keys())
        # With no watches, leave uuid untouched so the lookup below takes the
        # normal "unknown watch" path instead of pop() raising IndexError.
        if watch_uuids:
            uuid = watch_uuids.pop()

    try:
        watch = datastore.data['watching'][uuid]
    except KeyError:
        flash(gettext("No history found for the specified link, bad link?"), "error")
        return redirect(url_for('watchlist.index'))

    # Get the processor type for this watch
    processor_name = watch.get('processor', 'text_json_diff')

    try:
        # Try to import the processor's preview module
        processor_module = importlib.import_module(f'changedetectionio.processors.{processor_name}.preview')

        if not hasattr(processor_module, 'get_asset'):
            logger.warning(f"Processor {processor_name} does not implement get_asset()")
            abort(404, description=f"Processor '{processor_name}' does not support assets")

        # Call the processor's get_asset() function
        result = processor_module.get_asset(
            asset_name=asset_name,
            watch=watch,
            datastore=datastore,
            request=request
        )

        if result is None:
            abort(404, description=f"Asset '{asset_name}' not found")

        binary_data, content_type, cache_control = result

        response = make_response(binary_data)
        response.headers['Content-Type'] = content_type
        # Only set Cache-Control when the processor supplied a value
        if cache_control:
            response.headers['Cache-Control'] = cache_control
        return response

    # ModuleNotFoundError is a subclass of ImportError, so this covers both.
    # abort() raises an HTTP exception, which is not an ImportError and
    # therefore passes through this handler untouched.
    except ImportError as e:
        logger.warning(f"Processor {processor_name} does not have a preview module: {e}")
        abort(404, description=f"Processor '{processor_name}' not found")
|
||||
|
||||
return preview_blueprint
|
||||
@@ -9,13 +9,12 @@
|
||||
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}" >
|
||||
<fieldset>
|
||||
<div class="pure-control-group">
|
||||
This will remove version history (snapshots) for ALL watches, but keep
|
||||
your list of URLs! <br />
|
||||
You may like to use the <strong>BACKUP</strong> link first.<br />
|
||||
{{ _('This will remove version history (snapshots) for ALL watches, but keep your list of URLs!') }} <br />
|
||||
{{ _('You may like to use the') }} <strong>{{ _('BACKUP') }}</strong> {{ _('link first.') }}<br />
|
||||
</div>
|
||||
<br />
|
||||
<div class="pure-control-group">
|
||||
<label for="confirmtext">Confirmation text</label>
|
||||
<label for="confirmtext">{{ _('Confirmation text') }}</label>
|
||||
<input
|
||||
type="text"
|
||||
id="confirmtext"
|
||||
@@ -25,20 +24,19 @@
|
||||
size="10"
|
||||
/>
|
||||
<span class="pure-form-message-inline"
|
||||
>Type in the word <strong>clear</strong> to confirm that you
|
||||
understand.</span
|
||||
>{{ _('Type in the word') }} <strong>{{ _('clear') }}</strong> {{ _('to confirm that you understand.') }}</span
|
||||
>
|
||||
</div>
|
||||
<br />
|
||||
<div class="pure-control-group">
|
||||
<button type="submit" class="pure-button pure-button-primary">
|
||||
Clear History!
|
||||
{{ _('Clear History!') }}
|
||||
</button>
|
||||
</div>
|
||||
<br />
|
||||
<div class="pure-control-group">
|
||||
<a href="{{url_for('watchlist.index')}}" class="pure-button button-cancel"
|
||||
>Cancel</a
|
||||
>{{ _('Cancel') }}</a
|
||||
>
|
||||
</div>
|
||||
</fieldset>
|
||||
|
||||
@@ -0,0 +1,12 @@
|
||||
<ul id="highlightSnippetActions">
|
||||
<li>
|
||||
<button class="pure-button pure-button-primary" onclick="diffToJpeg()" title="{{ _('Share diff as image') }}">{{ _('Share as Image') }}</button>
|
||||
</li>
|
||||
<li>
|
||||
<a class="pure-button pure-button-primary" data-mode="exact" href="javascript:void(0);">{{ _('Ignore any lines matching') }}</a>
|
||||
</li>
|
||||
<li>
|
||||
<a class="pure-button pure-button-primary" data-mode="digit-regex" href="javascript:void(0);" >{{ _('Ignore any lines matching excluding digits') }}</a>
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
166
changedetectionio/blueprint/ui/templates/diff.html
Normal file
166
changedetectionio/blueprint/ui/templates/diff.html
Normal file
@@ -0,0 +1,166 @@
|
||||
{% extends 'base.html' %}
|
||||
{% from '_helpers.html' import render_field, render_checkbox_field, render_button %}
|
||||
{% block content %}
|
||||
<script>
|
||||
const screenshot_url="{{url_for('static_content', group='screenshot', filename=uuid)}}";
|
||||
{% if last_error_screenshot %}
|
||||
const error_screenshot_url="{{url_for('static_content', group='screenshot', filename=uuid, error_screenshot=1) }}";
|
||||
{% endif %}
|
||||
|
||||
const highlight_submit_ignore_url="{{url_for('ui.ui_edit.highlight_submit_ignore_url', uuid=uuid)}}";
|
||||
const watch_url= {{watch_a.link|tojson}};
|
||||
|
||||
// Initial scroll position: if set, scroll to this line number in #difference on page load
|
||||
const initialScrollToLineNumber = {{ initial_scroll_line_number|default('null') }};
|
||||
</script>
|
||||
<script src="https://cdn.jsdelivr.net/npm/html2canvas@1.4.1/dist/html2canvas.min.js"></script>
|
||||
<script src="{{url_for('static_content', group='js', filename='plugins.js')}}"></script>
|
||||
<script src="https://cdn.jsdelivr.net/npm/piexifjs@1.0.6/piexif.min.js"></script>
|
||||
<script src="{{url_for('static_content', group='js', filename='snippet-to-image.js')}}"></script>
|
||||
<script src="{{url_for('static_content', group='js', filename='diff-overview.js')}}" defer></script>
|
||||
|
||||
|
||||
<div id="settings">
|
||||
<form class="pure-form " action="{{ url_for("ui.ui_diff.diff_history_page", uuid=uuid) }}" method="GET" id="diff-form">
|
||||
<fieldset class="diff-fieldset">
|
||||
{% if versions|length >= 1 %}
|
||||
<span style="white-space: nowrap;">
|
||||
<label id="change-from" for="diff-from-version" class="from-to-label">{{ _('From') }}</label>
|
||||
<select id="diff-from-version" name="from_version" class="needs-localtime">
|
||||
{%- for version in versions|reverse -%}
|
||||
<option value="{{ version }}" {% if version== from_version %} selected="" {% endif %}>
|
||||
{{ version }}{#{% if loop.index == 2 %} (Previous){% endif %}#}
|
||||
</option>
|
||||
{%- endfor -%}
|
||||
</select>
|
||||
</span>
|
||||
<span style="white-space: nowrap;">
|
||||
<label id="change-to" for="diff-to-version" class="from-to-label">{{ _('To') }}</label>
|
||||
<select id="diff-to-version" name="to_version" class="needs-localtime">
|
||||
{%- for version in versions|reverse -%}
|
||||
<option value="{{ version }}" {% if version== to_version %} selected="" {% endif %}>
|
||||
{{ version }}{#{% if loop.first %} (Current){% endif %}#}
|
||||
</option>
|
||||
{%- endfor -%}
|
||||
</select>
|
||||
</span>
|
||||
{#<button type="submit" class="pure-button pure-button-primary reset-margin">Go</button>#}
|
||||
{% endif %}
|
||||
</fieldset>
|
||||
<fieldset id="diff-style">
|
||||
<span>
|
||||
<label for="diffWords" class="pure-checkbox">
|
||||
<input type="radio" name="type" id="diffWords" value="diffWords" {% if diff_prefs.type == 'diffWords' %}checked=""{% endif %}> {{ _('Words') }}</label>
|
||||
</span>
|
||||
<span>
|
||||
<label for="diffLines" class="pure-checkbox">
|
||||
<input type="radio" name="type" id="diffLines" value="diffLines" {% if diff_prefs.type == 'diffLines' %}checked=""{% endif %}> {{ _('Lines') }}</label>
|
||||
</span>
|
||||
<span>
|
||||
<label for="ignoreWhitespace" class="pure-checkbox" id="label-diff-ignorewhitespace">
|
||||
<input type="checkbox" id="ignoreWhitespace" name="ignoreWhitespace" {% if diff_prefs.ignoreWhitespace %}checked=""{% endif %}> {{ _('Ignore Whitespace') }}</label>
|
||||
</span>
|
||||
<span>
|
||||
<label for="changesOnly" class="pure-checkbox" id="label-diff-changes">
|
||||
<input type="checkbox" id="changesOnly" name="changesOnly" {% if diff_prefs.changesOnly %}checked=""{% endif %}> {{ _('Same/non-changed') }}</label>
|
||||
</span>
|
||||
<span>
|
||||
<label for="removed" class="pure-checkbox" id="label-diff-removed">
|
||||
<input type="checkbox" id="removed" name="removed" {% if diff_prefs.removed %}checked=""{% endif %}> {{ _('Removed') }}</label>
|
||||
</span>
|
||||
<span>
|
||||
<label for="added" class="pure-checkbox" id="label-diff-added">
|
||||
<input type="checkbox" id="added" name="added" {% if diff_prefs.added %}checked=""{% endif %}> {{ _('Added') }}</label>
|
||||
</span>
|
||||
<span>
|
||||
<label for="replaced" class="pure-checkbox" id="label-diff-replaced">
|
||||
<input type="checkbox" id="replaced" name="replaced" {% if diff_prefs.replaced %}checked=""{% endif %}> {{ _('Replaced') }}</label>
|
||||
</span>
|
||||
</fieldset>
|
||||
{%- if versions|length >= 2 -%}
|
||||
<div id="keyboard-nav">
|
||||
<strong>{{ _('Keyboard:') }} </strong>
|
||||
<a href="" class="pure-button pure-button-primary" id="btn-previous"> ← {{ _('Previous') }}</a>
|
||||
<a class="pure-button pure-button-primary" id="btn-next" href=""> → {{ _('Next') }}</a>
|
||||
</div>
|
||||
{%- endif -%}
|
||||
</form>
|
||||
</div>
|
||||
|
||||
<div id="diff-jump" style="display:none;"><!-- disabled for now -->
|
||||
<a id="jump-next-diff" title="{{ _('Jump to next difference') }}">{{ _('Jump') }}</a>
|
||||
</div>
|
||||
|
||||
<script src="{{url_for('static_content', group='js', filename='tabs.js')}}" defer></script>
|
||||
<div class="tabs">
|
||||
<ul>
|
||||
{% if last_error_text %}<li class="tab" id="error-text-tab"><a href="#error-text">{{ _('Error Text') }}</a></li> {% endif %}
|
||||
{% if last_error_screenshot %}<li class="tab" id="error-screenshot-tab"><a href="#error-screenshot">{{ _('Error Screenshot') }}</a></li> {% endif %}
|
||||
<li class="tab" id="text-tab"><a href="#text">{{ _('Text') }}</a></li>
|
||||
<li class="tab" id="screenshot-tab"><a href="#screenshot">{{ _('Current screenshot') }}</a></li>
|
||||
<li class="tab" id="extract-tab"><a href="{{ url_for('ui.ui_diff.diff_history_page_extract_GET', uuid=uuid)}}">{{ _('Extract Data') }}</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
<div id="diff-ui">
|
||||
<div class="tab-pane-inner" id="error-text">
|
||||
<div class="snapshot-age error">{{watch_a.error_text_ctime|format_seconds_ago}} {{ _('seconds ago.') }}</div>
|
||||
<pre>
|
||||
{{ last_error_text }}
|
||||
</pre>
|
||||
</div>
|
||||
|
||||
<div class="tab-pane-inner" id="error-screenshot">
|
||||
<div class="snapshot-age error">{{watch_a.snapshot_error_screenshot_ctime|format_seconds_ago}} {{ _('seconds ago') }}</div>
|
||||
<img id="error-screenshot-img" style="max-width: 80%" alt="{{ _('Current error-ing screenshot from most recent request') }}" >
|
||||
</div>
|
||||
|
||||
<div class="tab-pane-inner" id="text">
|
||||
{%- if (content | default('')).split('\n') | length > 100 -%}
|
||||
<div id="cell-diff-jump-visualiser" style="user-select: none;">
|
||||
{%- for cell in diff_cell_grid -%}
|
||||
<div{% if cell.class %} class="{{ cell.class }}"{% endif %}></div>
|
||||
{%- endfor -%}
|
||||
</div>
|
||||
{%- endif -%}
|
||||
{%- if password_enabled_and_share_is_off -%}
|
||||
<div class="tip">{{ _('Pro-tip: You can enable') }} <strong>{{ _('"share access when password is enabled"') }}</strong> {{ _('from settings.') }}
|
||||
</div>
|
||||
{%- endif -%}
|
||||
<div id="text-diff-heading-area" style="user-select: none;">
|
||||
<div class="snapshot-age"><span>{{ from_version|format_timestamp_timeago }}</span>
|
||||
{%- if note -%}<span class="note"><strong>{{ note }}</strong></span>{%- endif -%}
|
||||
<a href="{{ url_for("ui.ui_preview.preview_page", uuid=uuid) }}">{{ _('Goto single snapshot') }}</a>
|
||||
</div>
|
||||
</div>
|
||||
<pre id="difference" style="border-left: 2px solid #ddd;">{{ content| diff_unescape_difference_spans }}</pre>
|
||||
<div id="diff-visualiser-area-after" style="user-select: none;">
|
||||
<strong>{{ _('Tip:') }}</strong> {{ _('Highlight text to share or add to ignore lists.') }}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="tab-pane-inner" id="screenshot">
|
||||
<div class="tip">
|
||||
{{ _('For now, Differences are performed on text, not graphically, only the latest screenshot is available.') }}
|
||||
</div>
|
||||
{% if is_html_webdriver %}
|
||||
{% if screenshot %}
|
||||
<div class="snapshot-age">{{watch_a.snapshot_screenshot_ctime|format_timestamp_timeago}}</div>
|
||||
<img style="max-width: 80%" id="screenshot-img" alt="{{ _('Current screenshot from most recent request') }}" >
|
||||
{% else %}
|
||||
{{ _('No screenshot available just yet! Try rechecking the page.') }}
|
||||
{% endif %}
|
||||
{% else %}
|
||||
<strong>{{ _('Screenshot requires Playwright/WebDriver enabled') }}</strong>
|
||||
{% endif %}
|
||||
</div>
|
||||
|
||||
</div>
|
||||
|
||||
<script>
|
||||
const newest_version_timestamp = {{newest_version_timestamp}};
|
||||
</script>
|
||||
<script src="{{url_for('static_content', group='js', filename='diff-render.js')}}"></script>
|
||||
|
||||
|
||||
{% endblock %}
|
||||
@@ -1,12 +1,13 @@
|
||||
{% extends 'base.html' %}
|
||||
{% block content %}
|
||||
{% from '_helpers.html' import render_field, render_checkbox_field, render_button, render_time_schedule_form, playwright_warning, only_playwright_type_watches_warning, render_conditions_fieldlist_of_formfields_as_table, render_ternary_field %}
|
||||
{% from '_helpers.html' import render_field, render_checkbox_field, render_button, render_time_schedule_form, playwright_warning, only_playwright_type_watches_warning, highlight_trigger_ignored_explainer, render_conditions_fieldlist_of_formfields_as_table, render_ternary_field %}
|
||||
{% from '_common_fields.html' import render_common_settings_form %}
|
||||
<script src="{{url_for('static_content', group='js', filename='tabs.js')}}" defer></script>
|
||||
<script src="{{url_for('static_content', group='js', filename='vis.js')}}" defer></script>
|
||||
<script src="{{url_for('static_content', group='js', filename='global-settings.js')}}" defer></script>
|
||||
<script src="{{url_for('static_content', group='js', filename='scheduler.js')}}" defer></script>
|
||||
<script src="{{url_for('static_content', group='js', filename='conditions.js')}}" defer></script>
|
||||
<script src="{{url_for('static_content', group='js', filename='modal.js')}}"></script>
|
||||
|
||||
|
||||
<script>
|
||||
@@ -43,20 +44,20 @@
|
||||
|
||||
<div class="tabs collapsable">
|
||||
<ul>
|
||||
<li class="tab"><a href="#general">General</a></li>
|
||||
<li class="tab"><a href="#request">Request</a></li>
|
||||
<li class="tab"><a href="#general">{{ _('General') }}</a></li>
|
||||
<li class="tab"><a href="#request">{{ _('Request') }}</a></li>
|
||||
{% if extra_tab_content %}
|
||||
<li class="tab"><a href="#extras_tab">{{ extra_tab_content }}</a></li>
|
||||
{% endif %}
|
||||
<li class="tab"><a id="browsersteps-tab" href="#browser-steps">Browser Steps</a></li>
|
||||
<li class="tab"><a id="browsersteps-tab" href="#browser-steps">{{ _('Browser Steps') }}</a></li>
|
||||
<!-- should goto extra forms? -->
|
||||
{% if watch['processor'] == 'text_json_diff' %}
|
||||
<li class="tab"><a id="visualselector-tab" href="#visualselector">Visual Filter Selector</a></li>
|
||||
<li class="tab" id="filters-and-triggers-tab"><a href="#filters-and-triggers">Filters & Triggers</a></li>
|
||||
<li class="tab" id="conditions-tab"><a href="#conditions">Conditions</a></li>
|
||||
{% if watch['processor'] == 'text_json_diff' or watch['processor'] == 'image_ssim_diff' %}
|
||||
<li class="tab"><a id="visualselector-tab" href="#visualselector">{{ _('Visual Filter Selector') }}</a></li>
|
||||
<li class="tab" id="filters-and-triggers-tab"><a href="#filters-and-triggers">{{ _('Filters & Triggers') }}</a></li>
|
||||
<li class="tab" id="conditions-tab"><a href="#conditions">{{ _('Conditions') }}</a></li>
|
||||
{% endif %}
|
||||
<li class="tab"><a href="#notifications">Notifications</a></li>
|
||||
<li class="tab"><a href="#stats">Stats</a></li>
|
||||
<li class="tab"><a href="#notifications">{{ _('Notifications') }}</a></li>
|
||||
<li class="tab"><a href="#stats">{{ _('Stats') }}</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
@@ -69,19 +70,19 @@
|
||||
<fieldset>
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.url, placeholder="https://...", required=true, class="m-d") }}
|
||||
<div class="pure-form-message">Some sites use JavaScript to create the content, for this you should <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Fetching-pages-with-WebDriver">use the Chrome/WebDriver Fetcher</a></div>
|
||||
<div class="pure-form-message">Variables are supported in the URL (<a href="https://github.com/dgtlmoon/changedetection.io/wiki/Handling-variables-in-the-watched-URL">help and examples here</a>).</div>
|
||||
<div class="pure-form-message">{{ _('Some sites use JavaScript to create the content, for this you should') }} <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Fetching-pages-with-WebDriver">{{ _('use the Chrome/WebDriver Fetcher') }}</a></div>
|
||||
<div class="pure-form-message">{{ _('Variables are supported in the URL') }} (<a href="https://github.com/dgtlmoon/changedetection.io/wiki/Handling-variables-in-the-watched-URL">{{ _('help and examples here') }}</a>).</div>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.tags) }}
|
||||
<span class="pure-form-message-inline">Organisational tag/group name used in the main listing page</span>
|
||||
<span class="pure-form-message-inline">{{ _('Organisational tag/group name used in the main listing page') }}</span>
|
||||
</div>
|
||||
<div class="pure-control-group inline-radio">
|
||||
{{ render_field(form.processor) }}
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.title, class="m-d", placeholder=watch.label) }}
|
||||
<span class="pure-form-message-inline">Automatically uses the page title if found, you can also use your own title/description here</span>
|
||||
<span class="pure-form-message-inline">{{ _('Automatically uses the page title if found, you can also use your own title/description here') }}</span>
|
||||
</div>
|
||||
<div class="pure-control-group time-between-check border-fieldset">
|
||||
|
||||
@@ -91,7 +92,7 @@
|
||||
{{ render_field(form.time_between_check, class="time-check-widget") }}
|
||||
|
||||
<span class="pure-form-message-inline">
|
||||
The interval/amount of time between each check.
|
||||
{{ _('The interval/amount of time between each check.') }}
|
||||
</span>
|
||||
</div>
|
||||
<div id="time-between-check-schedule">
|
||||
@@ -106,7 +107,7 @@
|
||||
<div class="pure-control-group">
|
||||
{{ render_checkbox_field(form.filter_failure_notification_send) }}
|
||||
<span class="pure-form-message-inline">
|
||||
Sends a notification when the filter can no longer be seen on the page, good for knowing when the page changed and your filter will not work anymore.
|
||||
{{ _('Sends a notification when the filter can no longer be seen on the page, good for knowing when the page changed and your filter will not work anymore.') }}
|
||||
</span>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
@@ -119,17 +120,17 @@
|
||||
<div class="pure-control-group inline-radio">
|
||||
{{ render_field(form.fetch_backend, class="fetch-backend") }}
|
||||
<span class="pure-form-message-inline">
|
||||
<p>Use the <strong>Basic</strong> method (default) where your watched site doesn't need Javascript to render.</p>
|
||||
<p>The <strong>Chrome/Javascript</strong> method requires a network connection to a running WebDriver+Chrome server, set by the ENV var 'WEBDRIVER_URL'. </p>
|
||||
Tip: <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration#brightdata-proxy-support">Connect using Bright Data and Oxylabs Proxies, find out more here.</a>
|
||||
<p>{{ _('Use the') }} <strong>{{ _('Basic') }}</strong> {{ _('method (default) where your watched site doesn\'t need Javascript to render.') }}</p>
|
||||
<p>{{ _('The') }} <strong>{{ _('Chrome/Javascript') }}</strong> {{ _('method requires a network connection to a running WebDriver+Chrome server, set by the ENV var \'WEBDRIVER_URL\'.') }} </p>
|
||||
{{ _('Tip:') }} <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration#brightdata-proxy-support">{{ _('Connect using Bright Data and Oxylabs Proxies, find out more here.') }}</a>
|
||||
</span>
|
||||
</div>
|
||||
{% if form.proxy %}
|
||||
<div class="pure-control-group inline-radio">
|
||||
<div>{{ form.proxy.label }} <a href="" id="check-all-proxies" class="pure-button button-secondary button-xsmall" >Check/Scan all</a></div>
|
||||
<div>{{ form.proxy.label }} <a href="" id="check-all-proxies" class="pure-button button-secondary button-xsmall" >{{ _('Check/Scan all') }}</a></div>
|
||||
<div>{{ form.proxy(class="fetch-backend-proxy") }}</div>
|
||||
<span class="pure-form-message-inline">
|
||||
Choose a proxy for this watch
|
||||
{{ _('Choose a proxy for this watch') }}
|
||||
</span>
|
||||
</div>
|
||||
{% endif %}
|
||||
@@ -139,31 +140,29 @@
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.webdriver_delay) }}
|
||||
<div class="pure-form-message-inline">
|
||||
<strong>If you're having trouble waiting for the page to be fully rendered (text missing etc), try increasing the 'wait' time here.</strong>
|
||||
<strong>{{ _('If you\'re having trouble waiting for the page to be fully rendered (text missing etc), try increasing the \'wait\' time here.') }}</strong>
|
||||
<br>
|
||||
This will wait <i>n</i> seconds before extracting the text.
|
||||
{{ _('This will wait') }} <i>n</i> {{ _('seconds before extracting the text.') }}
|
||||
{% if using_global_webdriver_wait %}
|
||||
<br><strong>Using the current global default settings</strong>
|
||||
<br><strong>{{ _('Using the current global default settings') }}</strong>
|
||||
{% endif %}
|
||||
</div>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
<a class="pure-button button-secondary button-xsmall show-advanced">Show advanced options</a>
|
||||
<a class="pure-button button-secondary button-xsmall show-advanced">{{ _('Show advanced options') }}</a>
|
||||
</div>
|
||||
<div class="advanced-options" style="display: none;">
|
||||
{{ render_field(form.webdriver_js_execute_code) }}
|
||||
<div class="pure-form-message-inline">
|
||||
Run this code before performing change detection, handy for filling in fields and other
|
||||
actions <a
|
||||
href="https://github.com/dgtlmoon/changedetection.io/wiki/Run-JavaScript-before-change-detection">More
|
||||
help and examples here</a>
|
||||
{{ _('Run this code before performing change detection, handy for filling in fields and other actions') }} <a
|
||||
href="https://github.com/dgtlmoon/changedetection.io/wiki/Run-JavaScript-before-change-detection">{{ _('More help and examples here') }}</a>
|
||||
</div>
|
||||
</div>
|
||||
</fieldset>
|
||||
<!-- html requests always -->
|
||||
<fieldset data-visible-for="fetch_backend=html_requests">
|
||||
<div class="pure-control-group">
|
||||
<a class="pure-button button-secondary button-xsmall show-advanced">Show advanced options</a>
|
||||
<a class="pure-button button-secondary button-xsmall show-advanced">{{ _('Show advanced options') }}</a>
|
||||
</div>
|
||||
<div class="advanced-options" style="display: none;">
|
||||
<div class="pure-control-group" id="request-method">
|
||||
@@ -178,7 +177,7 @@
|
||||
\"year\":{% now 'Europe/Berlin', '%Y' %}
|
||||
}") }}
|
||||
</div>
|
||||
<div class="pure-form-message">Variables are supported in the request body (<a href="https://github.com/dgtlmoon/changedetection.io/wiki/Handling-variables-in-the-watched-URL">help and examples here</a>).</div>
|
||||
<div class="pure-form-message">{{ _('Variables are supported in the request body') }} (<a href="https://github.com/dgtlmoon/changedetection.io/wiki/Handling-variables-in-the-watched-URL">{{ _('help and examples here') }}</a>).</div>
|
||||
</div>
|
||||
</fieldset>
|
||||
<!-- hmm -->
|
||||
@@ -187,15 +186,15 @@
|
||||
Cookie: foobar
|
||||
User-Agent: wonderbra 1.0
|
||||
Math: {{ 1 + 1 }}") }}
|
||||
<div class="pure-form-message">Variables are supported in the request header values (<a href="https://github.com/dgtlmoon/changedetection.io/wiki/Handling-variables-in-the-watched-URL">help and examples here</a>).</div>
|
||||
<div class="pure-form-message">{{ _('Variables are supported in the request header values') }} (<a href="https://github.com/dgtlmoon/changedetection.io/wiki/Handling-variables-in-the-watched-URL">{{ _('help and examples here') }}</a>).</div>
|
||||
<div class="pure-form-message-inline">
|
||||
{% if has_extra_headers_file %}
|
||||
<strong>Alert! Extra headers file found and will be added to this watch!</strong>
|
||||
<strong>{{ _('Alert! Extra headers file found and will be added to this watch!') }}</strong>
|
||||
{% else %}
|
||||
Headers can be also read from a file in your data-directory <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Adding-headers-from-an-external-file">Read more here</a>
|
||||
{{ _('Headers can be also read from a file in your data-directory') }} <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Adding-headers-from-an-external-file">{{ _('Read more here') }}</a>
|
||||
{% endif %}
|
||||
<br>
|
||||
(Not supported by Selenium browser)
|
||||
({{ _('Not supported by Selenium browser') }})
|
||||
</div>
|
||||
</div>
|
||||
<fieldset data-visible-for="fetch_backend=html_requests fetch_backend=html_webdriver" >
|
||||
@@ -206,9 +205,8 @@ Math: {{ 1 + 1 }}") }}
|
||||
</div>
|
||||
|
||||
<div class="tab-pane-inner" id="browser-steps">
|
||||
{% if watch_needs_selenium_or_playwright %}
|
||||
{# Only works with playwright #}
|
||||
{% if system_has_playwright_configured %}
|
||||
{% if capabilities.supports_browser_steps %}
|
||||
{% if true %}
|
||||
<img class="beta-logo" src="{{url_for('static_content', group='images', filename='beta-logo.png')}}" alt="New beta functionality">
|
||||
<fieldset>
|
||||
<div class="pure-control-group">
|
||||
@@ -220,19 +218,19 @@ Math: {{ 1 + 1 }}") }}
|
||||
|
||||
<!--- Do this later -->
|
||||
<div class="checkbox" style="display: none;">
|
||||
<input type=checkbox id="include_text_elements" > <label for="include_text_elements">Turn on text finder</label>
|
||||
<input type=checkbox id="include_text_elements" > <label for="include_text_elements">{{ _('Turn on text finder') }}</label>
|
||||
</div>
|
||||
|
||||
<div id="loading-status-text" style="display: none;">Please wait, first browser step can take a little time to load..<div class="spinner"></div></div>
|
||||
<div id="loading-status-text" style="display: none;">{{ _('Please wait, first browser step can take a little time to load..') }}<div class="spinner"></div></div>
|
||||
<div class="flex-wrapper" >
|
||||
|
||||
<div id="browser-steps-ui" class="noselect">
|
||||
<div class="noselect" id="browsersteps-selector-wrapper" style="width: 100%">
|
||||
<span class="loader" >
|
||||
<span id="browsersteps-click-start">
|
||||
<h2 >Click here to Start</h2>
|
||||
<h2 >{{ _('Click here to Start') }}</h2>
|
||||
<svg style="height: 3.5rem;" version="1.1" viewBox="0 0 32 32" xml:space="preserve" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink"><g id="start"/><g id="play_x5F_alt"><path d="M16,0C7.164,0,0,7.164,0,16s7.164,16,16,16s16-7.164,16-16S24.836,0,16,0z M10,24V8l16.008,8L10,24z" style="fill: var(--color-grey-400);"/></g></svg><br>
|
||||
Please allow 10-15 seconds for the browser to connect.<br>
|
||||
{{ _('Please allow 10-15 seconds for the browser to connect.') }}<br>
|
||||
</span>
|
||||
<div class="spinner" style="display: none;"></div>
|
||||
</span>
|
||||
@@ -241,22 +239,20 @@ Math: {{ 1 + 1 }}") }}
|
||||
</div>
|
||||
</div>
|
||||
<div id="browser-steps-fieldlist" >
|
||||
<span id="browser-seconds-remaining">Press "Play" to start.</span> <span style="font-size: 80%;"> (<a target="newwindow" href="https://github.com/dgtlmoon/changedetection.io/pull/478/files#diff-1a79d924d1840c485238e66772391268a89c95b781d69091384cf1ea1ac146c9R4">?</a>) </span>
|
||||
<span id="browser-seconds-remaining">{{ _('Press "Play" to start.') }}</span> <span style="font-size: 80%;"> (<a target="newwindow" href="https://github.com/dgtlmoon/changedetection.io/pull/478/files#diff-1a79d924d1840c485238e66772391268a89c95b781d69091384cf1ea1ac146c9R4">?</a>) </span>
|
||||
{{ render_field(form.browser_steps) }}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</fieldset>
|
||||
{% else %}
|
||||
{# it's configured to use selenium or chrome but system says its not configured #}
|
||||
{{ playwright_warning() }}
|
||||
{% if system_has_webdriver_configured %}
|
||||
<strong>Selenium/Webdriver cant be used here because it wont fetch screenshots reliably.</strong>
|
||||
{% endif %}
|
||||
<strong>{{ _('Visual Selector data is not ready, watch needs to be checked atleast once.') }}</strong>
|
||||
{% endif %}
|
||||
{% else %}
|
||||
{# "This functionality needs chrome.." #}
|
||||
{{ only_playwright_type_watches_warning() }}
|
||||
<p>
|
||||
<strong>{{ _('Sorry, this functionality only works with fetchers that support interactive Javascript (so far only Playwright based fetchers)') }}<br>
|
||||
{{ _('You need to') }} <a href="#request">{{ _('Set the fetch method') }}</a> {{ _('to one that supports interactive Javascript.') }}</strong>
|
||||
</p>
|
||||
{% endif %}
|
||||
</div>
|
||||
|
||||
@@ -266,28 +262,28 @@ Math: {{ 1 + 1 }}") }}
|
||||
<div class="pure-control-group inline-radio">
|
||||
{{ render_ternary_field(form.notification_muted, BooleanField=true) }}
|
||||
</div>
|
||||
{% if watch_needs_selenium_or_playwright %}
|
||||
{% if capabilities.supports_screenshots %}
|
||||
<div class="pure-control-group inline-radio">
|
||||
{{ render_checkbox_field(form.notification_screenshot) }}
|
||||
<span class="pure-form-message-inline">
|
||||
<strong>Use with caution!</strong> This will easily fill up your email storage quota or flood other storages.
|
||||
<strong>{{ _('Use with caution!') }}</strong> {{ _('This will easily fill up your email storage quota or flood other storages.') }}
|
||||
</span>
|
||||
</div>
|
||||
{% endif %}
|
||||
<div class="field-group" id="notification-field-group">
|
||||
{% if has_default_notification_urls %}
|
||||
<div class="inline-warning">
|
||||
<img class="inline-warning-icon" src="{{url_for('static_content', group='images', filename='notice.svg')}}" alt="Look out!" title="Lookout!" >
|
||||
There are <a href="{{ url_for('settings.settings_page')}}#notifications">system-wide notification URLs enabled</a>, this form will override notification settings for this watch only ‐ an empty Notification URL list here will still send notifications.
|
||||
<img class="inline-warning-icon" src="{{url_for('static_content', group='images', filename='notice.svg')}}" alt="{{ _('Look out!') }}" title="{{ _('Lookout!') }}" >
|
||||
{{ _('There are') }} <a href="{{ url_for('settings.settings_page')}}#notifications">{{ _('system-wide notification URLs enabled') }}</a>, {{ _('this form will override notification settings for this watch only') }} ‐ {{ _('an empty Notification URL list here will still send notifications.') }}
|
||||
</div>
|
||||
{% endif %}
|
||||
<a href="#notifications" id="notification-setting-reset-to-default" class="pure-button button-xsmall" style="right: 20px; top: 20px; position: absolute; background-color: #5f42dd; border-radius: 4px; font-size: 70%; color: #fff">Use system defaults</a>
|
||||
<a href="#notifications" id="notification-setting-reset-to-default" class="pure-button button-xsmall" style="right: 20px; top: 20px; position: absolute; background-color: #5f42dd; border-radius: 4px; font-size: 70%; color: #fff">{{ _('Use system defaults') }}</a>
|
||||
{{ render_common_settings_form(form, emailprefix, settings_application, extra_notification_token_placeholder_info) }}
|
||||
</div>
|
||||
</fieldset>
|
||||
</div>
|
||||
|
||||
{% if watch['processor'] == 'text_json_diff' %}
|
||||
{% if watch['processor'] == 'text_json_diff' or watch['processor'] == 'image_ssim_diff' %}
|
||||
|
||||
<div class="tab-pane-inner" id="conditions">
|
||||
<script>
|
||||
@@ -298,23 +294,23 @@ Math: {{ 1 + 1 }}") }}
|
||||
{{ render_conditions_fieldlist_of_formfields_as_table(form.conditions) }}
|
||||
<div class="pure-form-message-inline">
|
||||
|
||||
<p id="verify-state-text">Use the verify (✓) button to test if a condition passes against the current snapshot.</p>
|
||||
Read a quick tutorial about <a href="https://changedetection.io/tutorial/conditional-actions-web-page-changes">using conditional web page changes here</a>.<br>
|
||||
<p id="verify-state-text">{{ _('Use the verify (✓) button to test if a condition passes against the current snapshot.') }}</p>
|
||||
{{ _('Read a quick tutorial about') }} <a href="https://changedetection.io/tutorial/conditional-actions-web-page-changes">{{ _('using conditional web page changes here') }}</a>.<br>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="tab-pane-inner" id="filters-and-triggers">
|
||||
<span id="activate-text-preview" class="pure-button pure-button-primary button-xsmall">Activate preview</span>
|
||||
<span id="activate-text-preview" class="pure-button pure-button-primary button-xsmall">{{ _('Activate preview') }}</span>
|
||||
<div>
|
||||
<div id="edit-text-filter">
|
||||
<div class="pure-control-group" id="pro-tips">
|
||||
<strong>Pro-tips:</strong><br>
|
||||
<strong>{{ _('Pro-tips:') }}</strong><br>
|
||||
<ul>
|
||||
<li>
|
||||
Use the preview page to see your filters and triggers highlighted.
|
||||
{{ _('Use the preview page to see your filters and triggers highlighted.') }}
|
||||
</li>
|
||||
<li>
|
||||
Some sites use JavaScript to create the content, for this you should <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Fetching-pages-with-WebDriver">use the Chrome/WebDriver Fetcher</a>
|
||||
{{ _('Some sites use JavaScript to create the content, for this you should') }} <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Fetching-pages-with-WebDriver">{{ _('use the Chrome/WebDriver Fetcher') }}</a>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
@@ -322,50 +318,51 @@ Math: {{ 1 + 1 }}") }}
|
||||
{% include "edit/include_subtract.html" %}
|
||||
<div class="text-filtering border-fieldset">
|
||||
<fieldset class="pure-group" id="text-filtering-type-options">
|
||||
<h3>Text filtering</h3>
|
||||
Limit trigger/ignore/block/extract to;<br>
|
||||
<h3>{{ _('Text filtering') }}</h3>
|
||||
{{ _('Limit trigger/ignore/block/extract to;') }}<br>
|
||||
{{ render_checkbox_field(form.filter_text_added) }}
|
||||
{{ render_checkbox_field(form.filter_text_replaced) }}
|
||||
{{ render_checkbox_field(form.filter_text_removed) }}
|
||||
<span class="pure-form-message-inline">Note: Depending on the length and similarity of the text on each line, the algorithm may consider an <strong>addition</strong> instead of <strong>replacement</strong> for example.</span><br>
|
||||
<span class="pure-form-message-inline"> So it's always better to select <strong>Added</strong>+<strong>Replaced</strong> when you're interested in new content.</span><br>
|
||||
<span class="pure-form-message-inline"> When content is merely moved in a list, it will also trigger an <strong>addition</strong>, consider enabling <code><strong>Only trigger when unique lines appear</strong></code></span>
|
||||
<span class="pure-form-message-inline">{{ _('Note: Depending on the length and similarity of the text on each line, the algorithm may consider an') }} <strong>{{ _('addition') }}</strong> {{ _('instead of') }} <strong>{{ _('replacement') }}</strong> {{ _('for example.') }}</span><br>
|
||||
<span class="pure-form-message-inline"> {{ _('So it\'s always better to select') }} <strong>{{ _('Added') }}</strong>+<strong>{{ _('Replaced') }}</strong> {{ _('when you\'re interested in new content.') }}</span><br>
|
||||
<span class="pure-form-message-inline"> {{ _('When content is merely moved in a list, it will also trigger an') }} <strong>{{ _('addition') }}</strong>, {{ _('consider enabling') }} <code><strong>{{ _('Only trigger when unique lines appear') }}</strong></code></span>
|
||||
</fieldset>
|
||||
<fieldset class="pure-control-group">
|
||||
{{ render_checkbox_field(form.check_unique_lines) }}
|
||||
<span class="pure-form-message-inline">Good for websites that just move the content around, and you want to know when NEW content is added, compares new lines against all history for this watch.</span>
|
||||
<span class="pure-form-message-inline">{{ _('Good for websites that just move the content around, and you want to know when NEW content is added, compares new lines against all history for this watch.') }}</span>
|
||||
</fieldset>
|
||||
<fieldset class="pure-control-group">
|
||||
{{ render_checkbox_field(form.remove_duplicate_lines) }}
|
||||
<span class="pure-form-message-inline">Remove duplicate lines of text</span>
|
||||
<span class="pure-form-message-inline">{{ _('Remove duplicate lines of text') }}</span>
|
||||
</fieldset>
|
||||
<fieldset class="pure-control-group">
|
||||
{{ render_checkbox_field(form.sort_text_alphabetically) }}
|
||||
<span class="pure-form-message-inline">Helps reduce changes detected caused by sites shuffling lines around, combine with <i>check unique lines</i> below.</span>
|
||||
<span class="pure-form-message-inline">{{ _('Helps reduce changes detected caused by sites shuffling lines around, combine with') }} <i>{{ _('check unique lines') }}</i> {{ _('below.') }}</span>
|
||||
</fieldset>
|
||||
<fieldset class="pure-control-group">
|
||||
{{ render_checkbox_field(form.trim_text_whitespace) }}
|
||||
<span class="pure-form-message-inline">Remove any whitespace before and after each line of text</span>
|
||||
<span class="pure-form-message-inline">{{ _('Remove any whitespace before and after each line of text') }}</span>
|
||||
</fieldset>
|
||||
{% include "edit/text-options.html" %}
|
||||
</div>
|
||||
</div>
|
||||
<div id="text-preview" style="display: none;" >
|
||||
<script>
|
||||
const preview_text_edit_filters_url="{{url_for('ui.ui_edit.watch_get_preview_rendered', uuid=uuid)}}";
|
||||
</script>
|
||||
<br>
|
||||
{#<div id="text-preview-controls"><span id="text-preview-refresh" class="pure-button button-xsmall">Refresh</span></div>#}
|
||||
<div class="minitabs-wrapper">
|
||||
<div class="minitabs-content">
|
||||
<div id="text-preview-inner" class="monospace-preview">
|
||||
<p>Loading...</p>
|
||||
</div>
|
||||
<div id="text-preview-before-inner" style="display: none;" class="monospace-preview">
|
||||
<p>Loading...</p>
|
||||
</div>
|
||||
<script>
|
||||
const preview_text_edit_filters_url="{{url_for('ui.ui_edit.watch_get_preview_rendered', uuid=uuid)}}";
|
||||
</script>
|
||||
<br>
|
||||
{#<div id="text-preview-controls"><span id="text-preview-refresh" class="pure-button button-xsmall">Refresh</span></div>#}
|
||||
<div class="minitabs-wrapper">
|
||||
<div class="minitabs-content">
|
||||
<div id="text-preview-inner" class="monospace-preview">
|
||||
<p>{{ _('Loading...') }}</p>
|
||||
</div>
|
||||
</div>
|
||||
<div id="text-preview-before-inner" style="display: none;" class="monospace-preview">
|
||||
<p>{{ _('Loading...') }}</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
{{ highlight_trigger_ignored_explainer() }}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
@@ -377,41 +374,55 @@ Math: {{ 1 + 1 }}") }}
|
||||
{{ extra_form_content|safe }}
|
||||
</div>
|
||||
{% endif %}
|
||||
{% if watch['processor'] == 'text_json_diff' %}
|
||||
{% if watch['processor'] == 'text_json_diff' or watch['processor'] == 'image_ssim_diff' %}
|
||||
<div class="tab-pane-inner visual-selector-ui" id="visualselector">
|
||||
<img class="beta-logo" src="{{url_for('static_content', group='images', filename='beta-logo.png')}}" alt="New beta functionality">
|
||||
|
||||
<fieldset>
|
||||
<div class="pure-control-group">
|
||||
{% if watch_needs_selenium_or_playwright %}
|
||||
{% if system_has_playwright_configured %}
|
||||
<span class="pure-form-message-inline" id="visual-selector-heading">
|
||||
The Visual Selector tool lets you select the <i>text</i> elements that will be used for the change detection. It automatically fills-in the filters in the "CSS/JSONPath/JQ/XPath Filters" box of the <a href="#filters-and-triggers">Filters & Triggers</a> tab. Use <strong>Shift+Click</strong> to select multiple items.
|
||||
</span>
|
||||
{% if capabilities.supports_screenshots and capabilities.supports_xpath_element_data %}
|
||||
{% if visual_selector_data_ready %}
|
||||
<span class="pure-form-message-inline" id="visual-selector-heading">
|
||||
{{ _('The Visual Selector tool lets you select the') }} <i>{{ _('text') }}</i> {{ _('elements that will be used for the change detection. It automatically fills-in the filters in the "CSS/JSONPath/JQ/XPath Filters" box of the') }} <a href="#filters-and-triggers">{{ _('Filters & Triggers') }}</a> {{ _('tab. Use') }} <strong>{{ _('Shift+Click') }}</strong> {{ _('to select multiple items.') }}
|
||||
</span>
|
||||
|
||||
<div id="selector-header">
|
||||
<a id="clear-selector" class="pure-button button-secondary button-xsmall" style="font-size: 70%">Clear selection</a>
|
||||
<!-- visual selector IMG will try to load, it will either replace this or on error replace it with some handy text -->
|
||||
<i class="fetching-update-notice" style="font-size: 80%;">One moment, fetching screenshot and element information..</i>
|
||||
</div>
|
||||
<div id="selector-wrapper" style="display: none">
|
||||
<!-- request the screenshot and get the element offset info ready -->
|
||||
<!-- use img src ready load to know everything is ready to map out -->
|
||||
<!-- @todo: maybe something interesting like a field to select 'elements that contain text... and their parents n' -->
|
||||
<img id="selector-background" >
|
||||
<canvas id="selector-canvas"></canvas>
|
||||
</div>
|
||||
<div id="selector-current-xpath" style="overflow-x: hidden"><strong>Currently:</strong> <span class="text">Loading...</span></div>
|
||||
{% else %}
|
||||
{# The watch needed chrome but system says that playwright is not ready #}
|
||||
{{ playwright_warning() }}
|
||||
{% endif %}
|
||||
{% if system_has_webdriver_configured %}
|
||||
<strong>Selenium/Webdriver cant be used here because it wont fetch screenshots reliably.</strong>
|
||||
{% endif %}
|
||||
{% if watch['processor'] == 'image_ssim_diff' %}
|
||||
<div id="selection-mode-controls" style="margin: 10px 0; padding: 10px; background: var(--color-background-tab); border-radius: 5px;">
|
||||
<label style="font-weight: 600; margin-right: 15px;">{{ _('Selection Mode:') }}</label>
|
||||
<label style="margin-right: 15px;">
|
||||
<input type="radio" name="selector-mode" value="element" style="margin-right: 5px;">
|
||||
{{ _('Select by element') }}
|
||||
</label>
|
||||
<label>
|
||||
<input type="radio" name="selector-mode" value="draw" checked style="margin-right: 5px;">
|
||||
{{ _('Draw area') }}
|
||||
</label>
|
||||
{{ render_field(form.processor_config_bounding_box) }}
|
||||
{{ render_field(form.processor_config_selection_mode) }}
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
<div id="selector-header">
|
||||
<a id="clear-selector" class="pure-button button-secondary button-xsmall" style="font-size: 70%">{{ _('Clear selection') }}</a>
|
||||
<!-- visual selector IMG will try to load, it will either replace this or on error replace it with some handy text -->
|
||||
<i class="fetching-update-notice" style="font-size: 80%;">{{ _('One moment, fetching screenshot and element information..') }}</i>
|
||||
</div>
|
||||
<div id="selector-wrapper" style="display: none">
|
||||
<!-- request the screenshot and get the element offset info ready -->
|
||||
<!-- use img src ready load to know everything is ready to map out -->
|
||||
<!-- @todo: maybe something interesting like a field to select 'elements that contain text... and their parents n' -->
|
||||
<img id="selector-background" >
|
||||
<canvas id="selector-canvas"></canvas>
|
||||
</div>
|
||||
<div id="selector-current-xpath" style="overflow-x: hidden"><strong>{{ _('Currently:') }}</strong> <span class="text">{{ _('Loading...') }}</span></div>
|
||||
{% else %}
|
||||
<strong>{{ _('Visual Selector data is not ready, watch needs to be checked atleast once.') }}</strong>
|
||||
{% endif %}
|
||||
{% else %}
|
||||
{# "This functionality needs chrome.." #}
|
||||
{{ only_playwright_type_watches_warning() }}
|
||||
<p>
|
||||
<strong>{{ _('Sorry, this functionality only works with fetchers that support Javascript and screenshots (such as playwright etc).') }}<br>
|
||||
{{ _('You need to') }} <a href="#request">{{ _('Set the fetch method') }}</a> {{ _('to one that supports Javascript and screenshots.') }}</strong>
|
||||
</p>
|
||||
{% endif %}
|
||||
</div>
|
||||
</fieldset>
|
||||
@@ -427,27 +438,27 @@ Math: {{ 1 + 1 }}") }}
|
||||
<table class="pure-table" id="stats-table">
|
||||
<tbody>
|
||||
<tr>
|
||||
<td>Check count</td>
|
||||
<td>{{ _('Check count') }}</td>
|
||||
<td>{{ "{:,}".format( watch.check_count) }}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Consecutive filter failures</td>
|
||||
<td>{{ _('Consecutive filter failures') }}</td>
|
||||
<td>{{ "{:,}".format( watch.consecutive_filter_failures) }}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>History length</td>
|
||||
<td>{{ _('History length') }}</td>
|
||||
<td>{{ "{:,}".format(watch.history|length) }}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Last fetch duration</td>
|
||||
<td>{{ _('Last fetch duration') }}</td>
|
||||
<td>{{ watch.fetch_time }}s</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Notification alert count</td>
|
||||
<td>{{ _('Notification alert count') }}</td>
|
||||
<td>{{ watch.notification_alert_count }}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Server type reply</td>
|
||||
<td>{{ _('Server type reply') }}</td>
|
||||
<td>{{ watch.get('remote_server_reply') }}</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
@@ -461,7 +472,7 @@ Math: {{ 1 + 1 }}") }}
|
||||
|
||||
{% if watch.history_n %}
|
||||
<p>
|
||||
<a href="{{url_for('ui.ui_edit.watch_get_latest_html', uuid=uuid)}}" class="pure-button button-small">Download latest HTML snapshot</a>
|
||||
<a href="{{url_for('ui.ui_edit.watch_get_latest_html', uuid=uuid)}}" class="pure-button button-small">{{ _('Download latest HTML snapshot') }}</a>
|
||||
</p>
|
||||
{% endif %}
|
||||
|
||||
@@ -471,11 +482,22 @@ Math: {{ 1 + 1 }}") }}
|
||||
<div class="pure-control-group">
|
||||
{{ render_button(form.save_button) }}
|
||||
<a href="{{url_for('ui.form_delete', uuid=uuid)}}"
|
||||
class="pure-button button-error ">Delete</a>
|
||||
class="pure-button button-error"
|
||||
data-requires-confirm
|
||||
data-confirm-type="danger"
|
||||
data-confirm-title="{{ _('Delete Watch?') }}"
|
||||
data-confirm-message="<p>{{ _('Are you sure you want to delete the watch for:') }}</p><p><strong>{{ watch.get('url', 'this watch') }}</strong></p><p>{{ _('This action cannot be undone.') }}</p>"
|
||||
data-confirm-button="{{ _('Delete') }}">{{ _('Delete') }}</a>
|
||||
{% if watch.history_n %}<a href="{{url_for('ui.clear_watch_history', uuid=uuid)}}"
|
||||
class="pure-button button-error">Clear History</a>{% endif %}
|
||||
class="pure-button button-error"
|
||||
data-requires-confirm
|
||||
data-confirm-type="warning"
|
||||
data-confirm-title="{{ _('Clear History?') }}"
|
||||
data-confirm-message="<p>{{ _('Are you sure you want to clear all history for:') }}</p><p><strong>{{ watch.get('url', 'this watch') }}</strong></p><p>{{ _('This will remove all snapshots and previous versions. This action cannot be undone.') }}</p>"
|
||||
data-confirm-button="{{ _('Clear History') }}">{{ _('Clear History') }}</a>{% endif %}
|
||||
<a href="{{url_for('ui.form_clone', uuid=uuid)}}"
|
||||
class="pure-button">Clone & Edit</a>
|
||||
class="pure-button">{{ _('Clone & Edit') }}</a>
|
||||
<a href="{{ url_for('rss.rss_single_watch', uuid=uuid, token=app_rss_token)}}"><img alt="{{ _('RSS Feed for this watch') }}" style="padding: .5em 1em;" src="{{url_for('static_content', group='images', filename='generic_feed-icon.svg')}}" height="15"></a>
|
||||
</div>
|
||||
</div>
|
||||
</form>
|
||||
|
||||
@@ -1,9 +1,11 @@
|
||||
{% extends 'base.html' %}
|
||||
|
||||
{% from '_helpers.html' import highlight_trigger_ignored_explainer %}
|
||||
{% block content %}
|
||||
<script>
|
||||
const screenshot_url = "{{url_for('static_content', group='screenshot', filename=uuid)}}";
|
||||
const triggered_line_numbers = {{ triggered_line_numbers|tojson }};
|
||||
const triggered_line_numbers = {{ highlight_triggered_line_numbers|tojson }};
|
||||
const ignored_line_numbers = {{ highlight_ignored_line_numbers|tojson }};
|
||||
const blocked_line_numbers = {{ highlight_blocked_line_numbers|tojson }};
|
||||
{% if last_error_screenshot %}
|
||||
const error_screenshot_url = "{{url_for('static_content', group='screenshot', filename=uuid, error_screenshot=1) }}";
|
||||
{% endif %}
|
||||
@@ -14,10 +16,10 @@
|
||||
<script src="{{ url_for('static_content', group='js', filename='preview.js') }}" defer></script>
|
||||
<script src="{{ url_for('static_content', group='js', filename='tabs.js') }}" defer></script>
|
||||
{% if versions|length >= 2 %}
|
||||
<div id="settings" style="text-align: center;">
|
||||
<div id="diff-form" style="text-align: center;">
|
||||
<form class="pure-form " action="" method="POST">
|
||||
<fieldset>
|
||||
<label for="preview-version">Select timestamp</label> <select id="preview-version"
|
||||
<label for="preview-version">{{ _('Select timestamp') }}</label> <select id="preview-version"
|
||||
name="from_version"
|
||||
class="needs-localtime">
|
||||
{% for version in versions|reverse %}
|
||||
@@ -26,27 +28,27 @@
|
||||
</option>
|
||||
{% endfor %}
|
||||
</select>
|
||||
<button type="submit" class="pure-button pure-button-primary">Go</button>
|
||||
<button type="submit" class="pure-button pure-button-primary">{{ _('Go') }}</button>
|
||||
|
||||
</fieldset>
|
||||
</form>
|
||||
<br>
|
||||
<strong>Keyboard: </strong><a href="" class="pure-button pure-button-primary" id="btn-previous">
|
||||
← Previous</a> <a class="pure-button pure-button-primary" id="btn-next" href="">
|
||||
→ Next</a>
|
||||
<strong>{{ _('Keyboard:') }} </strong><a href="" class="pure-button pure-button-primary" id="btn-previous">
|
||||
← {{ _('Previous') }}</a> <a class="pure-button pure-button-primary" id="btn-next" href="">
|
||||
→ {{ _('Next') }}</a>
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
<div class="tabs">
|
||||
<ul>
|
||||
{% if last_error_text %}
|
||||
<li class="tab" id="error-text-tab"><a href="#error-text">Error Text</a></li> {% endif %}
|
||||
<li class="tab" id="error-text-tab"><a href="#error-text">{{ _('Error Text') }}</a></li> {% endif %}
|
||||
{% if last_error_screenshot %}
|
||||
<li class="tab" id="error-screenshot-tab"><a href="#error-screenshot">Error Screenshot</a>
|
||||
<li class="tab" id="error-screenshot-tab"><a href="#error-screenshot">{{ _('Error Screenshot') }}</a>
|
||||
</li> {% endif %}
|
||||
{% if history_n > 0 %}
|
||||
<li class="tab" id="text-tab"><a href="#text">Text</a></li>
|
||||
<li class="tab" id="screenshot-tab"><a href="#screenshot">Screenshot</a></li>
|
||||
<li class="tab" id="text-tab"><a href="#text">{{ _('Text') }}</a></li>
|
||||
<li class="tab" id="screenshot-tab"><a href="#screenshot">{{ _('Current screenshot') }}</a></li>
|
||||
{% endif %}
|
||||
</ul>
|
||||
</div>
|
||||
@@ -54,50 +56,39 @@
|
||||
|
||||
<div id="diff-ui">
|
||||
<div class="tab-pane-inner" id="error-text">
|
||||
<div class="snapshot-age error">{{ watch.error_text_ctime|format_seconds_ago }} seconds ago</div>
|
||||
<div class="snapshot-age error">{{ watch.error_text_ctime|format_seconds_ago }} {{ _('seconds ago') }}</div>
|
||||
<pre>
|
||||
{{ last_error_text }}
|
||||
</pre>
|
||||
</div>
|
||||
|
||||
<div class="tab-pane-inner" id="error-screenshot">
|
||||
<div class="snapshot-age error">{{ watch.snapshot_error_screenshot_ctime|format_seconds_ago }} seconds ago
|
||||
<div class="snapshot-age error">{{ watch.snapshot_error_screenshot_ctime|format_seconds_ago }} {{ _('seconds ago') }}
|
||||
</div>
|
||||
<img id="error-screenshot-img" style="max-width: 80%"
|
||||
alt="Current erroring screenshot from most recent request">
|
||||
alt="{{ _('Current erroring screenshot from most recent request') }}">
|
||||
</div>
|
||||
|
||||
<div class="tab-pane-inner" id="text">
|
||||
{{ highlight_trigger_ignored_explainer() }}
|
||||
<div class="snapshot-age">{{ current_version|format_timestamp_timeago }}</div>
|
||||
<span class="tip"><strong>Pro-tip</strong>: Highlight text to add to ignore filters</span>
|
||||
|
||||
<table>
|
||||
<tbody>
|
||||
<tr>
|
||||
<td id="diff-col" class="highlightable-filter">
|
||||
<pre style="border-left: 2px solid #ddd;">
|
||||
{{ content }}
|
||||
</pre>
|
||||
</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
<pre id="difference" style="border-left: 2px solid #ddd;">{{ content| diff_unescape_difference_spans }}</pre>
|
||||
</div>
|
||||
|
||||
<div class="tab-pane-inner" id="screenshot">
|
||||
<div class="tip">
|
||||
For now, Differences are performed on text, not graphically, only the latest screenshot is available.
|
||||
{{ _('For now, Differences are performed on text, not graphically, only the latest screenshot is available.') }}
|
||||
</div>
|
||||
<br>
|
||||
{% if is_html_webdriver %}
|
||||
{% if capabilities.supports_screenshots %}
|
||||
{% if screenshot %}
|
||||
<div class="snapshot-age">{{ watch.snapshot_screenshot_ctime|format_timestamp_timeago }}</div>
|
||||
<img style="max-width: 80%" id="screenshot-img" alt="Current screenshot from most recent request">
|
||||
<img style="max-width: 80%" id="screenshot-img" alt="{{ _('Current screenshot from most recent request') }}">
|
||||
{% else %}
|
||||
No screenshot available just yet! Try rechecking the page.
|
||||
{{ _('No screenshot available just yet! Try rechecking the page.') }}
|
||||
{% endif %}
|
||||
{% else %}
|
||||
<strong>Screenshot requires Playwright/WebDriver enabled</strong>
|
||||
<strong>{{ _('Screenshot requires a Content Fetcher ( Sockpuppetbrowser, selenium, etc ) that supports screenshots.') }}</strong>
|
||||
{% endif %}
|
||||
</div>
|
||||
</div>
|
||||
@@ -1,207 +1,12 @@
|
||||
from flask import Blueprint, request, redirect, url_for, flash, render_template, make_response, send_from_directory, abort
|
||||
import os
|
||||
import time
|
||||
from loguru import logger
|
||||
|
||||
from flask import Blueprint, request, redirect, url_for, flash
|
||||
from flask_babel import gettext
|
||||
from changedetectionio.store import ChangeDetectionStore
|
||||
from changedetectionio.auth_decorator import login_optionally_required
|
||||
from changedetectionio import html_tools
|
||||
from changedetectionio import worker_handler
|
||||
|
||||
|
||||
def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMetaData, watch_check_update):
|
||||
views_blueprint = Blueprint('ui_views', __name__, template_folder="../ui/templates")
|
||||
|
||||
@views_blueprint.route("/preview/<string:uuid>", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def preview_page(uuid):
|
||||
content = []
|
||||
versions = []
|
||||
timestamp = None
|
||||
|
||||
# More for testing, possible to return the first/only
|
||||
if uuid == 'first':
|
||||
uuid = list(datastore.data['watching'].keys()).pop()
|
||||
|
||||
try:
|
||||
watch = datastore.data['watching'][uuid]
|
||||
except KeyError:
|
||||
flash("No history found for the specified link, bad link?", "error")
|
||||
return redirect(url_for('watchlist.index'))
|
||||
|
||||
system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver'
|
||||
extra_stylesheets = [url_for('static_content', group='styles', filename='diff.css')]
|
||||
|
||||
is_html_webdriver = False
|
||||
if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver' or watch.get('fetch_backend', '').startswith('extra_browser_'):
|
||||
is_html_webdriver = True
|
||||
triggered_line_numbers = []
|
||||
if datastore.data['watching'][uuid].history_n == 0 and (watch.get_error_text() or watch.get_error_snapshot()):
|
||||
flash("Preview unavailable - No fetch/check completed or triggers not reached", "error")
|
||||
else:
|
||||
# So prepare the latest preview or not
|
||||
preferred_version = request.args.get('version')
|
||||
versions = list(watch.history.keys())
|
||||
timestamp = versions[-1]
|
||||
if preferred_version and preferred_version in versions:
|
||||
timestamp = preferred_version
|
||||
|
||||
try:
|
||||
versions = list(watch.history.keys())
|
||||
content = watch.get_history_snapshot(timestamp)
|
||||
|
||||
triggered_line_numbers = html_tools.strip_ignore_text(content=content,
|
||||
wordlist=watch['trigger_text'],
|
||||
mode='line numbers'
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
content.append({'line': f"File doesnt exist or unable to read timestamp {timestamp}", 'classes': ''})
|
||||
|
||||
output = render_template("preview.html",
|
||||
content=content,
|
||||
current_version=timestamp,
|
||||
history_n=watch.history_n,
|
||||
extra_stylesheets=extra_stylesheets,
|
||||
extra_title=f" - Diff - {watch.label} @ {timestamp}",
|
||||
triggered_line_numbers=triggered_line_numbers,
|
||||
current_diff_url=watch['url'],
|
||||
screenshot=watch.get_screenshot(),
|
||||
watch=watch,
|
||||
uuid=uuid,
|
||||
is_html_webdriver=is_html_webdriver,
|
||||
last_error=watch['last_error'],
|
||||
last_error_text=watch.get_error_text(),
|
||||
last_error_screenshot=watch.get_error_snapshot(),
|
||||
versions=versions
|
||||
)
|
||||
|
||||
return output
|
||||
|
||||
@views_blueprint.route("/diff/<string:uuid>", methods=['POST'])
|
||||
@login_optionally_required
|
||||
def diff_history_page_build_report(uuid):
|
||||
from changedetectionio import forms
|
||||
|
||||
# More for testing, possible to return the first/only
|
||||
if uuid == 'first':
|
||||
uuid = list(datastore.data['watching'].keys()).pop()
|
||||
|
||||
try:
|
||||
watch = datastore.data['watching'][uuid]
|
||||
except KeyError:
|
||||
flash("No history found for the specified link, bad link?", "error")
|
||||
return redirect(url_for('watchlist.index'))
|
||||
|
||||
# For submission of requesting an extract
|
||||
extract_form = forms.extractDataForm(formdata=request.form,
|
||||
data={'extract_regex': request.form.get('extract_regex', '')}
|
||||
)
|
||||
if not extract_form.validate():
|
||||
flash("An error occurred, please see below.", "error")
|
||||
return _render_diff_template(uuid, extract_form)
|
||||
|
||||
else:
|
||||
extract_regex = request.form.get('extract_regex', '').strip()
|
||||
output = watch.extract_regex_from_all_history(extract_regex)
|
||||
if output:
|
||||
watch_dir = os.path.join(datastore.datastore_path, uuid)
|
||||
response = make_response(send_from_directory(directory=watch_dir, path=output, as_attachment=True))
|
||||
response.headers['Content-type'] = 'text/csv'
|
||||
response.headers['Cache-Control'] = 'no-cache, no-store, must-revalidate'
|
||||
response.headers['Pragma'] = 'no-cache'
|
||||
response.headers['Expires'] = "0"
|
||||
return response
|
||||
|
||||
flash('No matches found while scanning all of the watch history for that RegEx.', 'error')
|
||||
return redirect(url_for('ui.ui_views.diff_history_page', uuid=uuid) + '#extract')
|
||||
|
||||
def _render_diff_template(uuid, extract_form=None):
    """Render ``diff.html`` for one watch, comparing two history snapshots.

    Args:
        uuid: Key of the watch in ``datastore.data['watching']``. The literal
            ``'first'`` selects the last key of that mapping (testing aid).
        extract_form: Optional, already constructed extract-data form (for
            example one that just failed validation). When ``None`` a new
            form is built from the current request.

    Returns:
        The rendered template, or a redirect to the watch list when the watch
        is unknown or has no history snapshots at all.

    Side effects:
        Records the current time as the watch's last-viewed time through
        ``datastore.set_last_viewed`` before rendering.
    """
    from changedetectionio import forms

    # More for testing, possible to return the first/only
    if uuid == 'first':
        uuid = list(datastore.data['watching'].keys()).pop()

    extra_stylesheets = [url_for('static_content', group='styles', filename='diff.css')]
    try:
        watch = datastore.data['watching'][uuid]
    except KeyError:
        flash("No history found for the specified link, bad link?", "error")
        return redirect(url_for('watchlist.index'))

    # Use provided form or create a new one
    if extract_form is None:
        extract_form = forms.extractDataForm(formdata=request.form,
                                             data={'extract_regex': request.form.get('extract_regex', '')}
                                             )

    dates = list(watch.history.keys())

    # Without any snapshot there is nothing to compare; dates[-1] below would
    # raise IndexError, so treat it like a link that has no history.
    if not dates:
        flash("No history found for the specified link, bad link?", "error")
        return redirect(url_for('watchlist.index'))

    # If a "from_version" was requested, then find it (or the closest one)
    # Also set "from version" to be the closest version to the one that was last viewed.
    best_last_viewed_timestamp = watch.get_from_version_based_on_last_viewed
    if best_last_viewed_timestamp:
        from_version_timestamp = best_last_viewed_timestamp
    else:
        # Default to the snapshot before the newest one; with a single
        # snapshot fall back to that snapshot instead of raising IndexError.
        from_version_timestamp = dates[-2] if len(dates) >= 2 else dates[-1]
    from_version = request.args.get('from_version', from_version_timestamp)

    # Use the current one if nothing was specified
    to_version = request.args.get('to_version', str(dates[-1]))

    # A snapshot that cannot be read must not break the page: log it and show
    # a placeholder text in place of the snapshot contents.
    try:
        to_version_file_contents = watch.get_history_snapshot(timestamp=to_version)
    except Exception as e:
        logger.error(f"Unable to read watch history to-version for version {to_version}: {str(e)}")
        to_version_file_contents = f"Unable to read to-version at {to_version}.\n"

    try:
        from_version_file_contents = watch.get_history_snapshot(timestamp=from_version)
    except Exception as e:
        logger.error(f"Unable to read watch history from-version for version {from_version}: {str(e)}")
        # Previously this placeholder said "to-version", which pointed the
        # reader at the wrong snapshot.
        from_version_file_contents = f"Unable to read from-version at {from_version}.\n"

    screenshot_url = watch.get_screenshot()

    system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver'

    # True when the watch uses the webdriver fetcher directly, inherits it
    # from the system default, or uses an "extra_browser_*" backend.
    is_html_webdriver = False
    if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver' or watch.get('fetch_backend', '').startswith('extra_browser_'):
        is_html_webdriver = True

    # Only meaningful when a password is configured (in settings or through
    # the SALTED_PASS environment variable): then it is the inverse of the
    # 'shared_diff_access' setting.
    password_enabled_and_share_is_off = False
    if datastore.data['settings']['application'].get('password') or os.getenv("SALTED_PASS", False):
        password_enabled_and_share_is_off = not datastore.data['settings']['application'].get('shared_diff_access')

    datastore.set_last_viewed(uuid, time.time())

    return render_template("diff.html",
                           current_diff_url=watch['url'],
                           from_version=str(from_version),
                           to_version=str(to_version),
                           extra_stylesheets=extra_stylesheets,
                           extra_title=f" - Diff - {watch.label}",
                           extract_form=extract_form,
                           is_html_webdriver=is_html_webdriver,
                           last_error=watch['last_error'],
                           last_error_screenshot=watch.get_error_snapshot(),
                           last_error_text=watch.get_error_text(),
                           left_sticky=True,
                           newest=to_version_file_contents,
                           newest_version_timestamp=dates[-1],
                           password_enabled_and_share_is_off=password_enabled_and_share_is_off,
                           from_version_file_contents=from_version_file_contents,
                           to_version_file_contents=to_version_file_contents,
                           screenshot=screenshot_url,
                           uuid=uuid,
                           versions=dates,  # every snapshot key, newest included
                           watch_a=watch
                           )
|
||||
|
||||
@views_blueprint.route("/diff/<string:uuid>", methods=['GET'])
@login_optionally_required
def diff_history_page(uuid):
    """GET view of a watch's diff page.

    Delegates entirely to ``_render_diff_template``; no extract form is
    supplied, so the helper builds one from the current request.
    """
    rendered_page = _render_diff_template(uuid)
    return rendered_page
|
||||
|
||||
@views_blueprint.route("/form/add/quickwatch", methods=['POST'])
|
||||
@login_optionally_required
|
||||
@@ -216,7 +21,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
|
||||
url = request.form.get('url').strip()
|
||||
if datastore.url_exists(url):
|
||||
flash(f'Warning, URL {url} already exists', "notice")
|
||||
flash(gettext('Warning, URL {} already exists').format(url), "notice")
|
||||
|
||||
add_paused = request.form.get('edit_and_watch_submit_button') != None
|
||||
processor = request.form.get('processor', 'text_json_diff')
|
||||
@@ -224,12 +29,12 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
|
||||
if new_uuid:
|
||||
if add_paused:
|
||||
flash('Watch added in Paused state, saving will unpause.')
|
||||
flash(gettext('Watch added in Paused state, saving will unpause.'))
|
||||
return redirect(url_for('ui.ui_edit.edit_page', uuid=new_uuid, unpause_on_save=1, tag=request.args.get('tag')))
|
||||
else:
|
||||
# Straight into the queue.
|
||||
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid}))
|
||||
flash("Watch added.")
|
||||
flash(gettext("Watch added."))
|
||||
|
||||
return redirect(url_for('watchlist.index', tag=request.args.get('tag','')))
|
||||
|
||||
|
||||
@@ -2,10 +2,11 @@ import os
|
||||
import time
|
||||
|
||||
from flask import Blueprint, request, make_response, render_template, redirect, url_for, flash, session
|
||||
from flask_login import current_user
|
||||
from flask_paginate import Pagination, get_page_parameter
|
||||
from flask_babel import gettext as _
|
||||
|
||||
from changedetectionio import forms
|
||||
from changedetectionio import processors
|
||||
from changedetectionio.store import ChangeDetectionStore
|
||||
from changedetectionio.auth_decorator import login_optionally_required
|
||||
|
||||
@@ -73,7 +74,10 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
|
||||
pagination = Pagination(page=page,
|
||||
total=total_count,
|
||||
per_page=datastore.data['settings']['application'].get('pager_size', 50), css_framework="semantic")
|
||||
per_page=datastore.data['settings']['application'].get('pager_size', 50),
|
||||
css_framework="semantic",
|
||||
display_msg=_('displaying <b>{start} - {end}</b> {record_name} in total <b>{total}</b>'),
|
||||
record_name=_('records'))
|
||||
|
||||
sorted_tags = sorted(datastore.data['settings']['application'].get('tags').items(), key=lambda x: x[1]['title'])
|
||||
|
||||
@@ -84,13 +88,19 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
app_rss_token=datastore.data['settings']['application'].get('rss_access_token'),
|
||||
datastore=datastore,
|
||||
errored_count=errored_count,
|
||||
extra_classes='has-queue' if not update_q.empty() else '',
|
||||
form=form,
|
||||
generate_tag_colors=processors.generate_processor_badge_colors,
|
||||
guid=datastore.data['app_guid'],
|
||||
has_proxies=datastore.proxy_list,
|
||||
hosted_sticky=os.getenv("SALTED_PASS", False) == False,
|
||||
now_time_server=round(time.time()),
|
||||
pagination=pagination,
|
||||
queued_uuids=[q_uuid.item['uuid'] for q_uuid in update_q.queue],
|
||||
processor_badge_css=processors.get_processor_badge_css(),
|
||||
processor_badge_texts=processors.get_processor_badge_texts(),
|
||||
processor_descriptions=processors.get_processor_descriptions(),
|
||||
queue_size=update_q.qsize(),
|
||||
queued_uuids=update_q.get_queued_uuids(),
|
||||
search_q=request.args.get('q', '').strip(),
|
||||
sort_attribute=request.args.get('sort') if request.args.get('sort') else request.cookies.get('sort'),
|
||||
sort_order=request.args.get('order') if request.args.get('order') else request.cookies.get('order'),
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
{%- from '_helpers.html' import render_simple_field, render_field, render_nolabel_field, sort_by_title -%}
|
||||
<script src="{{url_for('static_content', group='js', filename='jquery-3.6.0.min.js')}}"></script>
|
||||
<script src="{{url_for('static_content', group='js', filename='watch-overview.js')}}" defer></script>
|
||||
<script src="{{url_for('static_content', group='js', filename='modal.js')}}"></script>
|
||||
<script>let nowtimeserver={{ now_time_server }};</script>
|
||||
<script>let favicon_baseURL="{{ url_for('static_content', group='favicon', filename="PLACEHOLDER")}}";</script>
|
||||
<script>
|
||||
@@ -18,27 +19,57 @@ document.addEventListener('DOMContentLoaded', function() {
|
||||
background-repeat: no-repeat;
|
||||
transition: background-size 0.9s ease
|
||||
}
|
||||
|
||||
/* Auto-generated processor badge colors */
|
||||
{{ processor_badge_css|safe }}
|
||||
|
||||
/* Auto-generated tag colors */
|
||||
{%- for uuid, tag in tags -%}
|
||||
{%- if tag and tag.title -%}
|
||||
{%- set class_name = tag.title|sanitize_tag_class -%}
|
||||
{%- set colors = generate_tag_colors(tag.title) -%}
|
||||
.button-tag.tag-{{ class_name }} {
|
||||
background-color: {{ colors['light']['bg'] }};
|
||||
color: {{ colors['light']['color'] }};
|
||||
}
|
||||
|
||||
.watch-tag-list.tag-{{ class_name }} {
|
||||
background-color: {{ colors['light']['bg'] }};
|
||||
color: {{ colors['light']['color'] }};
|
||||
}
|
||||
|
||||
html[data-darkmode="true"] .button-tag.tag-{{ class_name }} {
|
||||
background-color: {{ colors['dark']['bg'] }};
|
||||
color: {{ colors['dark']['color'] }};
|
||||
}
|
||||
|
||||
html[data-darkmode="true"] .watch-tag-list.tag-{{ class_name }} {
|
||||
background-color: {{ colors['dark']['bg'] }};
|
||||
color: {{ colors['dark']['color'] }};
|
||||
}
|
||||
{%- endif -%}
|
||||
{%- endfor -%}
|
||||
</style>
|
||||
<div class="box" id="form-quick-watch-add">
|
||||
|
||||
<form class="pure-form" action="{{ url_for('ui.ui_views.form_quick_watch_add', tag=active_tag_uuid) }}" method="POST" id="new-watch-form">
|
||||
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}" >
|
||||
<fieldset>
|
||||
<legend>Add a new web page change detection watch</legend>
|
||||
<legend>{{ _('Add a new web page change detection watch') }}</legend>
|
||||
<div id="watch-add-wrapper-zone">
|
||||
{{ render_nolabel_field(form.url, placeholder="https://...", required=true) }}
|
||||
{{ render_nolabel_field(form.watch_submit_button, title="Watch this URL!" ) }}
|
||||
{{ render_nolabel_field(form.edit_and_watch_submit_button, title="Edit first then Watch") }}
|
||||
{{ render_nolabel_field(form.watch_submit_button, title=_("Watch this URL!") ) }}
|
||||
{{ render_nolabel_field(form.edit_and_watch_submit_button, title=_("Edit first then Watch") ) }}
|
||||
</div>
|
||||
<div id="watch-group-tag">
|
||||
{{ render_field(form.tags, value=active_tag.title if active_tag_uuid else '', placeholder="Watch group / tag", class="transparent-field") }}
|
||||
{{ render_field(form.tags, value=active_tag.title if active_tag_uuid else '', placeholder=_("Watch group / tag"), class="transparent-field") }}
|
||||
</div>
|
||||
<div id="quick-watch-processor-type">
|
||||
{{ render_simple_field(form.processor) }}
|
||||
</div>
|
||||
|
||||
</fieldset>
|
||||
<span style="color:#eee; font-size: 80%;"><img alt="Create a shareable link" style="height: 1em;display:inline-block;" src="{{url_for('static_content', group='images', filename='spread-white.svg')}}" > Tip: You can also add 'shared' watches. <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Sharing-a-Watch">More info</a></span>
|
||||
<span style="color:#eee; font-size: 80%;"><img alt="{{ _('Create a shareable link') }}" style="height: 1em;display:inline-block;" src="{{url_for('static_content', group='images', filename='spread-white.svg')}}" > {{ _("Tip: You can also add 'shared' watches.") }} <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Sharing-a-Watch">{{ _('More info') }}</a></span>
|
||||
</form>
|
||||
</div>
|
||||
<div class="box">
|
||||
@@ -46,29 +77,44 @@ document.addEventListener('DOMContentLoaded', function() {
|
||||
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}" >
|
||||
<input type="hidden" id="op_extradata" name="op_extradata" value="" >
|
||||
<div id="checkbox-operations">
|
||||
<button class="pure-button button-secondary button-xsmall" name="op" value="pause"><i data-feather="pause" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>Pause</button>
|
||||
<button class="pure-button button-secondary button-xsmall" name="op" value="unpause"><i data-feather="play" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>UnPause</button>
|
||||
<button class="pure-button button-secondary button-xsmall" name="op" value="mute"><i data-feather="volume-x" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>Mute</button>
|
||||
<button class="pure-button button-secondary button-xsmall" name="op" value="unmute"><i data-feather="volume-2" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>UnMute</button>
|
||||
<button class="pure-button button-secondary button-xsmall" name="op" value="recheck"><i data-feather="refresh-cw" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>Recheck</button>
|
||||
<button class="pure-button button-secondary button-xsmall" name="op" value="assign-tag" id="checkbox-assign-tag"><i data-feather="tag" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>Tag</button>
|
||||
<button class="pure-button button-secondary button-xsmall" name="op" value="mark-viewed"><i data-feather="eye" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>Mark viewed</button>
|
||||
<button class="pure-button button-secondary button-xsmall" name="op" value="notification-default"><i data-feather="bell" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>Use default notification</button>
|
||||
<button class="pure-button button-secondary button-xsmall" name="op" value="clear-errors"><i data-feather="x-circle" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>Clear errors</button>
|
||||
<button class="pure-button button-secondary button-xsmall" style="background: #dd4242;" name="op" value="clear-history"><i data-feather="trash-2" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>Clear/reset history</button>
|
||||
<button class="pure-button button-secondary button-xsmall" style="background: #dd4242;" name="op" value="delete"><i data-feather="trash" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>Delete</button>
|
||||
<button class="pure-button button-secondary button-xsmall" name="op" value="pause"><i data-feather="pause" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>{{ _('Pause') }}</button>
|
||||
<button class="pure-button button-secondary button-xsmall" name="op" value="unpause"><i data-feather="play" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>{{ _('UnPause') }}</button>
|
||||
<button class="pure-button button-secondary button-xsmall" name="op" value="mute"><i data-feather="volume-x" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>{{ _('Mute') }}</button>
|
||||
<button class="pure-button button-secondary button-xsmall" name="op" value="unmute"><i data-feather="volume-2" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>{{ _('UnMute') }}</button>
|
||||
<button class="pure-button button-secondary button-xsmall" name="op" value="recheck"><i data-feather="refresh-cw" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>{{ _('Recheck') }}</button>
|
||||
<button class="pure-button button-secondary button-xsmall" name="op" value="assign-tag" id="checkbox-assign-tag"><i data-feather="tag" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>{{ _('Tag') }}</button>
|
||||
<button class="pure-button button-secondary button-xsmall" name="op" value="mark-viewed"><i data-feather="eye" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>{{ _('Mark viewed') }}</button>
|
||||
<button class="pure-button button-secondary button-xsmall" name="op" value="notification-default"><i data-feather="bell" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>{{ _('Use default notification') }}</button>
|
||||
<button class="pure-button button-secondary button-xsmall" name="op" value="clear-errors"><i data-feather="x-circle" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>{{ _('Clear errors') }}</button>
|
||||
<button class="pure-button button-secondary button-xsmall" style="background: #dd4242;" name="op" value="clear-history"
|
||||
data-requires-confirm
|
||||
data-confirm-type="danger"
|
||||
data-confirm-title="{{ _('Clear Histories') }}"
|
||||
data-confirm-message="{{ _('<p>Are you sure you want to clear history for the selected items?</p><p>This action cannot be undone.</p>') }}"
|
||||
data-confirm-button="{{ _('OK') }}"><i data-feather="trash-2" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>{{ _('Clear/reset history') }}</button>
|
||||
<button class="pure-button button-secondary button-xsmall" style="background: #dd4242;" name="op" value="delete"
|
||||
data-requires-confirm
|
||||
data-confirm-type="danger"
|
||||
data-confirm-title="{{ _('Delete Watches?') }}"
|
||||
data-confirm-message="{{ _('<p>Are you sure you want to delete the selected watches?</strong></p><p>This action cannot be undone.</p>') }}"
|
||||
data-confirm-button="{{ _('Delete') }}"><i data-feather="trash" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>{{ _('Delete') }}</button>
|
||||
</div>
|
||||
{%- if watches|length >= pagination.per_page -%}
|
||||
{{ pagination.info }}
|
||||
{%- endif -%}
|
||||
{%- if search_q -%}<div id="search-result-info">Searching "<strong><i>{{search_q}}</i></strong>"</div>{%- endif -%}
|
||||
|
||||
<div id="stats_row">
|
||||
<div class="left">{%- if watches|length >= pagination.per_page -%}{{ pagination.info }}{%- endif -%}</div>
|
||||
<div class="right" >{{ _('Queued size') }}: <span id="queue-size-int">{{ queue_size }}</span></div>
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
{%- if search_q -%}<div id="search-result-info">{{ _('Searching') }} "<strong><i>{{search_q}}</i></strong>"</div>{%- endif -%}
|
||||
<div>
|
||||
<a href="{{url_for('watchlist.index')}}" class="pure-button button-tag {{'active' if not active_tag_uuid }}">All</a>
|
||||
<a href="{{url_for('watchlist.index')}}" class="pure-button button-tag {{'active' if not active_tag_uuid }}">{{ _('All') }}</a>
|
||||
|
||||
<!-- tag list -->
|
||||
{%- for uuid, tag in tags -%}
|
||||
{%- if tag != "" -%}
|
||||
<a href="{{url_for('watchlist.index', tag=uuid) }}" class="pure-button button-tag {{'active' if active_tag_uuid == uuid }}">{{ tag.title }}</a>
|
||||
<a href="{{url_for('watchlist.index', tag=uuid) }}" class="pure-button button-tag tag-{{ tag.title|sanitize_tag_class }} {{'active' if active_tag_uuid == uuid }}">{{ tag.title }}</a>
|
||||
{%- endif -%}
|
||||
{%- endfor -%}
|
||||
</div>
|
||||
@@ -101,19 +147,19 @@ document.addEventListener('DOMContentLoaded', function() {
|
||||
|
||||
<a class="{{ 'active '+link_order if sort_attribute == 'notification_muted' else 'inactive' }}" href="{{url_for('watchlist.index', sort='notification_muted', order=link_order, tag=active_tag_uuid)}}"><i data-feather="volume-2" style="vertical-align: bottom; width: 14px; height: 14px; margin-right: 4px;"></i><span class='arrow {{link_order}}'></span></a>
|
||||
</th>
|
||||
<th><a class="{{ 'active '+link_order if sort_attribute == 'label' else 'inactive' }}" href="{{url_for('watchlist.index', sort='label', order=link_order, tag=active_tag_uuid)}}">Website <span class='arrow {{link_order}}'></span></a></th>
|
||||
<th><a class="{{ 'active '+link_order if sort_attribute == 'label' else 'inactive' }}" href="{{url_for('watchlist.index', sort='label', order=link_order, tag=active_tag_uuid)}}">{{ _('Website') }} <span class='arrow {{link_order}}'></span></a></th>
|
||||
{%- if any_has_restock_price_processor -%}
|
||||
<th>Restock & Price</th>
|
||||
<th>{{ _('Restock & Price') }}</th>
|
||||
{%- endif -%}
|
||||
<th><a class="{{ 'active '+link_order if sort_attribute == 'last_checked' else 'inactive' }}" href="{{url_for('watchlist.index', sort='last_checked', order=link_order, tag=active_tag_uuid)}}"><span class="hide-on-mobile">Last</span> Checked <span class='arrow {{link_order}}'></span></a></th>
|
||||
<th><a class="{{ 'active '+link_order if sort_attribute == 'last_changed' else 'inactive' }}" href="{{url_for('watchlist.index', sort='last_changed', order=link_order, tag=active_tag_uuid)}}"><span class="hide-on-mobile">Last</span> Changed <span class='arrow {{link_order}}'></span></a></th>
|
||||
<th><a class="{{ 'active '+link_order if sort_attribute == 'last_checked' else 'inactive' }}" href="{{url_for('watchlist.index', sort='last_checked', order=link_order, tag=active_tag_uuid)}}"><span class="hide-on-mobile">{{ _('Last') }}</span> {{ _('Checked') }} <span class='arrow {{link_order}}'></span></a></th>
|
||||
<th><a class="{{ 'active '+link_order if sort_attribute == 'last_changed' else 'inactive' }}" href="{{url_for('watchlist.index', sort='last_changed', order=link_order, tag=active_tag_uuid)}}"><span class="hide-on-mobile">{{ _('Last') }}</span> {{ _('Changed') }} <span class='arrow {{link_order}}'></span></a></th>
|
||||
<th class="empty-cell"></th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{%- if not watches|length -%}
|
||||
<tr>
|
||||
<td colspan="{{ cols_required }}" style="text-wrap: wrap;">No website watches configured, please add a URL in the box above, or <a href="{{ url_for('imports.import_page')}}" >import a list</a>.</td>
|
||||
<td colspan="{{ cols_required }}" style="text-wrap: wrap;">{{ _('No web page change detection watches configured, please add a URL in the box above, or') }} <a href="{{ url_for('imports.import_page')}}" >{{ _('import a list') }}</a>.</td>
|
||||
</tr>
|
||||
{%- endif -%}
|
||||
|
||||
@@ -155,7 +201,7 @@ document.addEventListener('DOMContentLoaded', function() {
|
||||
<div class="flex-wrapper">
|
||||
{% if 'favicons_enabled' not in ui_settings or ui_settings['favicons_enabled'] %}
|
||||
<div>{# A page might have hundreds of these images, set IMG options for lazy loading, don't set SRC if we dont have it so it doesnt fetch the placeholder' #}
|
||||
<img alt="Favicon thumbnail" class="favicon" loading="lazy" decoding="async" fetchpriority="low" {% if favicon %} src="{{url_for('static_content', group='favicon', filename=watch.uuid)}}" {% else %} src='data:image/svg+xml;utf8,%3Csvg xmlns="http://www.w3.org/2000/svg" width="7.087" height="7.087" viewBox="0 0 7.087 7.087"%3E%3Ccircle cx="3.543" cy="3.543" r="3.279" stroke="%23e1e1e1" stroke-width="0.45" fill="none" opacity="0.74"/%3E%3C/svg%3E' {% endif %} />
|
||||
<img alt="Favicon thumbnail" class="favicon" loading="lazy" decoding="async" fetchpriority="low" {% if favicon %} src="{{url_for('static_content', group='favicon', filename=watch.uuid)}}" {% else %} src='data:image/svg+xml;utf8,%3Csvg xmlns="http://www.w3.org/2000/svg" width="7.087" height="7.087" viewBox="0 0 7.087 7.087"%3E%3Ccircle cx="3.543" cy="3.543" r="3.279" stroke="%23e1e1e1" stroke-width="0.45" fill="none" opacity="0.74"/%3E%3C/svg%3E' {% endif %} >
|
||||
</div>
|
||||
{% endif %}
|
||||
<div>
|
||||
@@ -167,26 +213,24 @@ document.addEventListener('DOMContentLoaded', function() {
|
||||
{% endif %}
|
||||
<a class="external" target="_blank" rel="noopener" href="{{ watch.link.replace('source:','') }}"> </a>
|
||||
</span>
|
||||
<div class="error-text" style="display:none;">{{ watch.compile_error_texts(has_proxies=datastore.proxy_list) }}</div>
|
||||
<div class="error-text" style="display:none;">{{ watch.compile_error_texts(has_proxies=datastore.proxy_list)|safe }}</div>
|
||||
{%- if watch['processor'] == 'text_json_diff' -%}
|
||||
{%- if watch['has_ldjson_price_data'] and not watch['track_ldjson_price_data'] -%}
|
||||
<div class="ldjson-price-track-offer">Switch to Restock & Price watch mode? <a href="{{url_for('price_data_follower.accept', uuid=watch.uuid)}}" class="pure-button button-xsmall">Yes</a> <a href="{{url_for('price_data_follower.reject', uuid=watch.uuid)}}" class="">No</a></div>
|
||||
{%- endif -%}
|
||||
{%- endif -%}
|
||||
{%- if watch['processor'] == 'restock_diff' -%}
|
||||
<span class="tracking-ldjson-price-data" title="Automatically following embedded price information"><img src="{{url_for('static_content', group='images', filename='price-tag-icon.svg')}}" class="status-icon price-follow-tag-icon" > Price</span>
|
||||
{%- if watch['processor'] and watch['processor'] in processor_badge_texts -%}
|
||||
<span class="processor-badge processor-badge-{{ watch['processor'] }}" title="{{ processor_descriptions.get(watch['processor'], watch['processor']) }}">{{ processor_badge_texts[watch['processor']] }}</span>
|
||||
{%- endif -%}
|
||||
{%- for watch_tag_uuid, watch_tag in datastore.get_all_tags_for_watch(watch['uuid']).items() -%}
|
||||
<span class="watch-tag-list">{{ watch_tag.title }}</span>
|
||||
<span class="watch-tag-list tag-{{ watch_tag.title|sanitize_tag_class }}">{{ watch_tag.title }}</span>
|
||||
{%- endfor -%}
|
||||
</div>
|
||||
<div class="status-icons">
|
||||
<a class="link-spread" href="{{url_for('ui.form_share_put_watch', uuid=watch.uuid)}}"><img src="{{url_for('static_content', group='images', filename='spread.svg')}}" class="status-icon icon icon-spread" title="Create a link to share watch config with others" ></a>
|
||||
{%- if watch.get_fetch_backend == "html_webdriver"
|
||||
or ( watch.get_fetch_backend == "system" and system_default_fetcher == 'html_webdriver' )
|
||||
or "extra_browser_" in watch.get_fetch_backend
|
||||
-%}
|
||||
<img class="status-icon" src="{{url_for('static_content', group='images', filename='google-chrome-icon.png')}}" alt="Using a Chrome browser" title="Using a Chrome browser" >
|
||||
{%- set effective_fetcher = watch.get_fetch_backend if watch.get_fetch_backend != "system" else system_default_fetcher -%}
|
||||
{%- if effective_fetcher and ("html_webdriver" in effective_fetcher or "html_" in effective_fetcher or "extra_browser_" in effective_fetcher) -%}
|
||||
{{ effective_fetcher|fetcher_status_icons }}
|
||||
{%- endif -%}
|
||||
{%- if watch.is_pdf -%}<img class="status-icon" src="{{url_for('static_content', group='images', filename='pdf-icon.svg')}}" alt="Converting PDF to text" >{%- endif -%}
|
||||
{%- if watch.has_browser_steps -%}<img class="status-icon status-browsersteps" src="{{url_for('static_content', group='images', filename='steps.svg')}}" alt="Browser Steps is enabled" >{%- endif -%}
|
||||
@@ -198,20 +242,20 @@ document.addEventListener('DOMContentLoaded', function() {
|
||||
<td class="restock-and-price">
|
||||
{%- if watch['processor'] == 'restock_diff' -%}
|
||||
{%- if watch.has_restock_info -%}
|
||||
<span class="restock-label {{'in-stock' if watch['restock']['in_stock'] else 'not-in-stock' }}" title="Detecting restock and price">
|
||||
<span class="restock-label {{'in-stock' if watch['restock']['in_stock'] else 'not-in-stock' }}" title="{{ _('Detecting restock and price') }}">
|
||||
<!-- maybe some object watch['processor'][restock_diff] or.. -->
|
||||
{%- if watch['restock']['in_stock']-%} In stock {%- else-%} Not in stock {%- endif -%}
|
||||
{%- if watch['restock']['in_stock']-%} {{ _('In stock') }} {%- else-%} {{ _('Not in stock') }} {%- endif -%}
|
||||
</span>
|
||||
{%- endif -%}
|
||||
|
||||
{%- if watch.get('restock') and watch['restock']['price'] != None -%}
|
||||
{%- if watch['restock']['price'] != None -%}
|
||||
<span class="restock-label price" title="Price">
|
||||
{{ watch['restock']['price']|format_number_locale }} {{ watch['restock']['currency'] }}
|
||||
<span class="restock-label price" title="{{ _('Price') }}">
|
||||
{{ watch['restock']['price']|format_number_locale if watch['restock'].get('price') else '' }} {{ watch['restock'].get('currency','') }}
|
||||
</span>
|
||||
{%- endif -%}
|
||||
{%- elif not watch.has_restock_info -%}
|
||||
<span class="restock-label error">No information</span>
|
||||
<span class="restock-label error">{{ _('No information') }}</span>
|
||||
{%- endif -%}
|
||||
{%- endif -%}
|
||||
</td>
|
||||
@@ -219,24 +263,24 @@ document.addEventListener('DOMContentLoaded', function() {
|
||||
{#last_checked becomes fetch-start-time#}
|
||||
<td class="last-checked" data-timestamp="{{ watch.last_checked }}" data-fetchduration={{ watch.fetch_time }} data-eta_complete="{{ watch.last_checked+watch.fetch_time }}" >
|
||||
<div class="spinner-wrapper" style="display:none;" >
|
||||
<span class="spinner"></span><span> Checking now</span>
|
||||
<span class="spinner"></span><span class="status-text"> {{ _('Checking now') }}</span>
|
||||
</div>
|
||||
<span class="innertext">{{watch|format_last_checked_time|safe}}</span>
|
||||
</td>
|
||||
<td class="last-changed" data-timestamp="{{ watch.last_changed }}">{%- if watch.history_n >=2 and watch.last_changed >0 -%}
|
||||
{{watch.last_changed|format_timestamp_timeago}}
|
||||
{%- else -%}
|
||||
Not yet
|
||||
{{ _('Not yet') }}
|
||||
{%- endif -%}
|
||||
</td>
|
||||
<td class="buttons">
|
||||
<div>
|
||||
{%- set target_attr = ' target="' ~ watch.uuid ~ '"' if datastore.data['settings']['application']['ui'].get('open_diff_in_new_tab') else '' -%}
|
||||
<a href="" class="already-in-queue-button recheck pure-button pure-button-primary" style="display: none;" disabled="disabled">Queued</a>
|
||||
<a href="{{ url_for('ui.form_watch_checknow', uuid=watch.uuid, tag=request.args.get('tag')) }}" data-op='recheck' class="ajax-op recheck pure-button pure-button-primary">Recheck</a>
|
||||
<a href="{{ url_for('ui.ui_edit.edit_page', uuid=watch.uuid, tag=active_tag_uuid)}}#general" class="pure-button pure-button-primary">Edit</a>
|
||||
<a href="{{ url_for('ui.ui_views.diff_history_page', uuid=watch.uuid)}}" {{target_attr}} class="pure-button pure-button-primary history-link" style="display: none;">History</a>
|
||||
<a href="{{ url_for('ui.ui_views.preview_page', uuid=watch.uuid)}}" {{target_attr}} class="pure-button pure-button-primary preview-link" style="display: none;">Preview</a>
|
||||
<a href="" class="already-in-queue-button recheck pure-button pure-button-primary" style="display: none;" disabled="disabled">{{ _('Queued') }}</a>
|
||||
<a href="{{ url_for('ui.form_watch_checknow', uuid=watch.uuid, tag=request.args.get('tag')) }}" data-op='recheck' class="ajax-op recheck pure-button pure-button-primary">{{ _('Recheck') }}</a>
|
||||
<a href="{{ url_for('ui.ui_edit.edit_page', uuid=watch.uuid, tag=active_tag_uuid)}}#general" class="pure-button pure-button-primary">{{ _('Edit') }}</a>
|
||||
<a href="{{ url_for('ui.ui_diff.diff_history_page', uuid=watch.uuid)}}" {{target_attr}} class="pure-button pure-button-primary history-link" style="display: none;">{{ _('History') }}</a>
|
||||
<a href="{{ url_for('ui.ui_preview.preview_page', uuid=watch.uuid)}}" {{target_attr}} class="pure-button pure-button-primary preview-link" style="display: none;">{{ _('Preview') }}</a>
|
||||
</div>
|
||||
</td>
|
||||
</tr>
|
||||
@@ -245,22 +289,21 @@ document.addEventListener('DOMContentLoaded', function() {
|
||||
</table>
|
||||
<ul id="post-list-buttons">
|
||||
<li id="post-list-with-errors" style="display: none;" >
|
||||
<a href="{{url_for('watchlist.index', with_errors=1, tag=request.args.get('tag')) }}" class="pure-button button-tag button-error">With errors ({{ errored_count }})</a>
|
||||
<a href="{{url_for('watchlist.index', with_errors=1, tag=request.args.get('tag')) }}" class="pure-button button-tag button-error">{{ _('With errors') }} ({{ errored_count }})</a>
|
||||
</li>
|
||||
<li id="post-list-mark-views" style="display: none;" >
|
||||
<a href="{{url_for('ui.mark_all_viewed',with_errors=request.args.get('with_errors',0)) }}" class="pure-button button-tag " id="mark-all-viewed">Mark all viewed</a>
|
||||
<a href="{{url_for('ui.mark_all_viewed',with_errors=request.args.get('with_errors',0)) }}" class="pure-button button-tag " id="mark-all-viewed">{{ _('Mark all viewed') }}</a>
|
||||
</li>
|
||||
{%- if active_tag_uuid -%}
|
||||
<li id="post-list-mark-views-tag">
|
||||
<a href="{{url_for('ui.mark_all_viewed', tag=active_tag_uuid) }}" class="pure-button button-tag " id="mark-all-viewed">Mark all viewed in '{{active_tag.title}}'</a>
|
||||
<a href="{{url_for('ui.mark_all_viewed', tag=active_tag_uuid) }}" class="pure-button button-tag " id="mark-all-viewed">{{ _("Mark all viewed in '%(title)s'", title=active_tag.title) }}</a>
|
||||
</li>
|
||||
{%- endif -%}
|
||||
<li id="post-list-unread" style="display: none;" >
|
||||
<a href="{{url_for('watchlist.index', unread=1, tag=request.args.get('tag')) }}" class="pure-button button-tag">Unread (<span id="unread-tab-counter">{{ unread_changes_count }}</span>)</a>
|
||||
<a href="{{url_for('watchlist.index', unread=1, tag=request.args.get('tag')) }}" class="pure-button button-tag">{{ _('Unread') }} (<span id="unread-tab-counter">{{ unread_changes_count }}</span>)</a>
|
||||
</li>
|
||||
<li>
|
||||
<a href="{{ url_for('ui.form_watch_checknow', tag=active_tag_uuid, with_errors=request.args.get('with_errors',0)) }}" class="pure-button button-tag" id="recheck-all">Recheck
|
||||
all {% if active_tag_uuid %} in '{{active_tag.title}}'{%endif%}</a>
|
||||
<a href="{{ url_for('ui.form_watch_checknow', tag=active_tag_uuid, with_errors=request.args.get('with_errors',0)) }}" class="pure-button button-tag" id="recheck-all">{{ _('Recheck all') }} {% if active_tag_uuid %} {{ _("in '%(title)s'", title=active_tag.title) }}{%endif%}</a>
|
||||
</li>
|
||||
<li>
|
||||
<a href="{{ url_for('rss.feed', tag=active_tag_uuid, token=app_rss_token)}}"><img alt="RSS Feed" id="feed-icon" src="{{url_for('static_content', group='images', filename='generic_feed-icon.svg')}}" height="15"></a>
|
||||
|
||||
@@ -1,3 +1,7 @@
|
||||
"""
|
||||
Levenshtein distance and similarity plugin for text change detection.
|
||||
Provides metrics for measuring text similarity between snapshots.
|
||||
"""
|
||||
import pluggy
|
||||
from loguru import logger
|
||||
|
||||
|
||||
@@ -1,3 +1,7 @@
|
||||
"""
|
||||
Word count plugin for content analysis.
|
||||
Provides word count metrics for snapshot content.
|
||||
"""
|
||||
import pluggy
|
||||
from loguru import logger
|
||||
|
||||
@@ -14,7 +18,7 @@ def count_words_in_history(watch, incoming_text=None):
|
||||
elif watch.history.keys():
|
||||
# When called from UI extras to count latest snapshot
|
||||
latest_key = list(watch.history.keys())[-1]
|
||||
latest_content = watch.get_history_snapshot(latest_key)
|
||||
latest_content = watch.get_history_snapshot(timestamp=latest_key)
|
||||
return len(latest_content.split())
|
||||
return 0
|
||||
except Exception as e:
|
||||
|
||||
@@ -7,6 +7,9 @@ import os
|
||||
# Visual Selector scraper - 'Button' is there because some sites have <button>OUT OF STOCK</button>.
|
||||
visualselector_xpath_selectors = 'div,span,form,table,tbody,tr,td,a,p,ul,li,h1,h2,h3,h4,header,footer,section,article,aside,details,main,nav,section,summary,button'
|
||||
|
||||
# Import hookimpl from centralized pluggy interface
|
||||
from changedetectionio.pluggy_interface import hookimpl
|
||||
|
||||
SCREENSHOT_MAX_HEIGHT_DEFAULT = 20000
|
||||
SCREENSHOT_DEFAULT_QUALITY = 40
|
||||
|
||||
@@ -18,7 +21,9 @@ SCREENSHOT_MAX_TOTAL_HEIGHT = int(os.getenv("SCREENSHOT_MAX_HEIGHT", SCREENSHOT_
|
||||
# The size at which we will switch to stitching method, when below this (and
|
||||
# MAX_TOTAL_HEIGHT which can be set by a user) we will use the default
|
||||
# screenshot method.
|
||||
SCREENSHOT_SIZE_STITCH_THRESHOLD = 8000
|
||||
# Increased from 8000 to 10000 for better performance (fewer chunks = faster)
|
||||
# Most modern GPUs support 16384x16384 textures, so 1280x10000 is safe
|
||||
SCREENSHOT_SIZE_STITCH_THRESHOLD = int(os.getenv("SCREENSHOT_CHUNK_HEIGHT", 10000))
|
||||
|
||||
# available_fetchers() will scan this implementation looking for anything starting with html_
|
||||
# this information is used in the form selections
|
||||
@@ -35,17 +40,54 @@ def available_fetchers():
|
||||
# See the if statement at the bottom of this file for how we switch between playwright and webdriver
|
||||
import inspect
|
||||
p = []
|
||||
|
||||
# Get built-in fetchers (but skip plugin fetchers that were added via setattr)
|
||||
for name, obj in inspect.getmembers(sys.modules[__name__], inspect.isclass):
|
||||
if inspect.isclass(obj):
|
||||
# @todo html_ is maybe better as fetcher_ or something
|
||||
# In this case, make sure to edit the default one in store.py and fetch_site_status.py
|
||||
if name.startswith('html_'):
|
||||
t = tuple([name, obj.fetcher_description])
|
||||
p.append(t)
|
||||
# Skip plugin fetchers that were already registered
|
||||
if name not in _plugin_fetchers:
|
||||
t = tuple([name, obj.fetcher_description])
|
||||
p.append(t)
|
||||
|
||||
# Get plugin fetchers from cache (already loaded at module init)
|
||||
for name, fetcher_class in _plugin_fetchers.items():
|
||||
if hasattr(fetcher_class, 'fetcher_description'):
|
||||
t = tuple([name, fetcher_class.fetcher_description])
|
||||
p.append(t)
|
||||
else:
|
||||
logger.warning(f"Plugin fetcher '{name}' does not have fetcher_description attribute")
|
||||
|
||||
return p
|
||||
|
||||
|
||||
def get_plugin_fetchers():
    """Load and return all plugin fetchers from the centralized plugin manager.

    Calls the ``register_content_fetcher`` hook and, for every truthy result,
    unpacks it as ``(name, fetcher_class)``, exposes the class as an attribute
    of this module and records it in the returned mapping.

    Failures are logged, never raised: if the hook call itself fails an empty
    dict is returned, and a single malformed plugin result is skipped without
    discarding the fetchers contributed by the other plugins.

    Returns:
        dict: fetcher name -> fetcher class for each successfully registered plugin.
    """
    from changedetectionio.pluggy_interface import plugin_manager

    fetchers = {}

    try:
        # Call the register_content_fetcher hook from all registered plugins
        results = plugin_manager.hook.register_content_fetcher()
    except Exception as e:
        logger.error(f"Error loading plugin fetchers: {e}")
        return fetchers

    this_module = sys.modules[__name__]
    for result in results:
        # Plugins that have nothing to register return a falsy value
        if not result:
            continue

        try:
            name, fetcher_class = result
            # Register in current module so hasattr() checks work.
            # Done before touching `fetchers` so a rejected name (e.g. not a str)
            # never leaves a half-registered entry behind.
            setattr(this_module, name, fetcher_class)
            fetchers[name] = fetcher_class
        except Exception as e:
            # One bad plugin must not prevent the remaining ones from loading
            logger.error(f"Error loading plugin fetchers: skipping invalid result {result!r} - {e}")
            continue

        logger.info(f"Registered plugin fetcher: {name} - {getattr(fetcher_class, 'fetcher_description', 'No description')}")

    return fetchers
|
||||
|
||||
|
||||
# Initialize plugins at module load time
|
||||
_plugin_fetchers = get_plugin_fetchers()
|
||||
|
||||
|
||||
# Decide which is the 'real' HTML webdriver, this is more a system wide config
|
||||
# rather than site-specific.
|
||||
use_playwright_as_chrome_fetcher = os.getenv('PLAYWRIGHT_DRIVER_URL', False)
|
||||
@@ -62,3 +104,8 @@ else:
|
||||
logger.debug("Falling back to selenium as fetcher")
|
||||
from .webdriver_selenium import fetcher as html_webdriver
|
||||
|
||||
|
||||
# Register built-in fetchers as plugins after all imports are complete
|
||||
from changedetectionio.pluggy_interface import register_builtin_fetchers
|
||||
register_builtin_fetchers()
|
||||
|
||||
|
||||
@@ -51,6 +51,7 @@ class Fetcher():
|
||||
favicon_blob = None
|
||||
instock_data = None
|
||||
instock_data_js = ""
|
||||
screenshot_format = None
|
||||
status_code = None
|
||||
webdriver_js_execute_code = None
|
||||
xpath_data = None
|
||||
@@ -64,6 +65,35 @@ class Fetcher():
|
||||
# Time ONTOP of the system defined env minimum time
|
||||
render_extract_delay = 0
|
||||
|
||||
# Fetcher capability flags - subclasses should override these
|
||||
# These indicate what features the fetcher supports
|
||||
supports_browser_steps = False # Can execute browser automation steps
|
||||
supports_screenshots = False # Can capture page screenshots
|
||||
supports_xpath_element_data = False # Can extract xpath element positions/data for visual selector
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
if kwargs and 'screenshot_format' in kwargs:
|
||||
self.screenshot_format = kwargs.get('screenshot_format')
|
||||
|
||||
|
||||
@classmethod
|
||||
def get_status_icon_data(cls):
|
||||
"""Return data for status icon to display in the watch overview.
|
||||
|
||||
This method can be overridden by subclasses to provide custom status icons.
|
||||
|
||||
Returns:
|
||||
dict or None: Dictionary with icon data:
|
||||
{
|
||||
'filename': 'icon-name.svg', # Icon filename
|
||||
'alt': 'Alt text', # Alt attribute
|
||||
'title': 'Tooltip text', # Title attribute
|
||||
'style': 'height: 1em;' # Optional inline CSS
|
||||
}
|
||||
Or None if no icon
|
||||
"""
|
||||
return None
|
||||
|
||||
def clear_content(self):
|
||||
"""
|
||||
Explicitly clear all content from memory to free up heap space.
|
||||
@@ -92,12 +122,13 @@ class Fetcher():
|
||||
request_method=None,
|
||||
timeout=None,
|
||||
url=None,
|
||||
watch_uuid=None,
|
||||
):
|
||||
# Should set self.error, self.status_code and self.content
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def quit(self, watch=None):
|
||||
async def quit(self, watch=None):
|
||||
return
|
||||
|
||||
@abstractmethod
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import gc
|
||||
import json
|
||||
import os
|
||||
from urllib.parse import urlparse
|
||||
@@ -9,10 +10,10 @@ from changedetectionio.content_fetchers import SCREENSHOT_MAX_HEIGHT_DEFAULT, vi
|
||||
from changedetectionio.content_fetchers.base import Fetcher, manage_user_agent
|
||||
from changedetectionio.content_fetchers.exceptions import PageUnloadable, Non200ErrorCodeReceived, EmptyReply, ScreenshotUnavailable
|
||||
|
||||
async def capture_full_page_async(page):
|
||||
async def capture_full_page_async(page, screenshot_format='JPEG'):
|
||||
import os
|
||||
import time
|
||||
from multiprocessing import Process, Pipe
|
||||
import multiprocessing
|
||||
|
||||
start = time.time()
|
||||
|
||||
@@ -26,8 +27,20 @@ async def capture_full_page_async(page):
|
||||
step_size = SCREENSHOT_SIZE_STITCH_THRESHOLD # Size that won't cause GPU to overflow
|
||||
screenshot_chunks = []
|
||||
y = 0
|
||||
elements_locked = False
|
||||
|
||||
if page_height > page.viewport_size['height']:
|
||||
|
||||
# Lock all element dimensions BEFORE screenshot to prevent CSS media queries from resizing
|
||||
# capture_full_page_async() changes viewport height which triggers @media (min-height) rules
|
||||
lock_elements_js_path = os.path.join(os.path.dirname(__file__), 'res', 'lock-elements-sizing.js')
|
||||
with open(lock_elements_js_path, 'r') as f:
|
||||
lock_elements_js = f.read()
|
||||
await page.evaluate(lock_elements_js)
|
||||
elements_locked = True
|
||||
|
||||
logger.debug("Element dimensions locked before screenshot capture")
|
||||
|
||||
if page_height < step_size:
|
||||
step_size = page_height # Incase page is bigger than default viewport but smaller than proposed step size
|
||||
logger.debug(f"Setting bigger viewport to step through large page width W{page.viewport_size['width']}xH{step_size} because page_height > viewport_size")
|
||||
@@ -35,36 +48,68 @@ async def capture_full_page_async(page):
|
||||
await page.set_viewport_size({'width': page.viewport_size['width'], 'height': step_size})
|
||||
|
||||
# Capture screenshots in chunks up to the max total height
|
||||
# Use PNG for better quality (no compression artifacts), JPEG for smaller size
|
||||
screenshot_type = screenshot_format.lower() if screenshot_format else 'jpeg'
|
||||
# PNG should use quality 100, JPEG uses configurable quality
|
||||
screenshot_quality = 100 if screenshot_type == 'png' else int(os.getenv("SCREENSHOT_QUALITY", 72))
|
||||
|
||||
while y < min(page_height, SCREENSHOT_MAX_TOTAL_HEIGHT):
|
||||
# Only scroll if not at the top (y > 0)
|
||||
if y > 0:
|
||||
await page.evaluate(f"window.scrollTo(0, {y})")
|
||||
|
||||
# Request GC only before screenshot (not 3x per chunk)
|
||||
await page.request_gc()
|
||||
await page.evaluate(f"window.scrollTo(0, {y})")
|
||||
await page.request_gc()
|
||||
screenshot_chunks.append(await page.screenshot(
|
||||
type="jpeg",
|
||||
full_page=False,
|
||||
quality=int(os.getenv("SCREENSHOT_QUALITY", 72))
|
||||
))
|
||||
|
||||
screenshot_kwargs = {
|
||||
'type': screenshot_type,
|
||||
'full_page': False
|
||||
}
|
||||
# Only pass quality parameter for jpeg (PNG doesn't support it in Playwright)
|
||||
if screenshot_type == 'jpeg':
|
||||
screenshot_kwargs['quality'] = screenshot_quality
|
||||
|
||||
screenshot_chunks.append(await page.screenshot(**screenshot_kwargs))
|
||||
y += step_size
|
||||
await page.request_gc()
|
||||
|
||||
# Restore original viewport size
|
||||
await page.set_viewport_size({'width': original_viewport['width'], 'height': original_viewport['height']})
|
||||
|
||||
# Unlock element dimensions if they were locked
|
||||
if elements_locked:
|
||||
unlock_elements_js_path = os.path.join(os.path.dirname(__file__), 'res', 'unlock-elements-sizing.js')
|
||||
with open(unlock_elements_js_path, 'r') as f:
|
||||
unlock_elements_js = f.read()
|
||||
await page.evaluate(unlock_elements_js)
|
||||
logger.debug("Element dimensions unlocked after screenshot capture")
|
||||
|
||||
# If we have multiple chunks, stitch them together
|
||||
if len(screenshot_chunks) > 1:
|
||||
from changedetectionio.content_fetchers.screenshot_handler import stitch_images_worker
|
||||
logger.debug(f"Screenshot stitching {len(screenshot_chunks)} chunks together")
|
||||
parent_conn, child_conn = Pipe()
|
||||
p = Process(target=stitch_images_worker, args=(child_conn, screenshot_chunks, page_height, SCREENSHOT_MAX_TOTAL_HEIGHT))
|
||||
p.start()
|
||||
screenshot = parent_conn.recv_bytes()
|
||||
p.join()
|
||||
|
||||
# For small number of chunks (2-3), stitch inline to avoid multiprocessing overhead
|
||||
# Only use separate process for many chunks (4+) to avoid blocking the event loop
|
||||
if len(screenshot_chunks) <= 3:
|
||||
from changedetectionio.content_fetchers.screenshot_handler import stitch_images_inline
|
||||
screenshot = stitch_images_inline(screenshot_chunks, page_height, SCREENSHOT_MAX_TOTAL_HEIGHT)
|
||||
else:
|
||||
# Use separate process for many chunks to avoid blocking
|
||||
# Always use spawn for thread safety - consistent behavior in tests and production
|
||||
from changedetectionio.content_fetchers.screenshot_handler import stitch_images_worker
|
||||
ctx = multiprocessing.get_context('spawn')
|
||||
parent_conn, child_conn = ctx.Pipe()
|
||||
p = ctx.Process(target=stitch_images_worker, args=(child_conn, screenshot_chunks, page_height, SCREENSHOT_MAX_TOTAL_HEIGHT))
|
||||
p.start()
|
||||
screenshot = parent_conn.recv_bytes()
|
||||
p.join()
|
||||
# Explicit cleanup
|
||||
del p
|
||||
del parent_conn, child_conn
|
||||
|
||||
logger.debug(
|
||||
f"Screenshot (chunked/stitched) - Page height: {page_height} Capture height: {SCREENSHOT_MAX_TOTAL_HEIGHT} - Stitched together in {time.time() - start:.2f}s")
|
||||
# Explicit cleanup
|
||||
del screenshot_chunks
|
||||
del p
|
||||
del parent_conn, child_conn
|
||||
screenshot_chunks = None
|
||||
return screenshot
|
||||
|
||||
@@ -89,8 +134,22 @@ class fetcher(Fetcher):
|
||||
|
||||
proxy = None
|
||||
|
||||
def __init__(self, proxy_override=None, custom_browser_connection_url=None):
|
||||
super().__init__()
|
||||
# Capability flags
|
||||
supports_browser_steps = True
|
||||
supports_screenshots = True
|
||||
supports_xpath_element_data = True
|
||||
|
||||
@classmethod
|
||||
def get_status_icon_data(cls):
|
||||
"""Return Chrome browser icon data for Playwright fetcher."""
|
||||
return {
|
||||
'filename': 'google-chrome-icon.png',
|
||||
'alt': 'Using a Chrome browser',
|
||||
'title': 'Using a Chrome browser'
|
||||
}
|
||||
|
||||
def __init__(self, proxy_override=None, custom_browser_connection_url=None, **kwargs):
|
||||
super().__init__(**kwargs)
|
||||
|
||||
self.browser_type = os.getenv("PLAYWRIGHT_BROWSER_TYPE", 'chromium').strip('"')
|
||||
|
||||
@@ -125,22 +184,35 @@ class fetcher(Fetcher):
|
||||
|
||||
async def screenshot_step(self, step_n=''):
|
||||
super().screenshot_step(step_n=step_n)
|
||||
screenshot = await capture_full_page_async(page=self.page)
|
||||
screenshot = await capture_full_page_async(page=self.page, screenshot_format=self.screenshot_format)
|
||||
|
||||
# Request GC immediately after screenshot to free memory
|
||||
# Screenshots can be large and browser steps take many of them
|
||||
await self.page.request_gc()
|
||||
|
||||
if self.browser_steps_screenshot_path is not None:
|
||||
destination = os.path.join(self.browser_steps_screenshot_path, 'step_{}.jpeg'.format(step_n))
|
||||
logger.debug(f"Saving step screenshot to {destination}")
|
||||
with open(destination, 'wb') as f:
|
||||
f.write(screenshot)
|
||||
# Clear local reference to allow screenshot bytes to be collected
|
||||
del screenshot
|
||||
gc.collect()
|
||||
|
||||
async def save_step_html(self, step_n):
    """Save the current page's HTML for browser step ``step_n``.

    Writes ``step_<step_n>.html`` (UTF-8) into
    ``self.browser_steps_screenshot_path``. When no step directory is
    configured nothing is fetched or written, matching the ``is not None``
    guard that ``screenshot_step`` applies to the same attribute.
    """
    super().save_step_html(step_n=step_n)

    # Nowhere to save to - os.path.join(None, ...) would raise TypeError
    if self.browser_steps_screenshot_path is None:
        return

    content = await self.page.content()

    # Request GC after getting page content
    await self.page.request_gc()

    destination = os.path.join(self.browser_steps_screenshot_path, 'step_{}.html'.format(step_n))
    logger.debug(f"Saving step HTML to {destination}")
    # Explicit encoding so the written file does not depend on the platform default
    with open(destination, 'w', encoding='utf-8') as f:
        f.write(content)

    # Clear local reference
    del content
    gc.collect()
|
||||
|
||||
async def run(self,
|
||||
fetch_favicon=True,
|
||||
@@ -151,8 +223,10 @@ class fetcher(Fetcher):
|
||||
request_body=None,
|
||||
request_headers=None,
|
||||
request_method=None,
|
||||
screenshot_format=None,
|
||||
timeout=None,
|
||||
url=None,
|
||||
watch_uuid=None,
|
||||
):
|
||||
|
||||
from playwright.async_api import async_playwright
|
||||
@@ -244,7 +318,13 @@ class fetcher(Fetcher):
|
||||
logger.error(f"Error fetching FavIcon info {str(e)}, continuing.")
|
||||
|
||||
if self.status_code != 200 and not ignore_status_codes:
|
||||
screenshot = await capture_full_page_async(self.page)
|
||||
screenshot = await capture_full_page_async(self.page, screenshot_format=self.screenshot_format)
|
||||
# Cleanup before raising to prevent memory leak
|
||||
await self.page.close()
|
||||
await context.close()
|
||||
await browser.close()
|
||||
# Force garbage collection to release Playwright resources immediately
|
||||
gc.collect()
|
||||
raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, screenshot=screenshot)
|
||||
|
||||
if not empty_pages_are_a_change and len((await self.page.content()).strip()) == 0:
|
||||
@@ -253,48 +333,52 @@ class fetcher(Fetcher):
|
||||
await browser.close()
|
||||
raise EmptyReply(url=url, status_code=response.status)
|
||||
|
||||
# Run Browser Steps here
|
||||
if self.browser_steps_get_valid_steps():
|
||||
await self.iterate_browser_steps(start_url=url)
|
||||
|
||||
await self.page.wait_for_timeout(extra_wait * 1000)
|
||||
|
||||
now = time.time()
|
||||
# So we can find an element on the page where its selector was entered manually (maybe not xPath etc)
|
||||
if current_include_filters is not None:
|
||||
await self.page.evaluate("var include_filters={}".format(json.dumps(current_include_filters)))
|
||||
else:
|
||||
await self.page.evaluate("var include_filters=''")
|
||||
await self.page.request_gc()
|
||||
|
||||
# request_gc before and after evaluate to free up memory
|
||||
# @todo browsersteps etc
|
||||
MAX_TOTAL_HEIGHT = int(os.getenv("SCREENSHOT_MAX_HEIGHT", SCREENSHOT_MAX_HEIGHT_DEFAULT))
|
||||
self.xpath_data = await self.page.evaluate(XPATH_ELEMENT_JS, {
|
||||
"visualselector_xpath_selectors": visualselector_xpath_selectors,
|
||||
"max_height": MAX_TOTAL_HEIGHT
|
||||
})
|
||||
await self.page.request_gc()
|
||||
|
||||
self.instock_data = await self.page.evaluate(INSTOCK_DATA_JS)
|
||||
await self.page.request_gc()
|
||||
|
||||
self.content = await self.page.content()
|
||||
await self.page.request_gc()
|
||||
logger.debug(f"Scrape xPath element data in browser done in {time.time() - now:.2f}s")
|
||||
|
||||
|
||||
# Bug 3 in Playwright screenshot handling
|
||||
# Some bug where it gives the wrong screenshot size, but making a request with the clip set first seems to solve it
|
||||
# JPEG is better here because the screenshots can be very very large
|
||||
|
||||
# Screenshots also travel via the ws:// (websocket) meaning that the binary data is base64 encoded
|
||||
# which will significantly increase the IO size between the server and client, it's recommended to use the lowest
|
||||
# acceptable screenshot quality here
|
||||
# Wrap remaining operations in try/finally to ensure cleanup
|
||||
try:
|
||||
# The actual screenshot - this always base64 and needs decoding! horrible! huge CPU usage
|
||||
self.screenshot = await capture_full_page_async(page=self.page)
|
||||
# Run Browser Steps here
|
||||
if self.browser_steps_get_valid_steps():
|
||||
await self.iterate_browser_steps(start_url=url)
|
||||
|
||||
await self.page.wait_for_timeout(extra_wait * 1000)
|
||||
|
||||
now = time.time()
|
||||
# So we can find an element on the page where its selector was entered manually (maybe not xPath etc)
|
||||
if current_include_filters is not None:
|
||||
await self.page.evaluate("var include_filters={}".format(json.dumps(current_include_filters)))
|
||||
else:
|
||||
await self.page.evaluate("var include_filters=''")
|
||||
await self.page.request_gc()
|
||||
|
||||
# request_gc before and after evaluate to free up memory
|
||||
# @todo browsersteps etc
|
||||
MAX_TOTAL_HEIGHT = int(os.getenv("SCREENSHOT_MAX_HEIGHT", SCREENSHOT_MAX_HEIGHT_DEFAULT))
|
||||
self.xpath_data = await self.page.evaluate(XPATH_ELEMENT_JS, {
|
||||
"visualselector_xpath_selectors": visualselector_xpath_selectors,
|
||||
"max_height": MAX_TOTAL_HEIGHT
|
||||
})
|
||||
await self.page.request_gc()
|
||||
|
||||
self.instock_data = await self.page.evaluate(INSTOCK_DATA_JS)
|
||||
await self.page.request_gc()
|
||||
|
||||
self.content = await self.page.content()
|
||||
await self.page.request_gc()
|
||||
logger.debug(f"Scrape xPath element data in browser done in {time.time() - now:.2f}s")
|
||||
|
||||
|
||||
# Bug 3 in Playwright screenshot handling
|
||||
# Some bug where it gives the wrong screenshot size, but making a request with the clip set first seems to solve it
|
||||
# JPEG is better here because the screenshots can be very very large
|
||||
|
||||
# Screenshots also travel via the ws:// (websocket) meaning that the binary data is base64 encoded
|
||||
# which will significantly increase the IO size between the server and client, it's recommended to use the lowest
|
||||
# acceptable screenshot quality here
|
||||
# The actual screenshot - this always base64 and needs decoding! horrible! huge CPU usage
|
||||
self.screenshot = await capture_full_page_async(page=self.page, screenshot_format=self.screenshot_format)
|
||||
|
||||
except ScreenshotUnavailable:
|
||||
# Re-raise screenshot unavailable exceptions
|
||||
raise
|
||||
except Exception as e:
|
||||
# It's likely the screenshot was too long/big and something crashed
|
||||
raise ScreenshotUnavailable(url=url, status_code=self.status_code)
|
||||
@@ -329,5 +413,22 @@ class fetcher(Fetcher):
|
||||
pass
|
||||
browser = None
|
||||
|
||||
# Force Python GC to release Playwright resources immediately
|
||||
# Playwright objects can have circular references that delay cleanup
|
||||
gc.collect()
|
||||
|
||||
|
||||
# Plugin registration for built-in fetcher
|
||||
class PlaywrightFetcherPlugin:
    """Built-in plugin wrapper that exposes the Playwright fetcher for registration."""

    def register_content_fetcher(self):
        """Return the ``(name, fetcher class)`` pair for the Playwright fetcher."""
        registration = ('html_webdriver', fetcher)
        return registration
|
||||
|
||||
|
||||
# Create module-level instance for plugin registration
|
||||
playwright_plugin = PlaywrightFetcherPlugin()
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -20,10 +20,10 @@ from changedetectionio.content_fetchers.exceptions import PageUnloadable, Non200
|
||||
# Screenshots also travel via the ws:// (websocket) meaning that the binary data is base64 encoded
|
||||
# which will significantly increase the IO size between the server and client, it's recommended to use the lowest
|
||||
# acceptable screenshot quality here
|
||||
async def capture_full_page(page):
|
||||
async def capture_full_page(page, screenshot_format='JPEG'):
|
||||
import os
|
||||
import time
|
||||
from multiprocessing import Process, Pipe
|
||||
import multiprocessing
|
||||
|
||||
start = time.time()
|
||||
|
||||
@@ -41,11 +41,25 @@ async def capture_full_page(page):
|
||||
# which will significantly increase the IO size between the server and client, it's recommended to use the lowest
|
||||
# acceptable screenshot quality here
|
||||
|
||||
# Use PNG for better quality (no compression artifacts), JPEG for smaller size
|
||||
screenshot_type = screenshot_format.lower() if screenshot_format else 'jpeg'
|
||||
# PNG should use quality 100, JPEG uses configurable quality
|
||||
screenshot_quality = 100 if screenshot_type == 'png' else int(os.getenv("SCREENSHOT_QUALITY", 72))
|
||||
|
||||
step_size = SCREENSHOT_SIZE_STITCH_THRESHOLD # Something that will not cause the GPU to overflow when taking the screenshot
|
||||
screenshot_chunks = []
|
||||
y = 0
|
||||
elements_locked = False
|
||||
if page_height > page.viewport['height']:
|
||||
# Lock all element dimensions BEFORE screenshot to prevent CSS media queries from resizing
|
||||
# capture_full_page() changes viewport height which triggers @media (min-height) rules
|
||||
lock_elements_js_path = os.path.join(os.path.dirname(__file__), 'res', 'lock-elements-sizing.js')
|
||||
with open(lock_elements_js_path, 'r') as f:
|
||||
lock_elements_js = f.read()
|
||||
await page.evaluate(lock_elements_js)
|
||||
elements_locked = True
|
||||
logger.debug("Element dimensions locked before screenshot capture")
|
||||
|
||||
if page_height < step_size:
|
||||
step_size = page_height # Incase page is bigger than default viewport but smaller than proposed step size
|
||||
await page.setViewport({'width': page.viewport['width'], 'height': step_size})
|
||||
@@ -60,18 +74,34 @@ async def capture_full_page(page):
|
||||
y
|
||||
)
|
||||
|
||||
screenshot_chunks.append(await page.screenshot(type_='jpeg',
|
||||
fullPage=False,
|
||||
quality=int(os.getenv("SCREENSHOT_QUALITY", 72))))
|
||||
screenshot_kwargs = {
|
||||
'type_': screenshot_type,
|
||||
'fullPage': False
|
||||
}
|
||||
# PNG doesn't support quality parameter in Puppeteer
|
||||
if screenshot_type == 'jpeg':
|
||||
screenshot_kwargs['quality'] = screenshot_quality
|
||||
|
||||
screenshot_chunks.append(await page.screenshot(**screenshot_kwargs))
|
||||
y += step_size
|
||||
|
||||
await page.setViewport({'width': original_viewport['width'], 'height': original_viewport['height']})
|
||||
|
||||
# Unlock element dimensions if they were locked
|
||||
if elements_locked:
|
||||
unlock_elements_js_path = os.path.join(os.path.dirname(__file__), 'res', 'unlock-elements-sizing.js')
|
||||
with open(unlock_elements_js_path, 'r') as f:
|
||||
unlock_elements_js = f.read()
|
||||
await page.evaluate(unlock_elements_js)
|
||||
logger.debug("Element dimensions unlocked after screenshot capture")
|
||||
|
||||
if len(screenshot_chunks) > 1:
|
||||
# Always use spawn for thread safety - consistent behavior in tests and production
|
||||
from changedetectionio.content_fetchers.screenshot_handler import stitch_images_worker
|
||||
logger.debug(f"Screenshot stitching {len(screenshot_chunks)} chunks together")
|
||||
parent_conn, child_conn = Pipe()
|
||||
p = Process(target=stitch_images_worker, args=(child_conn, screenshot_chunks, page_height, SCREENSHOT_MAX_TOTAL_HEIGHT))
|
||||
ctx = multiprocessing.get_context('spawn')
|
||||
parent_conn, child_conn = ctx.Pipe()
|
||||
p = ctx.Process(target=stitch_images_worker, args=(child_conn, screenshot_chunks, page_height, SCREENSHOT_MAX_TOTAL_HEIGHT))
|
||||
p.start()
|
||||
screenshot = parent_conn.recv_bytes()
|
||||
p.join()
|
||||
@@ -93,13 +123,27 @@ class fetcher(Fetcher):
|
||||
if os.getenv("PLAYWRIGHT_DRIVER_URL"):
|
||||
fetcher_description += " via '{}'".format(os.getenv("PLAYWRIGHT_DRIVER_URL"))
|
||||
|
||||
browser = None
|
||||
browser_type = ''
|
||||
command_executor = ''
|
||||
|
||||
proxy = None
|
||||
|
||||
def __init__(self, proxy_override=None, custom_browser_connection_url=None):
|
||||
super().__init__()
|
||||
# Capability flags
|
||||
supports_browser_steps = True
|
||||
supports_screenshots = True
|
||||
supports_xpath_element_data = True
|
||||
|
||||
@classmethod
|
||||
def get_status_icon_data(cls):
|
||||
"""Return Chrome browser icon data for Puppeteer fetcher."""
|
||||
return {
|
||||
'filename': 'google-chrome-icon.png',
|
||||
'alt': 'Using a Chrome browser',
|
||||
'title': 'Using a Chrome browser'
|
||||
}
|
||||
|
||||
def __init__(self, proxy_override=None, custom_browser_connection_url=None, **kwargs):
|
||||
super().__init__(**kwargs)
|
||||
|
||||
if custom_browser_connection_url:
|
||||
self.browser_connection_is_custom = True
|
||||
@@ -128,21 +172,20 @@ class fetcher(Fetcher):
|
||||
proxy_url += f"{parsed.hostname}{port}{parsed.path}{q}"
|
||||
self.browser_connection_url += f"{r}--proxy-server={proxy_url}"
|
||||
|
||||
# def screenshot_step(self, step_n=''):
|
||||
# screenshot = self.page.screenshot(type='jpeg', full_page=True, quality=85)
|
||||
#
|
||||
# if self.browser_steps_screenshot_path is not None:
|
||||
# destination = os.path.join(self.browser_steps_screenshot_path, 'step_{}.jpeg'.format(step_n))
|
||||
# logger.debug(f"Saving step screenshot to {destination}")
|
||||
# with open(destination, 'wb') as f:
|
||||
# f.write(screenshot)
|
||||
#
|
||||
# def save_step_html(self, step_n):
|
||||
# content = self.page.content()
|
||||
# destination = os.path.join(self.browser_steps_screenshot_path, 'step_{}.html'.format(step_n))
|
||||
# logger.debug(f"Saving step HTML to {destination}")
|
||||
# with open(destination, 'w') as f:
|
||||
# f.write(content)
|
||||
async def quit(self, watch=None):
    """Close the Puppeteer page and browser connection (best effort).

    The page is closed first, then the browser. Each resource is closed and
    its instance attribute removed independently; a failure on one is logged
    at debug level and never raised, so the other is still released.

    :param watch: not used by this implementation.
    """
    for resource_name in ('page', 'browser'):
        try:
            await getattr(self, resource_name).close()
            delattr(self, resource_name)
        except Exception as e:
            # Cleanup must never raise (the resource may never have been opened),
            # but leave a trace instead of swallowing the error silently.
            logger.debug(f"Puppeteer cleanup: could not close '{resource_name}' - {e}")

    logger.info("Cleanup puppeteer complete.")
|
||||
|
||||
async def fetch_page(self,
|
||||
current_include_filters,
|
||||
@@ -153,13 +196,15 @@ class fetcher(Fetcher):
|
||||
request_body,
|
||||
request_headers,
|
||||
request_method,
|
||||
screenshot_format,
|
||||
timeout,
|
||||
url,
|
||||
watch_uuid
|
||||
):
|
||||
import re
|
||||
self.delete_browser_steps_screenshots()
|
||||
|
||||
n = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay
|
||||
n = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 12)) + self.render_extract_delay
|
||||
extra_wait = min(n, 15)
|
||||
|
||||
logger.debug(f"Extra wait set to {extra_wait}s, requested was {n}s.")
|
||||
@@ -170,9 +215,9 @@ class fetcher(Fetcher):
|
||||
# Connect directly using the specified browser_ws_endpoint
|
||||
# @todo timeout
|
||||
try:
|
||||
browser = await pyppeteer_instance.connect(browserWSEndpoint=self.browser_connection_url,
|
||||
ignoreHTTPSErrors=True
|
||||
)
|
||||
self.browser = await pyppeteer_instance.connect(browserWSEndpoint=self.browser_connection_url,
|
||||
ignoreHTTPSErrors=True
|
||||
)
|
||||
except websockets.exceptions.InvalidStatusCode as e:
|
||||
raise BrowserConnectError(msg=f"Error while trying to connect the browser, Code {e.status_code} (check your access, whitelist IP, password etc)")
|
||||
except websockets.exceptions.InvalidURI:
|
||||
@@ -181,7 +226,7 @@ class fetcher(Fetcher):
|
||||
raise BrowserConnectError(msg=f"Error connecting to the browser - Exception '{str(e)}'")
|
||||
|
||||
# more reliable is to just request a new page
|
||||
self.page = await browser.newPage()
|
||||
self.page = await self.browser.newPage()
|
||||
|
||||
# Add console handler to capture console.log from favicon fetcher
|
||||
#self.page.on('console', lambda msg: logger.debug(f"Browser console [{msg.type}]: {msg.text}"))
|
||||
@@ -196,7 +241,6 @@ class fetcher(Fetcher):
|
||||
"height": int(match.group(2))
|
||||
})
|
||||
logger.debug(f"Puppeteer viewport size {self.page.viewport}")
|
||||
|
||||
try:
|
||||
from pyppeteerstealth import inject_evasions_into_page
|
||||
except ImportError:
|
||||
@@ -241,7 +285,12 @@ class fetcher(Fetcher):
|
||||
# browsersteps_interface = steppable_browser_interface()
|
||||
# browsersteps_interface.page = self.page
|
||||
|
||||
async def handle_frame_navigation(event):
|
||||
# Enable Network domain to detect when first bytes arrive
|
||||
await self.page._client.send('Network.enable')
|
||||
|
||||
# Now set up the frame navigation handlers
|
||||
async def handle_frame_navigation(event=None):
|
||||
# Wait n seconds after the frameStartedLoading, not from any frameStartedLoading/frameStartedNavigating
|
||||
logger.debug(f"Frame navigated: {event}")
|
||||
w = extra_wait - 2 if extra_wait > 4 else 2
|
||||
logger.debug(f"Waiting {w} seconds before calling Page.stopLoading...")
|
||||
@@ -250,24 +299,35 @@ class fetcher(Fetcher):
|
||||
await self.page._client.send('Page.stopLoading')
|
||||
logger.debug("stopLoading command sent!")
|
||||
|
||||
self.page._client.on('Page.frameStartedNavigating', lambda event: asyncio.create_task(handle_frame_navigation(event)))
|
||||
self.page._client.on('Page.frameStartedLoading', lambda event: asyncio.create_task(handle_frame_navigation(event)))
|
||||
self.page._client.on('Page.frameStoppedLoading', lambda event: logger.debug(f"Frame stopped loading: {event}"))
|
||||
async def setup_frame_handlers_on_first_response(event):
|
||||
# Only trigger for the main document response
|
||||
if event.get('type') == 'Document':
|
||||
logger.debug("First response received, setting up frame handlers for forced page stop load.")
|
||||
self.page._client.on('Page.frameStartedNavigating', lambda e: asyncio.create_task(handle_frame_navigation(e)))
|
||||
self.page._client.on('Page.frameStartedLoading', lambda e: asyncio.create_task(handle_frame_navigation(e)))
|
||||
self.page._client.on('Page.frameStoppedLoading', lambda e: logger.debug(f"Frame stopped loading: {e}"))
|
||||
logger.debug("First response received, setting up frame handlers for forced page stop load DONE SETUP")
|
||||
# De-register this listener - we only need it once
|
||||
self.page._client.remove_listener('Network.responseReceived', setup_frame_handlers_on_first_response)
|
||||
|
||||
# Listen for first response to trigger frame handler setup
|
||||
self.page._client.on('Network.responseReceived', setup_frame_handlers_on_first_response)
|
||||
|
||||
response = None
|
||||
attempt=0
|
||||
while not response:
|
||||
logger.debug(f"Attempting page fetch {url} attempt {attempt}")
|
||||
response = await self.page.goto(url)
|
||||
asyncio.create_task(handle_frame_navigation())
|
||||
response = await self.page.goto(url, timeout=0)
|
||||
await asyncio.sleep(1 + extra_wait)
|
||||
await self.page._client.send('Page.stopLoading')
|
||||
|
||||
if response:
|
||||
break
|
||||
if not response:
|
||||
logger.warning("Page did not fetch! trying again!")
|
||||
if response is None and attempt>=2:
|
||||
await self.page.close()
|
||||
await browser.close()
|
||||
logger.warning(f"Content Fetcher > Response object was none (as in, the response from the browser was empty, not just the content) exiting attmpt {attempt}")
|
||||
logger.warning(f"Content Fetcher > Response object was none (as in, the response from the browser was empty, not just the content) exiting attempt {attempt}")
|
||||
raise EmptyReply(url=url, status_code=None)
|
||||
attempt+=1
|
||||
|
||||
@@ -279,8 +339,6 @@ class fetcher(Fetcher):
|
||||
except Exception as e:
|
||||
logger.warning("Got exception when running evaluate on custom JS code")
|
||||
logger.error(str(e))
|
||||
await self.page.close()
|
||||
await browser.close()
|
||||
# This can be ok, we will try to grab what we could retrieve
|
||||
raise PageUnloadable(url=url, status_code=None, message=str(e))
|
||||
|
||||
@@ -290,8 +348,6 @@ class fetcher(Fetcher):
|
||||
# https://github.com/dgtlmoon/changedetection.io/discussions/2122#discussioncomment-8241962
|
||||
logger.critical(f"Response from the browser/Playwright did not have a status_code! Response follows.")
|
||||
logger.critical(response)
|
||||
await self.page.close()
|
||||
await browser.close()
|
||||
raise PageUnloadable(url=url, status_code=None, message=str(e))
|
||||
|
||||
if fetch_favicon:
|
||||
@@ -301,7 +357,7 @@ class fetcher(Fetcher):
|
||||
logger.error(f"Error fetching FavIcon info {str(e)}, continuing.")
|
||||
|
||||
if self.status_code != 200 and not ignore_status_codes:
|
||||
screenshot = await capture_full_page(page=self.page)
|
||||
screenshot = await capture_full_page(page=self.page, screenshot_format=self.screenshot_format)
|
||||
|
||||
raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, screenshot=screenshot)
|
||||
|
||||
@@ -309,8 +365,6 @@ class fetcher(Fetcher):
|
||||
|
||||
if not empty_pages_are_a_change and len(content.strip()) == 0:
|
||||
logger.error("Content Fetcher > Content was empty (empty_pages_are_a_change is False), closing browsers")
|
||||
await self.page.close()
|
||||
await browser.close()
|
||||
raise EmptyReply(url=url, status_code=response.status)
|
||||
|
||||
# Run Browser Steps here
|
||||
@@ -328,6 +382,12 @@ class fetcher(Fetcher):
|
||||
await self.page.evaluate(f"var include_filters=''")
|
||||
|
||||
MAX_TOTAL_HEIGHT = int(os.getenv("SCREENSHOT_MAX_HEIGHT", SCREENSHOT_MAX_HEIGHT_DEFAULT))
|
||||
|
||||
self.content = await self.page.content
|
||||
|
||||
# Now take screenshot (scrolling may trigger layout changes, but measurements are already captured)
|
||||
logger.debug(f"Screenshot format {self.screenshot_format}")
|
||||
self.screenshot = await capture_full_page(page=self.page, screenshot_format=self.screenshot_format)
|
||||
self.xpath_data = await self.page.evaluate(XPATH_ELEMENT_JS, {
|
||||
"visualselector_xpath_selectors": visualselector_xpath_selectors,
|
||||
"max_height": MAX_TOTAL_HEIGHT
|
||||
@@ -335,17 +395,10 @@ class fetcher(Fetcher):
|
||||
if not self.xpath_data:
|
||||
raise Exception(f"Content Fetcher > xPath scraper failed. Please report this URL so we can fix it :)")
|
||||
|
||||
|
||||
self.instock_data = await self.page.evaluate(INSTOCK_DATA_JS)
|
||||
|
||||
self.content = await self.page.content
|
||||
|
||||
self.screenshot = await capture_full_page(page=self.page)
|
||||
|
||||
# It's good to log here in the case that the browser crashes on shutting down but we still get the data we need
|
||||
logger.success(f"Fetching '{url}' complete, closing page")
|
||||
await self.page.close()
|
||||
logger.success(f"Fetching '{url}' complete, closing browser")
|
||||
await browser.close()
|
||||
logger.success(f"Fetching '{url}' complete, exiting puppeteer fetch.")
|
||||
|
||||
async def main(self, **kwargs):
|
||||
@@ -360,8 +413,10 @@ class fetcher(Fetcher):
|
||||
request_body=None,
|
||||
request_headers=None,
|
||||
request_method=None,
|
||||
screenshot_format=None,
|
||||
timeout=None,
|
||||
url=None,
|
||||
watch_uuid=None,
|
||||
):
|
||||
|
||||
#@todo make update_worker async which could run any of these content_fetchers within memory and time constraints
|
||||
@@ -378,9 +433,24 @@ class fetcher(Fetcher):
|
||||
request_body=request_body,
|
||||
request_headers=request_headers,
|
||||
request_method=request_method,
|
||||
screenshot_format=None,
|
||||
timeout=timeout,
|
||||
url=url,
|
||||
watch_uuid=watch_uuid,
|
||||
), timeout=max_time
|
||||
)
|
||||
except asyncio.TimeoutError:
|
||||
raise (BrowserFetchTimedOut(msg=f"Browser connected but was unable to process the page in {max_time} seconds."))
|
||||
|
||||
|
||||
# Plugin registration for built-in fetcher
|
||||
class PuppeteerFetcherPlugin:
|
||||
"""Plugin class that registers the Puppeteer fetcher as a built-in plugin."""
|
||||
|
||||
def register_content_fetcher(self):
|
||||
"""Register the Puppeteer fetcher"""
|
||||
return ('html_webdriver', fetcher)
|
||||
|
||||
|
||||
# Create module-level instance for plugin registration
|
||||
puppeteer_plugin = PuppeteerFetcherPlugin()
|
||||
|
||||
@@ -1,7 +1,9 @@
|
||||
from loguru import logger
|
||||
import hashlib
|
||||
import os
|
||||
import re
|
||||
import asyncio
|
||||
from functools import partial
|
||||
from changedetectionio import strtobool
|
||||
from changedetectionio.content_fetchers.exceptions import BrowserStepsInUnsupportedFetcher, EmptyReply, Non200ErrorCodeReceived
|
||||
from changedetectionio.content_fetchers.base import Fetcher
|
||||
@@ -11,8 +13,8 @@ from changedetectionio.content_fetchers.base import Fetcher
|
||||
class fetcher(Fetcher):
|
||||
fetcher_description = "Basic fast Plaintext/HTTP Client"
|
||||
|
||||
def __init__(self, proxy_override=None, custom_browser_connection_url=None):
|
||||
super().__init__()
|
||||
def __init__(self, proxy_override=None, custom_browser_connection_url=None, **kwargs):
|
||||
super().__init__(**kwargs)
|
||||
self.proxy_override = proxy_override
|
||||
# browser_connection_url is none because its always 'launched locally'
|
||||
|
||||
@@ -25,7 +27,9 @@ class fetcher(Fetcher):
|
||||
ignore_status_codes=False,
|
||||
current_include_filters=None,
|
||||
is_binary=False,
|
||||
empty_pages_are_a_change=False):
|
||||
empty_pages_are_a_change=False,
|
||||
watch_uuid=None,
|
||||
):
|
||||
"""Synchronous version of run - the original requests implementation"""
|
||||
|
||||
import chardet
|
||||
@@ -76,9 +80,22 @@ class fetcher(Fetcher):
|
||||
if not is_binary:
|
||||
# Don't run this for PDF (and requests identified as binary) takes a _long_ time
|
||||
if not r.headers.get('content-type') or not 'charset=' in r.headers.get('content-type'):
|
||||
encoding = chardet.detect(r.content)['encoding']
|
||||
if encoding:
|
||||
r.encoding = encoding
|
||||
# For XML/RSS feeds, check the XML declaration for encoding attribute
|
||||
# This is more reliable than chardet which can misdetect UTF-8 as MacRoman
|
||||
content_type = r.headers.get('content-type', '').lower()
|
||||
if 'xml' in content_type or 'rss' in content_type:
|
||||
# Look for <?xml version="1.0" encoding="UTF-8"?>
|
||||
xml_encoding_match = re.search(rb'<\?xml[^>]+encoding=["\']([^"\']+)["\']', r.content[:200])
|
||||
if xml_encoding_match:
|
||||
r.encoding = xml_encoding_match.group(1).decode('ascii')
|
||||
else:
|
||||
# Default to UTF-8 for XML if no encoding found
|
||||
r.encoding = 'utf-8'
|
||||
else:
|
||||
# For other content types, use chardet
|
||||
encoding = chardet.detect(r.content)['encoding']
|
||||
if encoding:
|
||||
r.encoding = encoding
|
||||
|
||||
self.headers = r.headers
|
||||
|
||||
@@ -104,6 +121,12 @@ class fetcher(Fetcher):
|
||||
|
||||
self.raw_content = r.content
|
||||
|
||||
# If the content is an image, set it as screenshot for SSIM/visual comparison
|
||||
content_type = r.headers.get('content-type', '').lower()
|
||||
if 'image/' in content_type:
|
||||
self.screenshot = r.content
|
||||
logger.debug(f"Image content detected ({content_type}), set as screenshot for comparison")
|
||||
|
||||
async def run(self,
|
||||
fetch_favicon=True,
|
||||
current_include_filters=None,
|
||||
@@ -113,8 +136,10 @@ class fetcher(Fetcher):
|
||||
request_body=None,
|
||||
request_headers=None,
|
||||
request_method=None,
|
||||
screenshot_format=None,
|
||||
timeout=None,
|
||||
url=None,
|
||||
watch_uuid=None,
|
||||
):
|
||||
"""Async wrapper that runs the synchronous requests code in a thread pool"""
|
||||
|
||||
@@ -132,11 +157,12 @@ class fetcher(Fetcher):
|
||||
ignore_status_codes=ignore_status_codes,
|
||||
current_include_filters=current_include_filters,
|
||||
is_binary=is_binary,
|
||||
empty_pages_are_a_change=empty_pages_are_a_change
|
||||
empty_pages_are_a_change=empty_pages_are_a_change,
|
||||
watch_uuid=watch_uuid,
|
||||
)
|
||||
)
|
||||
|
||||
def quit(self, watch=None):
|
||||
async def quit(self, watch=None):
|
||||
|
||||
# In case they switched to `requests` fetcher from something else
|
||||
# Then the screenshot could be old, in any case, it's not used here.
|
||||
@@ -149,3 +175,15 @@ class fetcher(Fetcher):
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to unlink screenshot: {screenshot} - {e}")
|
||||
|
||||
|
||||
# Plugin registration for built-in fetcher
|
||||
class RequestsFetcherPlugin:
|
||||
"""Plugin class that registers the requests fetcher as a built-in plugin."""
|
||||
|
||||
def register_content_fetcher(self):
|
||||
"""Register the requests fetcher"""
|
||||
return ('html_requests', fetcher)
|
||||
|
||||
|
||||
# Create module-level instance for plugin registration
|
||||
requests_plugin = RequestsFetcherPlugin()
|
||||
|
||||
@@ -0,0 +1,93 @@
|
||||
/**
|
||||
* Lock Element Dimensions for Screenshot Capture
|
||||
*
|
||||
* THE PROBLEM:
|
||||
* When taking full-page screenshots of tall pages, Chrome/Puppeteer/Playwright need to:
|
||||
* 1. Temporarily change the viewport height to a large value (e.g., 800px → 3809px)
|
||||
* 2. Take screenshots in chunks while scrolling
|
||||
* 3. Stitch the chunks together
|
||||
*
|
||||
* However, changing the viewport height triggers CSS media queries like:
|
||||
* @media (min-height: 860px) { .ad { height: 250px; } }
|
||||
*
|
||||
* This causes elements (especially ads) to resize during screenshot capture, creating a mismatch:
|
||||
* - Screenshot shows element at NEW size (after media query triggered)
|
||||
* - xpath element coordinates measured at OLD size (before viewport change)
|
||||
* - Visual selector overlays don't align with screenshot
|
||||
*
|
||||
* EXAMPLE BUG:
|
||||
* - Initial viewport: 1280x800, ad height: 138px, article position: 279px ✓
|
||||
* - Viewport changes to 1280x3809 for screenshot
|
||||
* - Media query triggers: ad expands to 250px
|
||||
* - All content below shifts down by 112px (250-138)
|
||||
* - Article now at position: 391px (279+112)
|
||||
* - But xpath data says 279px → 112px mismatch! ✗
|
||||
*
|
||||
* THE SOLUTION:
|
||||
* Before changing viewport, lock ALL element dimensions with !important inline styles.
|
||||
* Inline styles with !important override media query CSS, preventing layout changes.
|
||||
*
|
||||
* WHAT THIS SCRIPT DOES:
|
||||
* 1. Iterates through every element on the page
|
||||
* 2. Captures current computed dimensions (width, height)
|
||||
* 3. Sets inline styles with !important to freeze those dimensions
|
||||
* 4. Disables ResizeObserver API (for JS-based resizing)
|
||||
* 5. When viewport changes for screenshot, media queries can't resize anything
|
||||
* 6. Layout remains consistent → xpath coordinates match screenshot ✓
|
||||
*
|
||||
* USAGE:
|
||||
* Execute this script BEFORE calling capture_full_page() / screenshot functions.
|
||||
* The page must be fully loaded and settled at its initial viewport size.
|
||||
* No need to restore state afterward - page is closed after screenshot.
|
||||
*
|
||||
* PERFORMANCE:
|
||||
* - Iterates all DOM elements (can be 1000s on complex pages)
|
||||
* - Typically completes in 50-200ms
|
||||
* - One-time cost before screenshot, well worth it for coordinate accuracy
|
||||
*
|
||||
* @see https://github.com/dgtlmoon/changedetection.io/issues/XXXX
|
||||
*/
|
||||
|
||||
(() => {
|
||||
// Store original styles in a global WeakMap for later restoration
|
||||
window.__elementSizingRestore = new WeakMap();
|
||||
|
||||
// Lock ALL element dimensions to prevent media query layout changes
|
||||
document.querySelectorAll('*').forEach(el => {
|
||||
const computed = window.getComputedStyle(el);
|
||||
const rect = el.getBoundingClientRect();
|
||||
|
||||
// Save original inline style values BEFORE locking
|
||||
const properties = ['height', 'min-height', 'max-height', 'width', 'min-width', 'max-width'];
|
||||
const originalStyles = {};
|
||||
properties.forEach(prop => {
|
||||
originalStyles[prop] = {
|
||||
value: el.style.getPropertyValue(prop),
|
||||
priority: el.style.getPropertyPriority(prop)
|
||||
};
|
||||
});
|
||||
window.__elementSizingRestore.set(el, originalStyles);
|
||||
|
||||
// Lock dimensions with !important to override media queries
|
||||
if (rect.height > 0) {
|
||||
el.style.setProperty('height', computed.height, 'important');
|
||||
el.style.setProperty('min-height', computed.height, 'important');
|
||||
el.style.setProperty('max-height', computed.height, 'important');
|
||||
}
|
||||
if (rect.width > 0) {
|
||||
el.style.setProperty('width', computed.width, 'important');
|
||||
el.style.setProperty('min-width', computed.width, 'important');
|
||||
el.style.setProperty('max-width', computed.width, 'important');
|
||||
}
|
||||
});
|
||||
|
||||
// Also disable ResizeObserver for JS-based resizing
|
||||
window.ResizeObserver = class {
|
||||
constructor() {}
|
||||
observe() {}
|
||||
unobserve() {}
|
||||
disconnect() {}
|
||||
};
|
||||
|
||||
console.log('✓ Element dimensions locked to prevent media query changes during screenshot');
|
||||
})();
|
||||
@@ -0,0 +1,52 @@
|
||||
/**
|
||||
* Unlock Element Dimensions After Screenshot Capture
|
||||
*
|
||||
* This script removes the inline !important styles that were applied by lock-elements-sizing.js
|
||||
* and restores elements to their original state using the WeakMap created during locking.
|
||||
*
|
||||
* USAGE:
|
||||
* Execute this script AFTER completing screenshot capture and restoring the viewport.
|
||||
* This allows the page to return to its normal responsive behavior.
|
||||
*
|
||||
* WHAT THIS SCRIPT DOES:
|
||||
* 1. Iterates through every element that was locked
|
||||
* 2. Reads original style values from the global WeakMap
|
||||
* 3. Restores original inline styles (or removes them if they weren't set originally)
|
||||
* 4. Cleans up the WeakMap
|
||||
*
|
||||
* @see lock-elements-sizing.js for the locking mechanism
|
||||
*/
|
||||
|
||||
(() => {
|
||||
// Check if the restore map exists
|
||||
if (!window.__elementSizingRestore) {
|
||||
console.log('⚠ Element sizing restore map not found - elements may not have been locked');
|
||||
return;
|
||||
}
|
||||
|
||||
// Restore all locked dimension styles to their original state
|
||||
document.querySelectorAll('*').forEach(el => {
|
||||
const originalStyles = window.__elementSizingRestore.get(el);
|
||||
|
||||
if (originalStyles) {
|
||||
const properties = ['height', 'min-height', 'max-height', 'width', 'min-width', 'max-width'];
|
||||
|
||||
properties.forEach(prop => {
|
||||
const original = originalStyles[prop];
|
||||
|
||||
if (original.value) {
|
||||
// Restore original value with original priority
|
||||
el.style.setProperty(prop, original.value, original.priority || '');
|
||||
} else {
|
||||
// Was not set originally, so remove it
|
||||
el.style.removeProperty(prop);
|
||||
}
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
// Clean up the global WeakMap
|
||||
delete window.__elementSizingRestore;
|
||||
|
||||
console.log('✓ Element dimensions unlocked - page restored to original state');
|
||||
})();
|
||||
@@ -8,14 +8,90 @@ from loguru import logger
|
||||
|
||||
from changedetectionio.content_fetchers import SCREENSHOT_MAX_HEIGHT_DEFAULT, SCREENSHOT_DEFAULT_QUALITY
|
||||
|
||||
# Cache font to avoid loading on every stitch
|
||||
_cached_font = None
|
||||
|
||||
def _get_caption_font():
|
||||
"""Get or create cached font for caption text."""
|
||||
global _cached_font
|
||||
if _cached_font is None:
|
||||
from PIL import ImageFont
|
||||
try:
|
||||
_cached_font = ImageFont.truetype("arial.ttf", 35)
|
||||
except IOError:
|
||||
_cached_font = ImageFont.load_default()
|
||||
return _cached_font
|
||||
|
||||
|
||||
def stitch_images_inline(chunks_bytes, original_page_height, capture_height):
|
||||
"""
|
||||
Stitch image chunks together inline (no multiprocessing).
|
||||
Optimized for small number of chunks (2-3) to avoid process creation overhead.
|
||||
|
||||
Args:
|
||||
chunks_bytes: List of JPEG image bytes
|
||||
original_page_height: Original page height in pixels
|
||||
capture_height: Maximum capture height
|
||||
|
||||
Returns:
|
||||
bytes: Stitched JPEG image
|
||||
"""
|
||||
import os
|
||||
import io
|
||||
from PIL import Image, ImageDraw
|
||||
|
||||
# Load images from byte chunks
|
||||
images = [Image.open(io.BytesIO(b)) for b in chunks_bytes]
|
||||
total_height = sum(im.height for im in images)
|
||||
max_width = max(im.width for im in images)
|
||||
|
||||
# Create stitched image
|
||||
stitched = Image.new('RGB', (max_width, total_height))
|
||||
y_offset = 0
|
||||
for im in images:
|
||||
stitched.paste(im, (0, y_offset))
|
||||
y_offset += im.height
|
||||
im.close() # Close immediately after pasting
|
||||
|
||||
# Draw caption only if page was trimmed
|
||||
if original_page_height > capture_height:
|
||||
draw = ImageDraw.Draw(stitched)
|
||||
caption_text = f"WARNING: Screenshot was {original_page_height}px but trimmed to {capture_height}px because it was too long"
|
||||
padding = 10
|
||||
font = _get_caption_font()
|
||||
|
||||
bbox = draw.textbbox((0, 0), caption_text, font=font)
|
||||
text_width = bbox[2] - bbox[0]
|
||||
text_height = bbox[3] - bbox[1]
|
||||
|
||||
# Draw white background rectangle
|
||||
draw.rectangle([(0, 0), (max_width, text_height + 2 * padding)], fill=(255, 255, 255))
|
||||
|
||||
# Draw text centered
|
||||
text_x = (max_width - text_width) // 2
|
||||
draw.text((text_x, padding), caption_text, font=font, fill=(255, 0, 0))
|
||||
|
||||
# Encode to JPEG
|
||||
output = io.BytesIO()
|
||||
stitched.save(output, format="JPEG", quality=int(os.getenv("SCREENSHOT_QUALITY", SCREENSHOT_DEFAULT_QUALITY)), optimize=True)
|
||||
result = output.getvalue()
|
||||
|
||||
# Cleanup
|
||||
stitched.close()
|
||||
|
||||
return result
|
||||
|
||||
|
||||
def stitch_images_worker(pipe_conn, chunks_bytes, original_page_height, capture_height):
|
||||
"""
|
||||
Stitch image chunks together in a separate process.
|
||||
Used for large number of chunks (4+) to avoid blocking the main event loop.
|
||||
"""
|
||||
import os
|
||||
import io
|
||||
from PIL import Image, ImageDraw, ImageFont
|
||||
|
||||
try:
|
||||
|
||||
# Load images from byte chunks
|
||||
images = [Image.open(io.BytesIO(b)) for b in chunks_bytes]
|
||||
total_height = sum(im.height for im in images)
|
||||
@@ -27,21 +103,17 @@ def stitch_images_worker(pipe_conn, chunks_bytes, original_page_height, capture_
|
||||
for im in images:
|
||||
stitched.paste(im, (0, y_offset))
|
||||
y_offset += im.height
|
||||
im.close() # Close immediately after pasting
|
||||
|
||||
# Draw caption on top (overlaid, not extending canvas)
|
||||
draw = ImageDraw.Draw(stitched)
|
||||
|
||||
# Draw caption only if page was trimmed
|
||||
if original_page_height > capture_height:
|
||||
draw = ImageDraw.Draw(stitched)
|
||||
caption_text = f"WARNING: Screenshot was {original_page_height}px but trimmed to {capture_height}px because it was too long"
|
||||
padding = 10
|
||||
font_size = 35
|
||||
font_color = (255, 0, 0)
|
||||
background_color = (255, 255, 255)
|
||||
|
||||
|
||||
# Try to load a proper font
|
||||
# Try to load font
|
||||
try:
|
||||
font = ImageFont.truetype("arial.ttf", font_size)
|
||||
font = ImageFont.truetype("arial.ttf", 35)
|
||||
except IOError:
|
||||
font = ImageFont.load_default()
|
||||
|
||||
@@ -49,19 +121,16 @@ def stitch_images_worker(pipe_conn, chunks_bytes, original_page_height, capture_
|
||||
text_width = bbox[2] - bbox[0]
|
||||
text_height = bbox[3] - bbox[1]
|
||||
|
||||
# Draw white rectangle background behind text
|
||||
rect_top = 0
|
||||
rect_bottom = text_height + 2 * padding
|
||||
draw.rectangle([(0, rect_top), (max_width, rect_bottom)], fill=background_color)
|
||||
# Draw white background rectangle
|
||||
draw.rectangle([(0, 0), (max_width, text_height + 2 * padding)], fill=(255, 255, 255))
|
||||
|
||||
# Draw text centered horizontally, 10px padding from top of the rectangle
|
||||
# Draw text centered
|
||||
text_x = (max_width - text_width) // 2
|
||||
text_y = padding
|
||||
draw.text((text_x, text_y), caption_text, font=font, fill=font_color)
|
||||
draw.text((text_x, padding), caption_text, font=font, fill=(255, 0, 0))
|
||||
|
||||
# Encode and send image
|
||||
# Encode and send image with optimization
|
||||
output = io.BytesIO()
|
||||
stitched.save(output, format="JPEG", quality=int(os.getenv("SCREENSHOT_QUALITY", SCREENSHOT_DEFAULT_QUALITY)))
|
||||
stitched.save(output, format="JPEG", quality=int(os.getenv("SCREENSHOT_QUALITY", SCREENSHOT_DEFAULT_QUALITY)), optimize=True)
|
||||
pipe_conn.send_bytes(output.getvalue())
|
||||
|
||||
stitched.close()
|
||||
|
||||
@@ -14,8 +14,22 @@ class fetcher(Fetcher):
|
||||
proxy = None
|
||||
proxy_url = None
|
||||
|
||||
def __init__(self, proxy_override=None, custom_browser_connection_url=None):
|
||||
super().__init__()
|
||||
# Capability flags
|
||||
supports_browser_steps = False
|
||||
supports_screenshots = True
|
||||
supports_xpath_element_data = True
|
||||
|
||||
@classmethod
|
||||
def get_status_icon_data(cls):
|
||||
"""Return Chrome browser icon data for WebDriver fetcher."""
|
||||
return {
|
||||
'filename': 'google-chrome-icon.png',
|
||||
'alt': 'Using a Chrome browser',
|
||||
'title': 'Using a Chrome browser'
|
||||
}
|
||||
|
||||
def __init__(self, proxy_override=None, custom_browser_connection_url=None, **kwargs):
|
||||
super().__init__(**kwargs)
|
||||
from urllib.parse import urlparse
|
||||
from selenium.webdriver.common.proxy import Proxy
|
||||
|
||||
@@ -55,8 +69,10 @@ class fetcher(Fetcher):
|
||||
request_body=None,
|
||||
request_headers=None,
|
||||
request_method=None,
|
||||
screenshot_format=None,
|
||||
timeout=None,
|
||||
url=None,
|
||||
watch_uuid=None,
|
||||
):
|
||||
|
||||
import asyncio
|
||||
@@ -131,7 +147,34 @@ class fetcher(Fetcher):
|
||||
time.sleep(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay)
|
||||
self.content = driver.page_source
|
||||
self.headers = {}
|
||||
self.screenshot = driver.get_screenshot_as_png()
|
||||
|
||||
# Selenium always captures as PNG, convert to JPEG if needed
|
||||
screenshot_png = driver.get_screenshot_as_png()
|
||||
|
||||
# Convert to JPEG if requested (for smaller file size)
|
||||
if self.screenshot_format and self.screenshot_format.upper() == 'JPEG':
|
||||
from PIL import Image
|
||||
import io
|
||||
img = Image.open(io.BytesIO(screenshot_png))
|
||||
# Convert to RGB if needed (JPEG doesn't support transparency)
|
||||
# Always convert non-RGB modes to RGB to ensure JPEG compatibility
|
||||
if img.mode in ('RGBA', 'LA', 'P', 'PA'):
|
||||
# Handle transparency by compositing onto white background
|
||||
if img.mode == 'P':
|
||||
img = img.convert('RGBA')
|
||||
background = Image.new('RGB', img.size, (255, 255, 255))
|
||||
if img.mode in ('RGBA', 'LA', 'PA'):
|
||||
background.paste(img, mask=img.split()[-1]) # Use alpha channel as mask
|
||||
img = background
|
||||
elif img.mode != 'RGB':
|
||||
# For other modes, direct conversion
|
||||
img = img.convert('RGB')
|
||||
jpeg_buffer = io.BytesIO()
|
||||
img.save(jpeg_buffer, format='JPEG', quality=int(os.getenv("SCREENSHOT_QUALITY", 72)))
|
||||
self.screenshot = jpeg_buffer.getvalue()
|
||||
img.close()
|
||||
else:
|
||||
self.screenshot = screenshot_png
|
||||
except Exception as e:
|
||||
driver.quit()
|
||||
raise e
|
||||
@@ -141,3 +184,16 @@ class fetcher(Fetcher):
|
||||
# Run the selenium operations in a thread pool to avoid blocking the event loop
|
||||
loop = asyncio.get_event_loop()
|
||||
await loop.run_in_executor(None, _run_sync)
|
||||
|
||||
|
||||
# Plugin registration for built-in fetcher
|
||||
class WebDriverSeleniumFetcherPlugin:
|
||||
"""Plugin class that registers the WebDriver Selenium fetcher as a built-in plugin."""
|
||||
|
||||
def register_content_fetcher(self):
|
||||
"""Register the WebDriver Selenium fetcher"""
|
||||
return ('html_webdriver', fetcher)
|
||||
|
||||
|
||||
# Create module-level instance for plugin registration
|
||||
webdriver_selenium_plugin = WebDriverSeleniumFetcherPlugin()
|
||||
|
||||
@@ -57,14 +57,15 @@ class SignalPriorityQueue(queue.PriorityQueue):
|
||||
def put(self, item, block=True, timeout=None):
|
||||
# Call the parent's put method first
|
||||
super().put(item, block, timeout)
|
||||
|
||||
|
||||
# After putting the item in the queue, check if it has a UUID and emit signal
|
||||
if hasattr(item, 'item') and isinstance(item.item, dict) and 'uuid' in item.item:
|
||||
uuid = item.item['uuid']
|
||||
# Get the signal and send it if it exists
|
||||
watch_check_update = signal('watch_check_update')
|
||||
if watch_check_update:
|
||||
# Send the watch_uuid parameter
|
||||
# NOTE: This would block other workers from .put/.get while this signal sends
|
||||
# Signal handlers may iterate the queue/datastore while holding locks
|
||||
watch_check_update.send(watch_uuid=uuid)
|
||||
|
||||
# Send queue_length signal with current queue size
|
||||
@@ -312,14 +313,15 @@ class AsyncSignalPriorityQueue(asyncio.PriorityQueue):
|
||||
async def put(self, item):
|
||||
# Call the parent's put method first
|
||||
await super().put(item)
|
||||
|
||||
|
||||
# After putting the item in the queue, check if it has a UUID and emit signal
|
||||
if hasattr(item, 'item') and isinstance(item.item, dict) and 'uuid' in item.item:
|
||||
uuid = item.item['uuid']
|
||||
# Get the signal and send it if it exists
|
||||
watch_check_update = signal('watch_check_update')
|
||||
if watch_check_update:
|
||||
# Send the watch_uuid parameter
|
||||
# NOTE: This would block other workers from .put/.get while this signal sends
|
||||
# Signal handlers may iterate the queue/datastore while holding locks
|
||||
watch_check_update.send(watch_uuid=uuid)
|
||||
|
||||
# Send queue_length signal with current queue size
|
||||
|
||||
@@ -1,130 +0,0 @@
|
||||
import difflib
|
||||
from typing import List, Iterator, Union
|
||||
|
||||
# Colour choices were discussed in
# https://github.com/dgtlmoon/changedetection.io/issues/821#issuecomment-1241837050
# Alternative palette, kept for reference only:
#HTML_ADDED_STYLE = "background-color: #d2f7c2; color: #255d00;"
#HTML_CHANGED_INTO_STYLE = "background-color: #dafbe1; color: #116329;"
#HTML_CHANGED_STYLE = "background-color: #ffd6cc; color: #7a2000;"
#HTML_REMOVED_STYLE = "background-color: #ffebe9; color: #82071e;"

# @todo - In the future we can make this configurable
HTML_REMOVED_STYLE = "background-color: #fadad7; color: #b30000"
HTML_ADDED_STYLE = "background-color: #eaf2c2; color: #406619"
# "Changed" re-uses the removed colours, "changed into" re-uses the added colours
HTML_CHANGED_STYLE = HTML_REMOVED_STYLE
HTML_CHANGED_INTO_STYLE = HTML_ADDED_STYLE

# Placemarkers wrapped around diff lines; handler.py swaps them for HTML, Telegram,
# Discord (or other) markup. The text is chosen so HTML escaping cannot alter it.
REMOVED_PLACEMARKER_OPEN = '@removed_PLACEMARKER_OPEN'
REMOVED_PLACEMARKER_CLOSED = '@removed_PLACEMARKER_CLOSED'

ADDED_PLACEMARKER_OPEN = '@added_PLACEMARKER_OPEN'
ADDED_PLACEMARKER_CLOSED = '@added_PLACEMARKER_CLOSED'

CHANGED_PLACEMARKER_OPEN = '@changed_PLACEMARKER_OPEN'
CHANGED_PLACEMARKER_CLOSED = '@changed_PLACEMARKER_CLOSED'

CHANGED_INTO_PLACEMARKER_OPEN = '@changed_into_PLACEMARKER_OPEN'
CHANGED_INTO_PLACEMARKER_CLOSED = '@changed_into_PLACEMARKER_CLOSED'
|
||||
|
||||
def same_slicer(lst: List[str], start: int, end: int) -> List[str]:
    """Return lst[start:end]; for an empty range (start == end) return the single item at start."""
    if start == end:
        return [lst[start]]
    return lst[start:end]
|
||||
|
||||
def customSequenceMatcher(
    before: List[str],
    after: List[str],
    include_equal: bool = False,
    include_removed: bool = True,
    include_added: bool = True,
    include_replaced: bool = True,
    include_change_type_prefix: bool = True
) -> Iterator[List[str]]:
    """
    Diff two lists of lines and yield one list of lines per reported difflib opcode.

    Args:
        before (List[str]): Lines of the original text
        after (List[str]): Lines of the modified text
        include_equal (bool): Yield unchanged runs
        include_removed (bool): Yield deleted runs
        include_added (bool): Yield inserted runs
        include_replaced (bool): Yield replaced runs (old lines followed by new lines)
        include_change_type_prefix (bool): Wrap each changed line in its placemarker pair

    Yields:
        List[str]: The lines belonging to one opcode
    """
    def tagged(lines, opener, closer):
        # Wrap each line in the placemarker pair, unless prefixes were switched off
        if not include_change_type_prefix:
            return lines
        return [f'{opener}{line}{closer}' for line in lines]

    matcher = difflib.SequenceMatcher(isjunk=lambda x: x in " \t", a=before, b=after)

    for tag, alo, ahi, blo, bhi in matcher.get_opcodes():
        if tag == 'equal':
            if include_equal:
                yield before[alo:ahi]
        elif tag == 'delete':
            if include_removed:
                yield tagged(same_slicer(before, alo, ahi), REMOVED_PLACEMARKER_OPEN, REMOVED_PLACEMARKER_CLOSED)
        elif tag == 'replace':
            if include_replaced:
                old_part = tagged(same_slicer(before, alo, ahi), CHANGED_PLACEMARKER_OPEN, CHANGED_PLACEMARKER_CLOSED)
                new_part = tagged(same_slicer(after, blo, bhi), CHANGED_INTO_PLACEMARKER_OPEN, CHANGED_INTO_PLACEMARKER_CLOSED)
                yield old_part + new_part
        elif tag == 'insert':
            if include_added:
                yield tagged(same_slicer(after, blo, bhi), ADDED_PLACEMARKER_OPEN, ADDED_PLACEMARKER_CLOSED)
|
||||
|
||||
|
||||
def render_diff(
    previous_version_file_contents: str,
    newest_version_file_contents: str,
    include_equal: bool = False,
    include_removed: bool = True,
    include_added: bool = True,
    include_replaced: bool = True,
    line_feed_sep: str = "\n",
    include_change_type_prefix: bool = True,
    patch_format: bool = False
) -> str:
    """
    Produce a textual diff of two versions of a document.

    Args:
        previous_version_file_contents (str): Older text (a falsy value is treated as no lines)
        newest_version_file_contents (str): Newer text
        include_equal (bool): Emit unchanged lines
        include_removed (bool): Emit removed lines
        include_added (bool): Emit added lines
        include_replaced (bool): Emit replaced lines
        line_feed_sep (str): String used to join the output lines
        include_change_type_prefix (bool): Wrap changed lines in placemarkers
        patch_format (bool): Return difflib unified-diff output instead

    Returns:
        str: The rendered diff
    """
    # Trailing whitespace is stripped from every line before comparing
    newest_lines = [row.rstrip() for row in newest_version_file_contents.splitlines()]
    previous_lines = []
    if previous_version_file_contents:
        previous_lines = [row.rstrip() for row in previous_version_file_contents.splitlines()]

    if patch_format:
        return line_feed_sep.join(difflib.unified_diff(previous_lines, newest_lines))

    def flatten(lst: List[Union[str, List[str]]]) -> str:
        # Nested lists are joined recursively; plain strings are used as they are
        pieces = []
        for entry in lst:
            pieces.append(flatten(entry) if isinstance(entry, list) else entry)
        return line_feed_sep.join(pieces)

    return flatten(customSequenceMatcher(
        before=previous_lines,
        after=newest_lines,
        include_equal=include_equal,
        include_removed=include_removed,
        include_added=include_added,
        include_replaced=include_replaced,
        include_change_type_prefix=include_change_type_prefix
    ))
|
||||
479
changedetectionio/diff/__init__.py
Normal file
479
changedetectionio/diff/__init__.py
Normal file
@@ -0,0 +1,479 @@
|
||||
"""
|
||||
Diff rendering module for change detection.
|
||||
|
||||
This module provides functions for rendering differences between text content,
|
||||
with support for various output formats and tokenization strategies.
|
||||
"""
|
||||
|
||||
import difflib
|
||||
from typing import List, Iterator, Union
|
||||
from loguru import logger
|
||||
import diff_match_patch as dmp_module
|
||||
import re
|
||||
import time
|
||||
|
||||
from .tokenizers import TOKENIZERS, tokenize_words_and_html
|
||||
|
||||
# Styles have to be inline: Gmail, Outlook and similar clients do not support <style>,
# and Gmail strips <ins> and <del> tags entirely.

# Whole-line background styles
REMOVED_STYLE = "background-color: #fadad7; color: #b30000;"
ADDED_STYLE = "background-color: #eaf2c2; color: #406619;"

# Darker backgrounds for nested highlighting (the changed parts inside a line)
REMOVED_INNER_STYLE = "background-color: #ff867a; color: #111;"
ADDED_INNER_STYLE = "background-color: #b2e841; color: #444;"

# Export aliases for handler.py
HTML_REMOVED_STYLE = REMOVED_STYLE
HTML_ADDED_STYLE = ADDED_STYLE
HTML_CHANGED_STYLE = REMOVED_STYLE
HTML_CHANGED_INTO_STYLE = ADDED_STYLE

# Placemarkers - replaced by apply_service_tweaks() in handler.py.
# The text is chosen so HTML escaping cannot alter it.
REMOVED_PLACEMARKER_OPEN = '@removed_PLACEMARKER_OPEN'
REMOVED_PLACEMARKER_CLOSED = '@removed_PLACEMARKER_CLOSED'

ADDED_PLACEMARKER_OPEN = '@added_PLACEMARKER_OPEN'
ADDED_PLACEMARKER_CLOSED = '@added_PLACEMARKER_CLOSED'

CHANGED_PLACEMARKER_OPEN = '@changed_PLACEMARKER_OPEN'
CHANGED_PLACEMARKER_CLOSED = '@changed_PLACEMARKER_CLOSED'

CHANGED_INTO_PLACEMARKER_OPEN = '@changed_into_PLACEMARKER_OPEN'
CHANGED_INTO_PLACEMARKER_CLOSED = '@changed_into_PLACEMARKER_CLOSED'

# Pre-compiled once: matches any run of whitespace
WHITESPACE_NORMALIZE_RE = re.compile(r'\s+')
|
||||
|
||||
|
||||
def render_inline_word_diff(before_line: str, after_line: str, ignore_junk: bool = False, markdown_style: str = None, tokenizer: str = 'words_and_html') -> tuple[str, bool]:
    """
    Render word-level differences between two lines inline using diff-match-patch library.

    Args:
        before_line: Original line text
        after_line: Modified line text
        ignore_junk: Ignore whitespace-only changes
        markdown_style: Unused (kept for backwards compatibility)
        tokenizer: Name of tokenizer to use from TOKENIZERS registry (default: 'words_and_html')

    Returns:
        tuple[str, bool]: (diff output with inline word-level highlighting, has_changes flag)
    """
    # Normalize whitespace if ignore_junk is enabled: every whitespace run becomes one space
    if ignore_junk:
        before_normalized = WHITESPACE_NORMALIZE_RE.sub(' ', before_line)
        after_normalized = WHITESPACE_NORMALIZE_RE.sub(' ', after_line)
    else:
        before_normalized = before_line
        after_normalized = after_line

    # Use diff-match-patch with word-level tokenization
    # Strategy: Use linesToChars to treat words as atomic units
    dmp = dmp_module.diff_match_patch()

    # Unknown tokenizer names fall back to the words-and-HTML tokenizer
    tokenizer_func = TOKENIZERS.get(tokenizer, tokenize_words_and_html)

    before_tokens = tokenizer_func(before_normalized)
    after_tokens = tokenizer_func(after_normalized or ' ')

    # Put each token on its own "line" so linesToChars treats it as one atomic unit.
    # Every token - including the last one - must end with '\n': linesToChars keys
    # each line *with* its newline, so an unterminated final token ("fox") would
    # never match the same token elsewhere in the other text ("fox\n").
    before_text = ''.join(f'{token}\n' for token in before_tokens)
    after_text = ''.join(f'{token}\n' for token in after_tokens)

    # Encode tokens as single characters, diff the encoded strings, decode again
    line_before, line_after, line_array = dmp.diff_linesToChars(before_text, after_text)
    diffs = dmp.diff_main(line_before, line_after, False)
    dmp.diff_charsToLines(diffs, line_array)

    # Remove the newlines we added for tokenization
    diffs = [(op, text.replace('\n', '')) for op, text in diffs]

    # DON'T apply semantic cleanup here - it would break token boundaries
    # (e.g., "63" -> "66" would become "6" + "3" vs "6" + "6")
    # We want to preserve the tokenizer's word boundaries

    has_changes = any(op != 0 for op, _ in diffs)

    if ignore_junk and not has_changes:
        return after_line, False

    # The whole line counts as replaced when no non-whitespace text is shared
    whole_line_replaced = not any(op == 0 and text.strip() for op, text in diffs)

    if whole_line_replaced:
        # Wrap the entire removed content once and the entire added content once
        removed_tokens = []
        added_tokens = []

        for op, text in diffs:
            if op == 0:  # Equal (e.g., whitespace tokens in common positions)
                # Include in both removed and added to preserve spacing
                removed_tokens.append(text)
                added_tokens.append(text)
            elif op == -1:  # Deletion
                removed_tokens.append(text)
            elif op == 1:  # Insertion
                added_tokens.append(text)

        result_parts = []

        if removed_tokens:
            removed_joined = ''.join(removed_tokens)
            removed_full = removed_joined.rstrip()
            # Trailing whitespace stays outside the placemarkers
            trailing_removed = removed_joined[len(removed_full):]
            result_parts.append(f'{CHANGED_PLACEMARKER_OPEN}{removed_full}{CHANGED_PLACEMARKER_CLOSED}{trailing_removed}')

        if added_tokens:
            if result_parts:  # Add newline between removed and added
                result_parts.append('\n')
            added_joined = ''.join(added_tokens)
            added_full = added_joined.rstrip()
            trailing_added = added_joined[len(added_full):]
            result_parts.append(f'{CHANGED_INTO_PLACEMARKER_OPEN}{added_full}{CHANGED_INTO_PLACEMARKER_CLOSED}{trailing_added}')

        return ''.join(result_parts), has_changes

    # Inline changes within the line
    result_parts = []
    for op, text in diffs:
        if op == 0:  # Equal
            result_parts.append(text)
            continue

        if op == 1:  # Insertion
            opener, closer = ADDED_PLACEMARKER_OPEN, ADDED_PLACEMARKER_CLOSED
        elif op == -1:  # Deletion
            opener, closer = REMOVED_PLACEMARKER_OPEN, REMOVED_PLACEMARKER_CLOSED
        else:
            continue

        # Don't wrap empty content (e.g., whitespace-only tokens after rstrip);
        # trailing whitespace is emitted outside the placemarkers
        content = text.rstrip()
        trailing = text[len(content):]
        if content:
            result_parts.append(f'{opener}{content}{closer}{trailing}')
        else:
            result_parts.append(trailing)

    return ''.join(result_parts), has_changes
|
||||
|
||||
|
||||
def render_nested_line_diff(before_line: str, after_line: str, ignore_junk: bool = False, tokenizer: str = 'words_and_html') -> tuple[str, str, bool]:
    """
    Render line-level differences with nested highlighting for changed parts.

    Returns two separate lines:
    - Before line: wrapped in the "changed" placemarkers, removed parts in a REMOVED_INNER_STYLE span
    - After line: wrapped in the "changed into" placemarkers, added parts in an ADDED_INNER_STYLE span

    Args:
        before_line: Original line text
        after_line: Modified line text
        ignore_junk: Ignore whitespace-only changes
        tokenizer: Name of tokenizer to use from TOKENIZERS registry

    Returns:
        tuple[str, str, bool]: (before_with_highlights, after_with_highlights, has_changes)
    """
    # Normalize whitespace if ignore_junk is enabled
    if ignore_junk:
        before_normalized = WHITESPACE_NORMALIZE_RE.sub(' ', before_line)
        after_normalized = WHITESPACE_NORMALIZE_RE.sub(' ', after_line)
    else:
        before_normalized = before_line
        after_normalized = after_line

    # Use diff-match-patch with word-level tokenization
    dmp = dmp_module.diff_match_patch()

    # Unknown tokenizer names fall back to the words-and-HTML tokenizer
    tokenizer_func = TOKENIZERS.get(tokenizer, tokenize_words_and_html)

    before_tokens = tokenizer_func(before_normalized)
    after_tokens = tokenizer_func(after_normalized or ' ')

    # Put each token on its own "line" so linesToChars treats it as one atomic unit.
    # Every token - including the last one - must end with '\n': linesToChars keys
    # each line *with* its newline, so an unterminated final token would never
    # match the same token in a non-final position of the other text.
    before_text = ''.join(f'{token}\n' for token in before_tokens)
    after_text = ''.join(f'{token}\n' for token in after_tokens)

    # Encode tokens as single characters, diff the encoded strings, decode again
    line_before, line_after, line_array = dmp.diff_linesToChars(before_text, after_text)
    diffs = dmp.diff_main(line_before, line_after, False)
    dmp.diff_charsToLines(diffs, line_array)

    # Remove the newlines we added for tokenization
    diffs = [(op, text.replace('\n', '')) for op, text in diffs]

    # DON'T apply semantic cleanup here - it would break token boundaries
    # (e.g., "63" -> "66" would become "6" + "3" vs "6" + "6")
    # We want to preserve the tokenizer's word boundaries

    has_changes = any(op != 0 for op, _ in diffs)

    if ignore_junk and not has_changes:
        return before_line, after_line, False

    # Build both lines in one pass: equal text goes to both sides, deletions are
    # highlighted only on the before side, insertions only on the after side
    before_parts = []
    after_parts = []
    for op, text in diffs:
        if op == 0:  # Equal
            before_parts.append(text)
            after_parts.append(text)
        elif op == -1:  # Deletion (in before)
            before_parts.append(f'<span style="{REMOVED_INNER_STYLE}">{text}</span>')
        elif op == 1:  # Insertion (in after)
            after_parts.append(f'<span style="{ADDED_INNER_STYLE}">{text}</span>')

    before_content = ''.join(before_parts)
    after_content = ''.join(after_parts)

    # Wrap content with placemarkers (inner HTML highlighting is preserved)
    before_html = f'{CHANGED_PLACEMARKER_OPEN}{before_content}{CHANGED_PLACEMARKER_CLOSED}'
    after_html = f'{CHANGED_INTO_PLACEMARKER_OPEN}{after_content}{CHANGED_INTO_PLACEMARKER_CLOSED}'

    return before_html, after_html, has_changes
|
||||
|
||||
|
||||
def same_slicer(lst: List[str], start: int, end: int) -> List[str]:
    """Return lst[start:end]; for an empty range (start == end) return the single item at start."""
    if start == end:
        return [lst[start]]
    return lst[start:end]
|
||||
|
||||
def customSequenceMatcher(
    before: List[str],
    after: List[str],
    include_equal: bool = False,
    include_removed: bool = True,
    include_added: bool = True,
    include_replaced: bool = True,
    include_change_type_prefix: bool = True,
    word_diff: bool = False,
    context_lines: int = 0,
    case_insensitive: bool = False,
    ignore_junk: bool = False,
    tokenizer: str = 'words_and_html'
) -> Iterator[List[str]]:
    """
    Diff two lists of lines and yield one list of output lines per reported difflib opcode.

    Args:
        before (List[str]): Lines of the original text
        after (List[str]): Lines of the modified text
        include_equal (bool): Yield unchanged runs in full
        include_removed (bool): Yield deleted runs
        include_added (bool): Yield inserted runs
        include_replaced (bool): Yield replaced runs
        include_change_type_prefix (bool): Wrap changed lines in their placemarker pair
        word_diff (bool): For a one-line-to-one-line replacement, yield an inline word-level diff
        context_lines (int): When include_equal is off, number of unchanged lines to keep
            either side of each change (like grep -C)
        case_insensitive (bool): Compare lines case-insensitively
        ignore_junk (bool): Collapse whitespace runs before comparing
        tokenizer (str): Tokenizer name passed through to the inline word diff

    Yields:
        List[str]: The output lines belonging to one opcode
    """
    def comparable(line):
        # Key used for matching only; the yielded text always comes from before/after
        key = line.lower() if case_insensitive else line
        if ignore_junk:
            # Normalize whitespace: every whitespace run becomes a single space
            key = WHITESPACE_NORMALIZE_RE.sub(' ', key)
        return key

    def tagged(lines, opener, closer):
        # Wrap each line in the placemarker pair, unless prefixes were switched off
        if not include_change_type_prefix:
            return lines
        return [f'{opener}{line}{closer}' for line in lines]

    matcher = difflib.SequenceMatcher(
        isjunk=lambda x: x in " \t",
        a=[comparable(line) for line in before],
        b=[comparable(line) for line in after],
    )
    opcodes = matcher.get_opcodes()

    # Indexes (into `before`) of unchanged lines that sit within context_lines of a change
    context_rows = set()
    if context_lines > 0 and not include_equal:
        last = len(opcodes) - 1
        for pos, opcode in enumerate(opcodes):
            if opcode[0] == 'equal':
                continue
            # Tail of the equal block directly before this change
            if pos > 0 and opcodes[pos - 1][0] == 'equal':
                prev_lo, prev_hi = opcodes[pos - 1][1], opcodes[pos - 1][2]
                context_rows.update(range(max(prev_lo, prev_hi - context_lines), prev_hi))
            # Head of the equal block directly after this change
            if pos < last and opcodes[pos + 1][0] == 'equal':
                next_lo, next_hi = opcodes[pos + 1][1], opcodes[pos + 1][2]
                context_rows.update(range(next_lo, min(next_hi, next_lo + context_lines)))

    # Remember! gmail, outlook etc dont support <style> must be inline.
    # Gmail: strips <ins> and <del> tags entirely.
    for tag, alo, ahi, blo, bhi in opcodes:
        if tag == 'equal':
            if include_equal:
                yield before[alo:ahi]
            elif context_lines > 0:
                kept = [before[row] for row in range(alo, ahi) if row in context_rows]
                if kept:
                    yield kept

        elif tag == 'delete':
            if include_removed:
                yield tagged(same_slicer(before, alo, ahi), REMOVED_PLACEMARKER_OPEN, REMOVED_PLACEMARKER_CLOSED)

        elif tag == 'replace':
            if not include_replaced:
                continue
            before_lines = same_slicer(before, alo, ahi)
            after_lines = same_slicer(after, blo, bhi)

            if word_diff and len(before_lines) == 1 and len(after_lines) == 1:
                # Single line replaced by a single line: highlight the changed words inline
                inline_diff, has_changes = render_inline_word_diff(before_lines[0], after_lines[0], ignore_junk=ignore_junk, tokenizer=tokenizer)
                # With ignore_junk, a whitespace-only difference is not reported at all
                if ignore_junk and not has_changes:
                    continue
                yield [inline_diff]
            else:
                # Line-level output: all old lines followed by all new lines
                yield (
                    tagged(before_lines, CHANGED_PLACEMARKER_OPEN, CHANGED_PLACEMARKER_CLOSED)
                    + tagged(after_lines, CHANGED_INTO_PLACEMARKER_OPEN, CHANGED_INTO_PLACEMARKER_CLOSED)
                )

        elif tag == 'insert':
            if include_added:
                yield tagged(same_slicer(after, blo, bhi), ADDED_PLACEMARKER_OPEN, ADDED_PLACEMARKER_CLOSED)
|
||||
|
||||
def render_diff(
    previous_version_file_contents: str,
    newest_version_file_contents: str,
    include_equal: bool = False,
    include_removed: bool = True,
    include_added: bool = True,
    include_replaced: bool = True,
    include_change_type_prefix: bool = True,
    patch_format: bool = False,
    word_diff: bool = True,
    context_lines: int = 0,
    case_insensitive: bool = False,
    ignore_junk: bool = False,
    tokenizer: str = 'words_and_html'
) -> str:
    """
    Render the difference between two file contents.

    Args:
        previous_version_file_contents (str): Original file contents (a falsy value is treated as no lines)
        newest_version_file_contents (str): Modified file contents (a falsy value is treated as no lines)
        include_equal (bool): Include unchanged parts
        include_removed (bool): Include removed parts
        include_added (bool): Include added parts
        include_replaced (bool): Include replaced parts
        include_change_type_prefix (bool): Add prefixes to indicate change types
        patch_format (bool): Use patch format for output
        word_diff (bool): Use word-level diffing for replaced lines (controls inline rendering)
        context_lines (int): Number of unchanged lines to show around changes (like grep -C)
        case_insensitive (bool): Perform case-insensitive comparison, By default the test_json_diff/process.py is case sensitive, so this follows same logic
        ignore_junk (bool): Ignore whitespace-only changes
        tokenizer (str): Name of tokenizer to use from TOKENIZERS registry (default: 'words_and_html')

    Returns:
        str: Rendered difference
    """
    # Trailing whitespace is stripped from every line before comparing
    newest_lines = [line.rstrip() for line in newest_version_file_contents.splitlines()] if newest_version_file_contents else []
    previous_lines = [line.rstrip() for line in previous_version_file_contents.splitlines()] if previous_version_file_contents else []
    now = time.time()
    logger.debug(
        f"diff options: "
        f"include_equal={include_equal}, "
        f"include_removed={include_removed}, "
        f"include_added={include_added}, "
        f"include_replaced={include_replaced}, "
        f"include_change_type_prefix={include_change_type_prefix}, "
        f"patch_format={patch_format}, "
        f"word_diff={word_diff}, "
        f"context_lines={context_lines}, "
        f"case_insensitive={case_insensitive}, "
        f"ignore_junk={ignore_junk}, "
        f"tokenizer={tokenizer}"
    )
    if patch_format:
        # NOTE(review): the input lines carry no trailing newline but unified_diff is
        # called with its default lineterm, so the control lines end in '\n' and
        # the join below leaves a blank line after each of them - confirm whether
        # consumers rely on that before passing lineterm=''.
        patch = difflib.unified_diff(previous_lines, newest_lines)
        return "\n".join(patch)

    rendered_diff = customSequenceMatcher(
        before=previous_lines,
        after=newest_lines,
        include_equal=include_equal,
        include_removed=include_removed,
        include_added=include_added,
        include_replaced=include_replaced,
        include_change_type_prefix=include_change_type_prefix,
        word_diff=word_diff,
        context_lines=context_lines,
        case_insensitive=case_insensitive,
        ignore_junk=ignore_junk,
        tokenizer=tokenizer
    )

    def flatten(lst: List[Union[str, List[str]]]) -> str:
        # One level of nesting is flattened, then everything is joined with newlines
        result = []
        for x in lst:
            if isinstance(x, list):
                result.extend(x)
            else:
                result.append(x)
        return "\n".join(result)

    # customSequenceMatcher is a generator: no diffing happens until it is consumed.
    # Consume it first so the timing below covers the real work.
    output = flatten(rendered_diff)

    logger.debug(f"Diff generated in {time.time() - now:.2f}s")

    return output
|
||||
|
||||
|
||||
# Export main public API
|
||||
__all__ = [
|
||||
'render_diff',
|
||||
'customSequenceMatcher',
|
||||
'render_inline_word_diff',
|
||||
'render_nested_line_diff',
|
||||
'TOKENIZERS',
|
||||
'REMOVED_STYLE',
|
||||
'ADDED_STYLE',
|
||||
'REMOVED_INNER_STYLE',
|
||||
'ADDED_INNER_STYLE',
|
||||
]
|
||||
23
changedetectionio/diff/tokenizers/__init__.py
Normal file
23
changedetectionio/diff/tokenizers/__init__.py
Normal file
@@ -0,0 +1,23 @@
|
||||
"""
|
||||
Tokenizers for diff operations.
|
||||
|
||||
This module provides various tokenization strategies for use with the diff system.
|
||||
New tokenizers can be easily added by:
|
||||
1. Creating a new module in this directory
|
||||
2. Importing and registering it in the TOKENIZERS dictionary below
|
||||
"""
|
||||
|
||||
from .natural_text import tokenize_words
|
||||
from .words_and_html import tokenize_words_and_html
|
||||
|
||||
# Registry of available tokenizers: the key is the name callers pass as `tokenizer=`
TOKENIZERS = dict(
    words=tokenize_words,
    words_and_html=tokenize_words_and_html,
)

__all__ = ['tokenize_words', 'tokenize_words_and_html', 'TOKENIZERS']
|
||||
44
changedetectionio/diff/tokenizers/natural_text.py
Normal file
44
changedetectionio/diff/tokenizers/natural_text.py
Normal file
@@ -0,0 +1,44 @@
|
||||
"""
|
||||
Simple word tokenizer using whitespace boundaries.
|
||||
|
||||
This is a simpler tokenizer that treats all whitespace as token boundaries
|
||||
without special handling for HTML tags or other markup.
|
||||
"""
|
||||
|
||||
from typing import List
|
||||
|
||||
|
||||
def tokenize_words(text: str) -> List[str]:
    """
    Tokenize text on whitespace, keeping every whitespace character as its own token.

    No markup is treated specially: an HTML tag is split like any other text.

    Args:
        text: Input text to tokenize

    Returns:
        List of tokens; each token is either a run of non-whitespace characters
        or a single whitespace character

    Examples:
        >>> tokenize_words("Hello world")
        ['Hello', ' ', 'world']
        >>> tokenize_words("one  two")
        ['one', ' ', ' ', 'two']
    """
    pieces: List[str] = []
    word_start = None  # index where the word currently being scanned began

    for pos, ch in enumerate(text):
        if not ch.isspace():
            if word_start is None:
                word_start = pos
            continue

        # Whitespace closes the pending word (if any) and is emitted by itself
        if word_start is not None:
            pieces.append(text[word_start:pos])
            word_start = None
        pieces.append(ch)

    if word_start is not None:
        pieces.append(text[word_start:])

    return pieces
|
||||
61
changedetectionio/diff/tokenizers/words_and_html.py
Normal file
61
changedetectionio/diff/tokenizers/words_and_html.py
Normal file
@@ -0,0 +1,61 @@
|
||||
"""
|
||||
Tokenizer that preserves HTML tags as atomic units while splitting on whitespace.
|
||||
|
||||
This tokenizer is specifically designed for HTML content where:
|
||||
- HTML tags should remain intact (e.g., '<p>', '<a href="...">')
|
||||
- Whitespace tokens are preserved for accurate diff reconstruction
|
||||
- Words are split on whitespace boundaries
|
||||
"""
|
||||
|
||||
from typing import List
|
||||
|
||||
|
||||
def tokenize_words_and_html(text: str) -> List[str]:
    """
    Tokenize text into words, single whitespace characters and whole HTML tags.

    Everything from a '<' up to the next '>' is kept together as one token, so
    whitespace inside a tag does not split it. Outside tags, each whitespace
    character is its own token and runs of other characters form words.

    Args:
        text: Input text to tokenize

    Returns:
        List of tokens (words, spaces, HTML tags)

    Examples:
        >>> tokenize_words_and_html("<p>Hello world</p>")
        ['<p>', 'Hello', ' ', 'world', '</p>']
        >>> tokenize_words_and_html("<a href='test.com'>link</a>")
        ['<a href=\\'test.com\\'>', 'link', '</a>']
    """
    pieces: List[str] = []
    token_start = None  # index where the token currently being scanned began
    inside_tag = False

    for pos, ch in enumerate(text):
        if ch == '<':
            # A '<' always starts a new token, closing whatever was pending
            if token_start is not None:
                pieces.append(text[token_start:pos])
            token_start = pos
            inside_tag = True
        elif ch == '>' and inside_tag:
            # Closing bracket completes the tag token
            pieces.append(text[token_start:pos + 1])
            token_start = None
            inside_tag = False
        elif ch.isspace() and not inside_tag:
            # Whitespace outside a tag closes the pending word and is emitted by itself
            if token_start is not None:
                pieces.append(text[token_start:pos])
                token_start = None
            pieces.append(ch)
        elif token_start is None:
            token_start = pos

    if token_start is not None:
        pieces.append(text[token_start:])

    return pieces
|
||||
43
changedetectionio/favicon_utils.py
Normal file
43
changedetectionio/favicon_utils.py
Normal file
@@ -0,0 +1,43 @@
|
||||
"""
|
||||
Favicon utilities for changedetection.io
|
||||
Handles favicon MIME type detection with caching
|
||||
"""
|
||||
|
||||
from functools import lru_cache
|
||||
|
||||
|
||||
@lru_cache(maxsize=1000)
def get_favicon_mime_type(filepath):
    """
    Detect MIME type of favicon by reading file content using puremagic.
    Results are cached per filepath, so a given path is only inspected once
    while it stays in the cache.

    Detection falls through three stages: puremagic on the first bytes of the
    file, then mimetypes on the file name, then a guess from the extension.

    Args:
        filepath: Full path to the favicon file (str or os.PathLike)

    Returns:
        MIME type string (e.g., 'image/png')
    """
    import mimetypes
    import os

    # Normalise once so str and pathlib.Path inputs behave the same in the
    # name-based fallbacks below
    path_str = os.fspath(filepath)
    mime = None

    try:
        import puremagic
        with open(filepath, 'rb') as f:
            content_bytes = f.read(200)  # Read first 200 bytes

        detections = puremagic.magic_string(content_bytes)
        if detections:
            mime = detections[0].mime_type
    except Exception:
        # Deliberately best-effort: a missing puremagic, an unreadable file or
        # unrecognised content all fall through to the name-based guesses
        pass

    # Fallback to mimetypes if puremagic fails
    if not mime:
        mime, _ = mimetypes.guess_type(path_str)

    # Final fallback based on extension (case-insensitive)
    if not mime:
        mime = 'image/x-icon' if path_str.lower().endswith('.ico') else 'image/png'

    return mime
|
||||
@@ -9,6 +9,7 @@ import threading
|
||||
import time
|
||||
import timeago
|
||||
from blinker import signal
|
||||
from pathlib import Path
|
||||
|
||||
from changedetectionio.strtobool import strtobool
|
||||
from threading import Event
|
||||
@@ -23,10 +24,10 @@ from flask import (
|
||||
render_template,
|
||||
request,
|
||||
send_from_directory,
|
||||
session,
|
||||
url_for,
|
||||
)
|
||||
from flask_compress import Compress as FlaskCompress
|
||||
from flask_login import current_user
|
||||
from flask_restful import abort, Api
|
||||
from flask_cors import CORS
|
||||
|
||||
@@ -34,13 +35,18 @@ from flask_cors import CORS
|
||||
# Make this a global singleton to avoid multiple signal objects
|
||||
watch_check_update = signal('watch_check_update', doc='Signal sent when a watch check is completed')
|
||||
from flask_wtf import CSRFProtect
|
||||
from flask_babel import Babel, gettext, get_locale
|
||||
from loguru import logger
|
||||
|
||||
from changedetectionio import __version__
|
||||
from changedetectionio import queuedWatchMetaData
|
||||
from changedetectionio.api import Watch, WatchHistory, WatchSingleHistory, CreateWatch, Import, SystemInfo, Tag, Tags, Notifications, WatchFavicon
|
||||
from changedetectionio.api import Watch, WatchHistory, WatchSingleHistory, WatchHistoryDiff, CreateWatch, Import, SystemInfo, Tag, Tags, Notifications, WatchFavicon
|
||||
from changedetectionio.api.Search import Search
|
||||
from .time_handler import is_within_schedule
|
||||
from changedetectionio.languages import get_available_languages, get_language_codes, get_flag_for_locale, get_timeago_locale
|
||||
from changedetectionio.favicon_utils import get_favicon_mime_type
|
||||
|
||||
IN_PYTEST = "pytest" in sys.modules or "PYTEST_CURRENT_TEST" in os.environ
|
||||
|
||||
datastore = None
|
||||
|
||||
@@ -51,7 +57,7 @@ extra_stylesheets = []
|
||||
# Use bulletproof janus-based queues for sync/async reliability
|
||||
update_q = RecheckPriorityQueue()
|
||||
notification_q = NotificationQueue()
|
||||
MAX_QUEUE_SIZE = 2000
|
||||
MAX_QUEUE_SIZE = 5000
|
||||
|
||||
app = Flask(__name__,
|
||||
static_url_path="",
|
||||
@@ -65,7 +71,15 @@ socketio_server = None
|
||||
CORS(app)
|
||||
|
||||
# Super handy for compressing large BrowserSteps responses and others
|
||||
FlaskCompress(app)
|
||||
# Flask-Compress handles HTTP compression, Socket.IO compression disabled to prevent memory leak
|
||||
compress = FlaskCompress()
|
||||
app.config['COMPRESS_MIN_SIZE'] = 2096
|
||||
app.config['COMPRESS_MIMETYPES'] = ['text/html', 'text/css', 'text/javascript', 'application/json', 'application/javascript', 'image/svg+xml']
|
||||
# Use gzip only - smaller memory footprint than zstd/brotli (4-8KB vs 200-500KB contexts)
|
||||
app.config['COMPRESS_ALGORITHM'] = ['gzip']
|
||||
compress.init_app(app)
|
||||
app.config['TEMPLATES_AUTO_RELOAD'] = False
|
||||
|
||||
|
||||
# Stop browser caching of assets
|
||||
app.config['SEND_FILE_MAX_AGE_DEFAULT'] = 0
|
||||
@@ -76,11 +90,43 @@ app.config['NEW_VERSION_AVAILABLE'] = False
|
||||
if os.getenv('FLASK_SERVER_NAME'):
|
||||
app.config['SERVER_NAME'] = os.getenv('FLASK_SERVER_NAME')
|
||||
|
||||
# Babel/i18n configuration
|
||||
app.config['BABEL_TRANSLATION_DIRECTORIES'] = str(Path(__file__).parent / 'translations')
|
||||
app.config['BABEL_DEFAULT_LOCALE'] = 'en_GB'
|
||||
|
||||
# Session configuration
|
||||
# NOTE: Flask session (for locale, etc.) is separate from Flask-Login's remember-me cookie
|
||||
# - Flask session stores data like session['locale'] in a signed cookie
|
||||
# - Flask-Login's remember=True creates a separate authentication cookie
|
||||
# - Setting PERMANENT_SESSION_LIFETIME controls how long the Flask session cookie lasts
|
||||
from datetime import timedelta
|
||||
app.config['PERMANENT_SESSION_LIFETIME'] = timedelta(days=3650) # ~10 years (effectively unlimited)
|
||||
|
||||
#app.config["EXPLAIN_TEMPLATE_LOADING"] = True
|
||||
|
||||
# Disables caching of the templates
|
||||
app.config['TEMPLATES_AUTO_RELOAD'] = True
|
||||
|
||||
app.jinja_env.add_extension('jinja2.ext.loopcontrols')
|
||||
|
||||
# Configure Jinja2 to search for templates in plugin directories
|
||||
def _configure_plugin_templates():
    """Extend the app's Jinja2 loader with any template directories supplied by plugins.

    When plugins report no template paths the loader is left untouched.
    """
    from jinja2 import ChoiceLoader, FileSystemLoader
    from changedetectionio.pluggy_interface import get_plugin_template_paths

    plugin_template_paths = get_plugin_template_paths()
    if not plugin_template_paths:
        return

    # The app's own loader stays first so built-in templates win over plugin ones
    search_order = [app.jinja_loader]
    search_order.extend(FileSystemLoader(template_dir) for template_dir in plugin_template_paths)

    app.jinja_loader = ChoiceLoader(search_order)
    logger.info(f"Configured Jinja2 to search {len(plugin_template_paths)} plugin template directories")
|
||||
|
||||
# Configure plugin templates (called after plugins are loaded)
|
||||
_configure_plugin_templates()
|
||||
csrf = CSRFProtect()
|
||||
csrf.init_app(app)
|
||||
notification_debug_log=[]
|
||||
@@ -101,12 +147,12 @@ def init_app_secret(datastore_path):
|
||||
path = os.path.join(datastore_path, "secret.txt")
|
||||
|
||||
try:
|
||||
with open(path, "r") as f:
|
||||
with open(path, "r", encoding='utf-8') as f:
|
||||
secret = f.read()
|
||||
|
||||
except FileNotFoundError:
|
||||
import secrets
|
||||
with open(path, "w") as f:
|
||||
with open(path, "w", encoding='utf-8') as f:
|
||||
secret = secrets.token_hex(32)
|
||||
f.write(secret)
|
||||
|
||||
@@ -183,16 +229,26 @@ def _get_worker_status_info():
|
||||
def _jinja2_filter_datetime(watch_obj, format="%Y-%m-%d %H:%M:%S"):
    """Render a watch's ``last_checked`` time as a localised relative string.

    Args:
        watch_obj: Mapping that provides a ``'last_checked'`` value.
        format: Not used by this function body; kept so existing callers still work.

    Returns:
        The translated text 'Not yet' when ``last_checked`` is 0, otherwise the
        string produced by ``timeago.format`` for the current locale.
    """
    if watch_obj['last_checked'] == 0:
        return gettext('Not yet')

    locale = get_timeago_locale(str(get_locale()))
    try:
        return timeago.format(int(watch_obj['last_checked']), time.time(), locale)
    except Exception:
        # Fallback to English if locale not supported by timeago.
        # `Exception` (not a bare except) so KeyboardInterrupt/SystemExit still propagate.
        return timeago.format(int(watch_obj['last_checked']), time.time(), 'en')
|
||||
|
||||
@app.template_filter('format_timestamp_timeago')
def _jinja2_filter_datetimestamp(timestamp, format="%Y-%m-%d %H:%M:%S"):
    """Render a timestamp as a localised relative string.

    Args:
        timestamp: Value convertible with ``int()``; any falsy value means "never".
        format: Not used by this function body; kept so existing callers still work.

    Returns:
        The translated text 'Not yet' for a falsy timestamp, otherwise the string
        produced by ``timeago.format`` for the current locale.
    """
    if not timestamp:
        return gettext('Not yet')

    locale = get_timeago_locale(str(get_locale()))
    try:
        return timeago.format(int(timestamp), time.time(), locale)
    except Exception:
        # Fallback to English if locale not supported by timeago.
        # `Exception` (not a bare except) so KeyboardInterrupt/SystemExit still propagate.
        return timeago.format(int(timestamp), time.time(), 'en')
|
||||
|
||||
|
||||
@app.template_filter('pagination_slice')
|
||||
@@ -206,10 +262,78 @@ def _jinja2_filter_pagination_slice(arr, skip):
|
||||
@app.template_filter('format_seconds_ago')
def _jinja2_filter_seconds_precise(timestamp):
    """Return the whole seconds elapsed since ``timestamp``, with thousands separators.

    A timestamp equal to ``False`` yields the translated text 'Not yet' instead.
    """
    # Note: the equality test (rather than `is False`) also matches a timestamp of 0
    if timestamp == False:
        return gettext('Not yet')

    elapsed_seconds = int(time.time() - timestamp)
    return f"{elapsed_seconds:,d}"
|
||||
|
||||
@app.template_filter('fetcher_status_icons')
def _jinja2_filter_fetcher_status_icons(fetcher_name):
    """Get status icon HTML for a given fetcher.

    This filter checks both built-in fetchers and plugin fetchers for status icons.
    Plugin-provided icon data takes priority over a built-in fetcher of the same name.

    Every value taken from the icon data (URL, alt, title, style) is HTML-escaped
    before being placed into the tag, because the data may come from a plugin.

    Args:
        fetcher_name: The fetcher name (e.g., 'html_webdriver', 'html_js_zyte')

    Returns:
        str: HTML string containing status icon elements, or '' when no icon
        data (as a dict) is available for the fetcher

    Raises:
        KeyError: if the icon data lacks a 'filename', 'alt' or 'title' key
    """
    from changedetectionio import content_fetchers
    from changedetectionio.pluggy_interface import collect_fetcher_status_icons
    from markupsafe import Markup
    from flask import request, url_for

    icon_data = None

    # First check if it's a plugin fetcher (plugins have priority)
    plugin_icon_data = collect_fetcher_status_icons(fetcher_name)
    if plugin_icon_data:
        icon_data = plugin_icon_data
    # Check if it's a built-in fetcher
    elif hasattr(content_fetchers, fetcher_name):
        fetcher_class = getattr(content_fetchers, fetcher_name)
        if hasattr(fetcher_class, 'get_status_icon_data'):
            icon_data = fetcher_class.get_status_icon_data()

    if not (icon_data and isinstance(icon_data, dict)):
        return ''

    # Use 'group' from icon_data if specified, otherwise default to 'images'
    group = icon_data.get('group', 'images')

    # Try to use url_for, but fall back to manual URL building if endpoint not registered yet
    try:
        icon_url = url_for('static_content', group=group, filename=icon_data['filename'])
    except Exception:
        # Fallback: build URL manually respecting APPLICATION_ROOT
        app_root = request.script_root if hasattr(request, 'script_root') else ''
        icon_url = f"{app_root}/static/{group}/{icon_data['filename']}"

    # Markup.format() escapes its arguments, so quotes or angle brackets inside the
    # icon data cannot break out of the attribute values.
    if icon_data.get('style'):
        style_attr = Markup(' style="{}"').format(icon_data['style'])
    else:
        style_attr = Markup('')

    return Markup('<img class="status-icon" src="{}" alt="{}" title="{}"{}>').format(
        icon_url, icon_data['alt'], icon_data['title'], style_attr
    )
|
||||
|
||||
@app.template_filter('sanitize_tag_class')
|
||||
def _jinja2_filter_sanitize_tag_class(tag_title):
|
||||
"""Sanitize a tag title to create a valid CSS class name.
|
||||
Removes all non-alphanumeric characters and converts to lowercase.
|
||||
|
||||
Args:
|
||||
tag_title: The tag title string
|
||||
|
||||
Returns:
|
||||
str: A sanitized string suitable for use as a CSS class name
|
||||
"""
|
||||
import re
|
||||
# Remove all non-alphanumeric characters and convert to lowercase
|
||||
sanitized = re.sub(r'[^a-zA-Z0-9]', '', tag_title).lower()
|
||||
# Ensure it starts with a letter (CSS requirement)
|
||||
if sanitized and not sanitized[0].isalpha():
|
||||
sanitized = 'tag' + sanitized
|
||||
return sanitized if sanitized else 'tag'
|
||||
|
||||
# Import login_optionally_required from auth_decorator
|
||||
from changedetectionio.auth_decorator import login_optionally_required
|
||||
|
||||
@@ -264,6 +388,16 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
global datastore, socketio_server
|
||||
datastore = datastore_o
|
||||
|
||||
# Set datastore reference in notification queue for all_muted checking
|
||||
notification_q.set_datastore(datastore)
|
||||
|
||||
# Import and create a wrapper for is_safe_url that has access to app
|
||||
from changedetectionio.is_safe_url import is_safe_url as _is_safe_url
|
||||
|
||||
def is_safe_url(target):
|
||||
"""Wrapper for is_safe_url that passes the app instance"""
|
||||
return _is_safe_url(target, app)
|
||||
|
||||
# so far just for read-only via tests, but this will be moved eventually to be the main source
|
||||
# (instead of the global var)
|
||||
app.config['DATASTORE'] = datastore_o
|
||||
@@ -274,7 +408,45 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
login_manager = flask_login.LoginManager(app)
|
||||
login_manager.login_view = 'login'
|
||||
app.secret_key = init_app_secret(config['datastore_path'])
|
||||
|
||||
|
||||
# Initialize Flask-Babel for i18n support
|
||||
available_languages = get_available_languages()
|
||||
language_codes = get_language_codes()
|
||||
|
||||
def get_locale():
    """Pick the locale for the current request.

    A locale stored in the session (explicit user choice) is returned as-is.
    Otherwise the best match from the Accept-Language header is used, with
    browser-style codes mapped onto our translation directory names.
    """
    # 1. An explicit selection stored in the session always wins
    if 'locale' in session:
        return session['locale']

    # Browsers send standard codes (e.g. zh-TW) while our translations use more
    # specific directory names (e.g. zh_Hant_TW)
    locale_aliases = {
        'zh-TW': 'zh_Hant_TW',  # Traditional Chinese as sent by browsers
        'zh_TW': 'zh_Hant_TW',  # Same, underscore variant
    }

    # 2. Best match from the Accept-Language header, aliases included as candidates
    candidates = language_codes + list(locale_aliases.keys())
    browser_locale = request.accept_languages.best_match(candidates)

    # 3. Translate an alias to our internal name; anything else passes through unchanged
    return locale_aliases.get(browser_locale, browser_locale)
|
||||
|
||||
# Initialize Babel with locale selector
|
||||
babel = Babel(app, locale_selector=get_locale)
|
||||
|
||||
# Make i18n functions available to templates
|
||||
app.jinja_env.globals.update(
|
||||
_=gettext,
|
||||
get_locale=get_locale,
|
||||
get_flag_for_locale=get_flag_for_locale,
|
||||
available_languages=available_languages
|
||||
)
|
||||
|
||||
# Set up a request hook to check authentication for all routes
|
||||
@app.before_request
|
||||
def check_authentication():
|
||||
@@ -285,6 +457,12 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
if request.endpoint and request.endpoint == 'static_content' and request.view_args:
|
||||
# Handled by static_content handler
|
||||
return None
|
||||
# Permitted - static flag icons need to load on login page
|
||||
elif request.endpoint and request.endpoint == 'static_flags':
|
||||
return None
|
||||
# Permitted - language selection should work on login page
|
||||
elif request.endpoint and request.endpoint == 'set_language':
|
||||
return None
|
||||
# Permitted
|
||||
elif request.endpoint and 'login' in request.endpoint:
|
||||
return None
|
||||
@@ -307,6 +485,9 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
return login_manager.unauthorized()
|
||||
|
||||
|
||||
watch_api.add_resource(WatchHistoryDiff,
|
||||
'/api/v1/watch/<string:uuid>/difference/<string:from_timestamp>/<string:to_timestamp>',
|
||||
resource_class_kwargs={'datastore': datastore})
|
||||
watch_api.add_resource(WatchSingleHistory,
|
||||
'/api/v1/watch/<string:uuid>/history/<string:timestamp>',
|
||||
resource_class_kwargs={'datastore': datastore, 'update_q': update_q})
|
||||
@@ -350,25 +531,76 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
|
||||
@login_manager.unauthorized_handler
|
||||
def unauthorized_handler():
|
||||
flash("You must be logged in, please log in.", 'error')
|
||||
return redirect(url_for('login', next=url_for('watchlist.index')))
|
||||
# Pass the current request path so users are redirected back after login
|
||||
return redirect(url_for('login', redirect=request.path))
|
||||
|
||||
@app.route('/logout')
def logout():
    """Log the current user out, then send them to the login page or the watchlist.

    A ``redirect`` query parameter that passes ``is_safe_url`` is forwarded to the
    login page so the user returns there after logging in again.
    """
    flask_login.logout_user()

    redirect_url = request.args.get('redirect')

    # No redirect target, or an unsafe one: just go to the watchlist
    if not redirect_url or not is_safe_url(redirect_url):
        return redirect(url_for('watchlist.index'))

    return redirect(url_for('login', redirect=redirect_url))
|
||||
|
||||
@app.route('/set-language/<locale>')
def set_language(locale):
    """Set the user's preferred language in the session.

    Args:
        locale: Locale code from the URL; it is only stored when it is one of
            ``language_codes``, otherwise the attempt is logged and ignored.

    Returns:
        A redirect to the ``redirect`` query parameter when it passes
        ``is_safe_url``, otherwise a redirect to the watchlist.
    """
    if not request.cookies:
        logger.error("Cannot set language without session cookie")
        # Passed through gettext like the other user-facing flash messages
        flash(gettext("Cannot set language without session cookie"), 'error')
        return redirect(url_for('watchlist.index'))

    # Validate the locale against available languages
    if locale in language_codes:
        # Make session permanent so language preference persists across browser sessions
        # NOTE: This is the Flask session cookie (separate from Flask-Login's remember-me auth cookie)
        session.permanent = True
        session['locale'] = locale

        # CRITICAL: Flask-Babel caches the locale in the request context (ctx.babel_locale)
        # We must refresh to clear this cache so the new locale takes effect immediately
        # This is especially important for tests where multiple requests happen rapidly
        from flask_babel import refresh
        refresh()
    else:
        logger.error(f"Invalid locale {locale}, available: {language_codes}")

    # Check if there's a redirect parameter to return to the same page
    redirect_url = request.args.get('redirect')

    # If redirect is provided and safe, use it
    if redirect_url and is_safe_url(redirect_url):
        return redirect(redirect_url)

    # Otherwise redirect to watchlist
    return redirect(url_for('watchlist.index'))
|
||||
|
||||
# https://github.com/pallets/flask/blob/93dd1709d05a1cf0e886df6223377bdab3b077fb/examples/tutorial/flaskr/__init__.py#L39
|
||||
# You can divide up the stuff like this
|
||||
@app.route('/login', methods=['GET', 'POST'])
|
||||
def login():
|
||||
# Extract and validate the redirect parameter
|
||||
redirect_url = request.args.get('redirect') or request.form.get('redirect')
|
||||
|
||||
# Validate the redirect URL - default to watchlist if invalid
|
||||
if redirect_url and is_safe_url(redirect_url):
|
||||
validated_redirect = redirect_url
|
||||
else:
|
||||
validated_redirect = url_for('watchlist.index')
|
||||
|
||||
if request.method == 'GET':
|
||||
if flask_login.current_user.is_authenticated:
|
||||
flash("Already logged in")
|
||||
return redirect(url_for("watchlist.index"))
|
||||
|
||||
output = render_template("login.html")
|
||||
# Already logged in - redirect immediately to the target
|
||||
flash(gettext("Already logged in"))
|
||||
return redirect(validated_redirect)
|
||||
flash(gettext("You must be logged in, please log in."), 'error')
|
||||
output = render_template("login.html", redirect_url=validated_redirect)
|
||||
return output
|
||||
|
||||
user = User()
|
||||
@@ -378,23 +610,13 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
|
||||
if (user.check_password(password)):
|
||||
flask_login.login_user(user, remember=True)
|
||||
|
||||
# For now there's nothing else interesting here other than the index/list page
|
||||
# It's more reliable and safe to ignore the 'next' redirect
|
||||
# When we used...
|
||||
# next = request.args.get('next')
|
||||
# return redirect(next or url_for('watchlist.index'))
|
||||
# We would sometimes get login loop errors on sites hosted in sub-paths
|
||||
|
||||
# note for the future:
|
||||
# if not is_safe_valid_url(next):
|
||||
# return flask.abort(400)
|
||||
return redirect(url_for('watchlist.index'))
|
||||
# Redirect to the validated URL after successful login
|
||||
return redirect(validated_redirect)
|
||||
|
||||
else:
|
||||
flash('Incorrect password', 'error')
|
||||
flash(gettext('Incorrect password'), 'error')
|
||||
|
||||
return redirect(url_for('login'))
|
||||
return redirect(url_for('login', redirect=redirect_url if redirect_url else None))
|
||||
|
||||
@app.before_request
|
||||
def before_request_handle_cookie_x_settings():
|
||||
@@ -404,6 +626,40 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
app.config['SESSION_COOKIE_PATH'] = request.headers['X-Forwarded-Prefix']
|
||||
return None
|
||||
|
||||
@app.route("/static/flags/<path:flag_path>", methods=['GET'])
def static_flags(flag_path):
    """Handle flag icon files with subdirectories.

    Args:
        flag_path: Path below ``/static/flags/``, e.g. "1x1/de.svg" or "4x3/de.svg".
            It is lowercased before validation.

    Returns:
        The SVG file as a response with an ``image/svg+xml`` content type and a
        24 hour cache header. Any path that does not match the expected shape,
        or a missing file, results in ``abort(404)``.
    """
    from flask import make_response
    import re

    # fullmatch (rather than match with a trailing `$`) so that nothing, not even a
    # trailing newline, is accepted beyond "<1x1|4x3>/<name>.svg". The two groups
    # give the directory and file name directly, so no separate split/re-check is needed.
    matched = re.fullmatch(r'(1x1|4x3)/([a-z0-9-]+\.svg)', flag_path.lower())
    if not matched:
        abort(404)

    subdir, svg_file = matched.groups()

    try:
        response = make_response(send_from_directory(f"static/flags/{subdir}", svg_file))
        response.headers['Content-type'] = 'image/svg+xml'
        response.headers['Cache-Control'] = 'max-age=86400, public'  # Cache for 24 hours
        return response
    except FileNotFoundError:
        abort(404)
|
||||
|
||||
@app.route("/static/<string:group>/<string:filename>", methods=['GET'])
|
||||
def static_content(group, filename):
|
||||
from flask import make_response
|
||||
@@ -443,16 +699,9 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
|
||||
favicon_filename = watch.get_favicon_filename()
|
||||
if favicon_filename:
|
||||
try:
|
||||
import magic
|
||||
mime = magic.from_file(
|
||||
os.path.join(watch.watch_data_dir, favicon_filename),
|
||||
mime=True
|
||||
)
|
||||
except ImportError:
|
||||
# Fallback, no python-magic
|
||||
import mimetypes
|
||||
mime, encoding = mimetypes.guess_type(favicon_filename)
|
||||
# Use cached MIME type detection
|
||||
filepath = os.path.join(watch.watch_data_dir, favicon_filename)
|
||||
mime = get_favicon_mime_type(filepath)
|
||||
|
||||
response = make_response(send_from_directory(watch.watch_data_dir, favicon_filename))
|
||||
response.headers['Content-type'] = mime
|
||||
@@ -488,6 +737,31 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
except FileNotFoundError:
|
||||
abort(404)
|
||||
|
||||
# Handle plugin group specially
|
||||
if group == 'plugin':
|
||||
# Serve files from plugin static directories
|
||||
from changedetectionio.pluggy_interface import plugin_manager
|
||||
import os as os_check
|
||||
|
||||
for plugin_name, plugin_obj in plugin_manager.list_name_plugin():
|
||||
if hasattr(plugin_obj, 'plugin_static_path'):
|
||||
try:
|
||||
static_path = plugin_obj.plugin_static_path()
|
||||
if static_path and os_check.path.isdir(static_path):
|
||||
# Check if file exists in plugin's static directory
|
||||
plugin_file_path = os_check.path.join(static_path, filename)
|
||||
if os_check.path.isfile(plugin_file_path):
|
||||
# Found the file in a plugin
|
||||
response = make_response(send_from_directory(static_path, filename))
|
||||
response.headers['Cache-Control'] = 'max-age=3600, public' # Cache for 1 hour
|
||||
return response
|
||||
except Exception as e:
|
||||
logger.debug(f"Error checking plugin {plugin_name} for static file: {e}")
|
||||
pass
|
||||
|
||||
# File not found in any plugin
|
||||
abort(404)
|
||||
|
||||
# These files should be in our subdirectory
|
||||
try:
|
||||
return send_from_directory(f"static/{group}", path=filename)
|
||||
@@ -629,13 +903,13 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
worker_handler.start_workers(n_workers, update_q, notification_q, app, datastore)
|
||||
|
||||
# @todo handle ctrl break
|
||||
ticker_thread = threading.Thread(target=ticker_thread_check_time_launch_checks).start()
|
||||
threading.Thread(target=notification_runner).start()
|
||||
ticker_thread = threading.Thread(target=ticker_thread_check_time_launch_checks, daemon=True, name="TickerThread-ScheduleChecker").start()
|
||||
threading.Thread(target=notification_runner, daemon=True, name="NotificationRunner").start()
|
||||
|
||||
in_pytest = "pytest" in sys.modules or "PYTEST_CURRENT_TEST" in os.environ
|
||||
# Check for new release version, but not when running in test/build or pytest
|
||||
if not os.getenv("GITHUB_REF", False) and not strtobool(os.getenv('DISABLE_VERSION_CHECK', 'no')) and not in_pytest:
|
||||
threading.Thread(target=check_for_new_version).start()
|
||||
threading.Thread(target=check_for_new_version, daemon=True, name="VersionChecker").start()
|
||||
|
||||
# Return the Flask app - the Socket.IO will be attached to it but initialized separately
|
||||
# This avoids circular dependencies
|
||||
@@ -680,7 +954,7 @@ def notification_runner():
|
||||
# At the moment only one thread runs (single runner)
|
||||
n_object = notification_q.get(block=False)
|
||||
except queue.Empty:
|
||||
time.sleep(1)
|
||||
app.config.exit.wait(1)
|
||||
|
||||
else:
|
||||
|
||||
@@ -717,7 +991,7 @@ def notification_runner():
|
||||
app.config['watch_check_update_SIGNAL'].send(app_context=app, watch_uuid=n_object.get('uuid'))
|
||||
|
||||
# Process notifications
|
||||
notification_debug_log+= ["{} - SENDING - {}".format(now.strftime("%Y/%m/%d %H:%M:%S,000"), json.dumps(sent_obj))]
|
||||
notification_debug_log+= ["{} - SENDING - {}".format(now.strftime("%c"), json.dumps(sent_obj))]
|
||||
# Trim the log length
|
||||
notification_debug_log = notification_debug_log[-100:]
|
||||
|
||||
@@ -733,6 +1007,10 @@ def ticker_thread_check_time_launch_checks():
|
||||
logger.debug(f"System env MINIMUM_SECONDS_RECHECK_TIME {recheck_time_minimum_seconds}")
|
||||
|
||||
# Workers are now started during app initialization, not here
|
||||
WAIT_TIME_BETWEEN_LOOP = 1.0 if not IN_PYTEST else 0.01
|
||||
if IN_PYTEST:
|
||||
# The time between loops should be less than the first .sleep/wait in def wait_for_all_checks() of tests/util.py
|
||||
logger.warning(f"Looks like we're in PYTEST! Setting time between searching for items to add to the queue to {WAIT_TIME_BETWEEN_LOOP}s")
|
||||
|
||||
while not app.config.exit.is_set():
|
||||
|
||||
@@ -750,12 +1028,20 @@ def ticker_thread_check_time_launch_checks():
|
||||
|
||||
if health_result['status'] != 'healthy':
|
||||
logger.warning(f"Worker health check: {health_result['message']}")
|
||||
|
||||
|
||||
last_health_check = now
|
||||
|
||||
# Check if all checks are paused
|
||||
if datastore.data['settings']['application'].get('all_paused', False):
|
||||
app.config.exit.wait(1)
|
||||
continue
|
||||
|
||||
# Get a list of watches by UUID that are currently fetching data
|
||||
running_uuids = worker_handler.get_running_uuids()
|
||||
|
||||
# Build set of queued UUIDs once for O(1) lookup instead of O(n) per watch
|
||||
queued_uuids = {q_item.item['uuid'] for q_item in update_q.queue}
|
||||
|
||||
# Re #232 - Deepcopy the data in case it changes while we're iterating through it all
|
||||
watch_uuid_list = []
|
||||
while True:
|
||||
@@ -772,16 +1058,17 @@ def ticker_thread_check_time_launch_checks():
|
||||
else:
|
||||
break
|
||||
|
||||
# Re #438 - Don't place more watches in the queue to be checked if the queue is already large
|
||||
while update_q.qsize() >= 2000:
|
||||
logger.warning(f"Recheck watches queue size limit reached ({MAX_QUEUE_SIZE}), skipping adding more items")
|
||||
time.sleep(3)
|
||||
|
||||
|
||||
recheck_time_system_seconds = int(datastore.threshold_seconds)
|
||||
|
||||
# Check for watches outside of the time threshold to put in the thread queue.
|
||||
for uuid in watch_uuid_list:
|
||||
for watch_index, uuid in enumerate(watch_uuid_list):
|
||||
# Re #438 - Check queue size every 100 watches for CPU efficiency (not every watch)
|
||||
if watch_index % 100 == 0:
|
||||
current_queue_size = update_q.qsize()
|
||||
if current_queue_size >= MAX_QUEUE_SIZE:
|
||||
logger.debug(f"Queue size limit reached ({current_queue_size}/{MAX_QUEUE_SIZE}), stopping scheduler this iteration.")
|
||||
break
|
||||
|
||||
now = time.time()
|
||||
watch = datastore.data['watching'].get(uuid)
|
||||
if not watch:
|
||||
@@ -831,7 +1118,7 @@ def ticker_thread_check_time_launch_checks():
|
||||
seconds_since_last_recheck = now - watch['last_checked']
|
||||
|
||||
if seconds_since_last_recheck >= (threshold + watch.jitter_seconds) and seconds_since_last_recheck >= recheck_time_minimum_seconds:
|
||||
if not uuid in running_uuids and uuid not in [q_uuid.item['uuid'] for q_uuid in update_q.queue]:
|
||||
if not uuid in running_uuids and uuid not in queued_uuids:
|
||||
|
||||
# Proxies can be set to have a limit on seconds between which they can be called
|
||||
watch_proxy = datastore.get_preferred_proxy_for_watch(uuid=uuid)
|
||||
@@ -873,8 +1160,5 @@ def ticker_thread_check_time_launch_checks():
|
||||
# Reset for next time
|
||||
watch.jitter_seconds = 0
|
||||
|
||||
# Wait before checking the list again - saves CPU
|
||||
time.sleep(1)
|
||||
|
||||
# Should be low so we can break this out in testing
|
||||
app.config.exit.wait(1)
|
||||
app.config.exit.wait(WAIT_TIME_BETWEEN_LOOP)
|
||||
|
||||
@@ -2,16 +2,21 @@ import os
|
||||
import re
|
||||
from loguru import logger
|
||||
from wtforms.widgets.core import TimeInput
|
||||
from flask_babel import lazy_gettext as _l, gettext
|
||||
|
||||
from changedetectionio.blueprint.rss import RSS_FORMAT_TYPES
|
||||
from changedetectionio.blueprint.rss import RSS_FORMAT_TYPES, RSS_TEMPLATE_TYPE_OPTIONS, RSS_TEMPLATE_HTML_DEFAULT
|
||||
from changedetectionio.conditions.form import ConditionFormRow
|
||||
from changedetectionio.notification_service import NotificationContextData
|
||||
from changedetectionio.processors.image_ssim_diff import SCREENSHOT_COMPARISON_THRESHOLD_OPTIONS, \
|
||||
SCREENSHOT_COMPARISON_THRESHOLD_OPTIONS_DEFAULT
|
||||
from changedetectionio.strtobool import strtobool
|
||||
from changedetectionio import processors
|
||||
|
||||
from wtforms import (
|
||||
BooleanField,
|
||||
Form,
|
||||
Field,
|
||||
FloatField,
|
||||
IntegerField,
|
||||
RadioField,
|
||||
SelectField,
|
||||
@@ -55,8 +60,8 @@ valid_method = {
|
||||
|
||||
default_method = 'GET'
|
||||
allow_simplehost = not strtobool(os.getenv('BLOCK_SIMPLEHOSTS', 'False'))
|
||||
REQUIRE_ATLEAST_ONE_TIME_PART_MESSAGE_DEFAULT='At least one time interval (weeks, days, hours, minutes, or seconds) must be specified.'
|
||||
REQUIRE_ATLEAST_ONE_TIME_PART_WHEN_NOT_GLOBAL_DEFAULT='At least one time interval (weeks, days, hours, minutes, or seconds) must be specified when not using global settings.'
|
||||
REQUIRE_ATLEAST_ONE_TIME_PART_MESSAGE_DEFAULT=_l('At least one time interval (weeks, days, hours, minutes, or seconds) must be specified.')
|
||||
REQUIRE_ATLEAST_ONE_TIME_PART_WHEN_NOT_GLOBAL_DEFAULT=_l('At least one time interval (weeks, days, hours, minutes, or seconds) must be specified when not using global settings.')
|
||||
|
||||
class StringListField(StringField):
|
||||
widget = widgets.TextArea()
|
||||
@@ -156,7 +161,7 @@ class TimeStringField(Field):
|
||||
time_str = valuelist[0]
|
||||
# Simple validation for HH:MM format
|
||||
if not time_str or len(time_str.split(":")) != 2:
|
||||
raise ValidationError("Invalid time format. Use HH:MM.")
|
||||
raise ValidationError(_l("Invalid time format. Use HH:MM."))
|
||||
self.data = time_str
|
||||
|
||||
|
||||
@@ -172,15 +177,15 @@ class validateTimeZoneName(object):
|
||||
from zoneinfo import available_timezones
|
||||
python_timezones = available_timezones()
|
||||
if field.data and field.data not in python_timezones:
|
||||
raise ValidationError("Not a valid timezone name")
|
||||
raise ValidationError(_l("Not a valid timezone name"))
|
||||
|
||||
class ScheduleLimitDaySubForm(Form):
|
||||
enabled = BooleanField("not set", default=True)
|
||||
start_time = TimeStringField("Start At", default="00:00", validators=[validators.Optional()])
|
||||
duration = FormField(TimeDurationForm, label="Run duration")
|
||||
enabled = BooleanField(_l("not set"), default=True)
|
||||
start_time = TimeStringField(_l("Start At"), default="00:00", validators=[validators.Optional()])
|
||||
duration = FormField(TimeDurationForm, label=_l("Run duration"))
|
||||
|
||||
class ScheduleLimitForm(Form):
|
||||
enabled = BooleanField("Use time scheduler", default=False)
|
||||
enabled = BooleanField(_l("Use time scheduler"), default=False)
|
||||
# Because the label for=""" doesn't line up/work with the actual checkbox
|
||||
monday = FormField(ScheduleLimitDaySubForm, label="")
|
||||
tuesday = FormField(ScheduleLimitDaySubForm, label="")
|
||||
@@ -190,7 +195,7 @@ class ScheduleLimitForm(Form):
|
||||
saturday = FormField(ScheduleLimitDaySubForm, label="")
|
||||
sunday = FormField(ScheduleLimitDaySubForm, label="")
|
||||
|
||||
timezone = StringField("Optional timezone to run in",
|
||||
timezone = StringField(_l("Optional timezone to run in"),
|
||||
render_kw={"list": "timezones"},
|
||||
validators=[validateTimeZoneName()]
|
||||
)
|
||||
@@ -204,13 +209,13 @@ class ScheduleLimitForm(Form):
|
||||
**kwargs,
|
||||
):
|
||||
super().__init__(formdata, obj, prefix, data, meta, **kwargs)
|
||||
self.monday.form.enabled.label.text="Monday"
|
||||
self.tuesday.form.enabled.label.text = "Tuesday"
|
||||
self.wednesday.form.enabled.label.text = "Wednesday"
|
||||
self.thursday.form.enabled.label.text = "Thursday"
|
||||
self.friday.form.enabled.label.text = "Friday"
|
||||
self.saturday.form.enabled.label.text = "Saturday"
|
||||
self.sunday.form.enabled.label.text = "Sunday"
|
||||
self.monday.form.enabled.label.text=_l("Monday")
|
||||
self.tuesday.form.enabled.label.text = _l("Tuesday")
|
||||
self.wednesday.form.enabled.label.text = _l("Wednesday")
|
||||
self.thursday.form.enabled.label.text = _l("Thursday")
|
||||
self.friday.form.enabled.label.text = _l("Friday")
|
||||
self.saturday.form.enabled.label.text = _l("Saturday")
|
||||
self.sunday.form.enabled.label.text = _l("Sunday")
|
||||
|
||||
|
||||
def validate_time_between_check_has_values(form):
|
||||
@@ -235,7 +240,7 @@ class RequiredTimeInterval(object):
|
||||
Use this with FormField(TimeBetweenCheckForm, validators=[RequiredTimeInterval()]).
|
||||
"""
|
||||
def __init__(self, message=None):
|
||||
self.message = message or 'At least one time interval (weeks, days, hours, minutes, or seconds) must be specified.'
|
||||
self.message = message or _l('At least one time interval (weeks, days, hours, minutes, or seconds) must be specified.')
|
||||
|
||||
def __call__(self, form, field):
|
||||
if not validate_time_between_check_has_values(field.form):
|
||||
@@ -243,11 +248,11 @@ class RequiredTimeInterval(object):
|
||||
|
||||
|
||||
class TimeBetweenCheckForm(Form):
|
||||
weeks = IntegerField('Weeks', validators=[validators.Optional(), validators.NumberRange(min=0, message="Should contain zero or more seconds")])
|
||||
days = IntegerField('Days', validators=[validators.Optional(), validators.NumberRange(min=0, message="Should contain zero or more seconds")])
|
||||
hours = IntegerField('Hours', validators=[validators.Optional(), validators.NumberRange(min=0, message="Should contain zero or more seconds")])
|
||||
minutes = IntegerField('Minutes', validators=[validators.Optional(), validators.NumberRange(min=0, message="Should contain zero or more seconds")])
|
||||
seconds = IntegerField('Seconds', validators=[validators.Optional(), validators.NumberRange(min=0, message="Should contain zero or more seconds")])
|
||||
weeks = IntegerField(_l('Weeks'), validators=[validators.Optional(), validators.NumberRange(min=0, message=_l("Should contain zero or more seconds"))])
|
||||
days = IntegerField(_l('Days'), validators=[validators.Optional(), validators.NumberRange(min=0, message=_l("Should contain zero or more seconds"))])
|
||||
hours = IntegerField(_l('Hours'), validators=[validators.Optional(), validators.NumberRange(min=0, message=_l("Should contain zero or more seconds"))])
|
||||
minutes = IntegerField(_l('Minutes'), validators=[validators.Optional(), validators.NumberRange(min=0, message=_l("Should contain zero or more seconds"))])
|
||||
seconds = IntegerField(_l('Seconds'), validators=[validators.Optional(), validators.NumberRange(min=0, message=_l("Should contain zero or more seconds"))])
|
||||
# @todo add total seconds minimum validatior = minimum_seconds_recheck_time
|
||||
|
||||
def __init__(self, formdata=None, obj=None, prefix="", data=None, meta=None, **kwargs):
|
||||
@@ -503,7 +508,9 @@ class ValidateJinja2Template(object):
|
||||
jinja2_env = create_jinja_env(loader=BaseLoader)
|
||||
|
||||
# Add notification tokens for validation
|
||||
jinja2_env.globals.update(NotificationContextData())
|
||||
static_token_placeholders = NotificationContextData()
|
||||
static_token_placeholders.set_random_for_validation()
|
||||
jinja2_env.globals.update(static_token_placeholders)
|
||||
if hasattr(field, 'extra_notification_tokens'):
|
||||
jinja2_env.globals.update(field.extra_notification_tokens)
|
||||
|
||||
@@ -715,18 +722,16 @@ class ValidateStartsWithRegex(object):
|
||||
if not stripped:
|
||||
if self.allow_empty:
|
||||
continue
|
||||
raise ValidationError(self.message or "Empty value not allowed.")
|
||||
raise ValidationError(self.message or _l("Empty value not allowed."))
|
||||
if not self.pattern.match(stripped):
|
||||
raise ValidationError(self.message or "Invalid value.")
|
||||
raise ValidationError(self.message or _l("Invalid value."))
|
||||
|
||||
class quickWatchForm(Form):
|
||||
from . import processors
|
||||
|
||||
url = fields.URLField('URL', validators=[validateURL()])
|
||||
tags = StringTagUUID('Group tag', [validators.Optional()])
|
||||
watch_submit_button = SubmitField('Watch', render_kw={"class": "pure-button pure-button-primary"})
|
||||
processor = RadioField(u'Processor', choices=processors.available_processors(), default="text_json_diff")
|
||||
edit_and_watch_submit_button = SubmitField('Edit > Watch', render_kw={"class": "pure-button pure-button-primary"})
|
||||
url = fields.URLField(_l('URL'), validators=[validateURL()])
|
||||
tags = StringTagUUID(_l('Group tag'), validators=[validators.Optional()])
|
||||
watch_submit_button = SubmitField(_l('Watch'), render_kw={"class": "pure-button pure-button-primary"})
|
||||
processor = RadioField(_l('Processor'), choices=lambda: processors.available_processors(), default="text_json_diff")
|
||||
edit_and_watch_submit_button = SubmitField(_l('Edit > Watch'), render_kw={"class": "pure-button pure-button-primary"})
|
||||
|
||||
|
||||
# Common to a single watch and the global settings
|
||||
@@ -739,14 +744,14 @@ class commonSettingsForm(Form):
|
||||
self.notification_title.extra_notification_tokens = kwargs.get('extra_notification_tokens', {})
|
||||
self.notification_urls.extra_notification_tokens = kwargs.get('extra_notification_tokens', {})
|
||||
|
||||
fetch_backend = RadioField(u'Fetch Method', choices=content_fetchers.available_fetchers(), validators=[ValidateContentFetcherIsReady()])
|
||||
notification_body = TextAreaField('Notification Body', default='{{ watch_url }} had a change.', validators=[validators.Optional(), ValidateJinja2Template()])
|
||||
notification_format = SelectField('Notification format', choices=list(valid_notification_formats.items()))
|
||||
notification_title = StringField('Notification Title', default='ChangeDetection.io Notification - {{ watch_url }}', validators=[validators.Optional(), ValidateJinja2Template()])
|
||||
notification_urls = StringListField('Notification URL List', validators=[validators.Optional(), ValidateAppRiseServers(), ValidateJinja2Template()])
|
||||
processor = RadioField( label=u"Processor - What do you want to achieve?", choices=processors.available_processors(), default="text_json_diff")
|
||||
scheduler_timezone_default = StringField("Default timezone for watch check scheduler", render_kw={"list": "timezones"}, validators=[validateTimeZoneName()])
|
||||
webdriver_delay = IntegerField('Wait seconds before extracting text', validators=[validators.Optional(), validators.NumberRange(min=1, message="Should contain one or more seconds")])
|
||||
fetch_backend = RadioField(_l('Fetch Method'), choices=content_fetchers.available_fetchers(), validators=[ValidateContentFetcherIsReady()])
|
||||
notification_body = TextAreaField(_l('Notification Body'), default='{{ watch_url }} had a change.', validators=[validators.Optional(), ValidateJinja2Template()])
|
||||
notification_format = SelectField(_l('Notification format'), choices=list(valid_notification_formats.items()))
|
||||
notification_title = StringField(_l('Notification Title'), default='ChangeDetection.io Notification - {{ watch_url }}', validators=[validators.Optional(), ValidateJinja2Template()])
|
||||
notification_urls = StringListField(_l('Notification URL List'), validators=[validators.Optional(), ValidateAppRiseServers(), ValidateJinja2Template()])
|
||||
processor = RadioField( label=_l("Processor - What do you want to achieve?"), choices=lambda: processors.available_processors(), default="text_json_diff")
|
||||
scheduler_timezone_default = StringField(_l("Default timezone for watch check scheduler"), render_kw={"list": "timezones"}, validators=[validateTimeZoneName()])
|
||||
webdriver_delay = IntegerField(_l('Wait seconds before extracting text'), validators=[validators.Optional(), validators.NumberRange(min=1, message=_l("Should contain one or more seconds"))])
|
||||
|
||||
# Not true anymore but keep the validate_ hook for future use, we convert color tags
|
||||
# def validate_notification_urls(self, field):
|
||||
@@ -758,30 +763,30 @@ class commonSettingsForm(Form):
|
||||
|
||||
|
||||
class importForm(Form):
|
||||
from . import processors
|
||||
processor = RadioField(u'Processor', choices=processors.available_processors(), default="text_json_diff")
|
||||
urls = TextAreaField('URLs')
|
||||
xlsx_file = FileField('Upload .xlsx file', validators=[FileAllowed(['xlsx'], 'Must be .xlsx file!')])
|
||||
file_mapping = SelectField('File mapping', [validators.DataRequired()], choices={('wachete', 'Wachete mapping'), ('custom','Custom mapping')})
|
||||
processor = RadioField(_l('Processor'), choices=lambda: processors.available_processors(), default="text_json_diff")
|
||||
urls = TextAreaField(_l('URLs'))
|
||||
xlsx_file = FileField(_l('Upload .xlsx file'), validators=[FileAllowed(['xlsx'], _l('Must be .xlsx file!'))])
|
||||
file_mapping = SelectField(_l('File mapping'), [validators.DataRequired()], choices={('wachete', 'Wachete mapping'), ('custom','Custom mapping')})
|
||||
|
||||
class SingleBrowserStep(Form):
|
||||
|
||||
operation = SelectField('Operation', [validators.Optional()], choices=browser_step_ui_config.keys())
|
||||
operation = SelectField(_l('Operation'), [validators.Optional()], choices=browser_step_ui_config.keys())
|
||||
|
||||
# maybe better to set some <script>var..
|
||||
selector = StringField('Selector', [validators.Optional()], render_kw={"placeholder": "CSS or xPath selector"})
|
||||
optional_value = StringField('value', [validators.Optional()], render_kw={"placeholder": "Value"})
|
||||
selector = StringField(_l('Selector'), [validators.Optional()], render_kw={"placeholder": "CSS or xPath selector"})
|
||||
optional_value = StringField(_l('value'), [validators.Optional()], render_kw={"placeholder": "Value"})
|
||||
# @todo move to JS? ajax fetch new field?
|
||||
# remove_button = SubmitField('-', render_kw={"type": "button", "class": "pure-button pure-button-primary", 'title': 'Remove'})
|
||||
# add_button = SubmitField('+', render_kw={"type": "button", "class": "pure-button pure-button-primary", 'title': 'Add new step after'})
|
||||
# remove_button = SubmitField(_l('-'), render_kw={"type": "button", "class": "pure-button pure-button-primary", 'title': 'Remove'})
|
||||
# add_button = SubmitField(_l('+'), render_kw={"type": "button", "class": "pure-button pure-button-primary", 'title': 'Add new step after'})
|
||||
|
||||
class processor_text_json_diff_form(commonSettingsForm):
|
||||
|
||||
url = fields.URLField('URL', validators=[validateURL()])
|
||||
tags = StringTagUUID('Group tag', [validators.Optional()], default='')
|
||||
url = fields.URLField('Web Page URL', validators=[validateURL()])
|
||||
tags = StringTagUUID('Group Tag', [validators.Optional()], default='')
|
||||
|
||||
time_between_check = EnhancedFormField(
|
||||
TimeBetweenCheckForm,
|
||||
label=_l('Time Between Check'),
|
||||
conditional_field='time_between_check_use_default',
|
||||
conditional_message=REQUIRE_ATLEAST_ONE_TIME_PART_WHEN_NOT_GLOBAL_DEFAULT,
|
||||
conditional_test_function=validate_time_between_check_has_values
|
||||
@@ -789,49 +794,48 @@ class processor_text_json_diff_form(commonSettingsForm):
|
||||
|
||||
time_schedule_limit = FormField(ScheduleLimitForm)
|
||||
|
||||
time_between_check_use_default = BooleanField('Use global settings for time between check and scheduler.', default=False)
|
||||
time_between_check_use_default = BooleanField(_l('Use global settings for time between check and scheduler.'), default=False)
|
||||
|
||||
include_filters = StringListField('CSS/JSONPath/JQ/XPath Filters', [ValidateCSSJSONXPATHInput()], default='')
|
||||
include_filters = StringListField(_l('CSS/JSONPath/JQ/XPath Filters'), [ValidateCSSJSONXPATHInput()], default='')
|
||||
|
||||
subtractive_selectors = StringListField('Remove elements', [ValidateCSSJSONXPATHInput(allow_json=False)])
|
||||
subtractive_selectors = StringListField(_l('Remove elements'), [ValidateCSSJSONXPATHInput(allow_json=False)])
|
||||
|
||||
extract_text = StringListField('Extract text', [ValidateListRegex()])
|
||||
extract_text = StringListField(_l('Extract text'), [ValidateListRegex()])
|
||||
|
||||
title = StringField('Title', default='')
|
||||
title = StringField(_l('Title'), default='')
|
||||
|
||||
ignore_text = StringListField('Ignore lines containing', [ValidateListRegex()])
|
||||
ignore_text = StringListField(_l('Ignore lines containing'), [ValidateListRegex()])
|
||||
headers = StringDictKeyValue('Request headers')
|
||||
body = TextAreaField('Request body', [validators.Optional()])
|
||||
method = SelectField('Request method', choices=valid_method, default=default_method)
|
||||
ignore_status_codes = BooleanField('Ignore status codes (process non-2xx status codes as normal)', default=False)
|
||||
check_unique_lines = BooleanField('Only trigger when unique lines appear in all history', default=False)
|
||||
remove_duplicate_lines = BooleanField('Remove duplicate lines of text', default=False)
|
||||
sort_text_alphabetically = BooleanField('Sort text alphabetically', default=False)
|
||||
strip_ignored_lines = TernaryNoneBooleanField('Strip ignored lines', default=None)
|
||||
trim_text_whitespace = BooleanField('Trim whitespace before and after text', default=False)
|
||||
body = TextAreaField(_l('Request body'), [validators.Optional()])
|
||||
method = SelectField(_l('Request method'), choices=valid_method, default=default_method)
|
||||
ignore_status_codes = BooleanField(_l('Ignore status codes (process non-2xx status codes as normal)'), default=False)
|
||||
check_unique_lines = BooleanField(_l('Only trigger when unique lines appear in all history'), default=False)
|
||||
remove_duplicate_lines = BooleanField(_l('Remove duplicate lines of text'), default=False)
|
||||
sort_text_alphabetically = BooleanField(_l('Sort text alphabetically'), default=False)
|
||||
strip_ignored_lines = TernaryNoneBooleanField(_l('Strip ignored lines'), default=None)
|
||||
trim_text_whitespace = BooleanField(_l('Trim whitespace before and after text'), default=False)
|
||||
|
||||
filter_text_added = BooleanField('Added lines', default=True)
|
||||
filter_text_replaced = BooleanField('Replaced/changed lines', default=True)
|
||||
filter_text_removed = BooleanField('Removed lines', default=True)
|
||||
filter_text_added = BooleanField(_l('Added lines'), default=True)
|
||||
filter_text_replaced = BooleanField(_l('Replaced/changed lines'), default=True)
|
||||
filter_text_removed = BooleanField(_l('Removed lines'), default=True)
|
||||
|
||||
trigger_text = StringListField('Keyword triggers - Trigger/wait for text', [validators.Optional(), ValidateListRegex()])
|
||||
trigger_text = StringListField(_l('Keyword triggers - Trigger/wait for text'), [validators.Optional(), ValidateListRegex()])
|
||||
if os.getenv("PLAYWRIGHT_DRIVER_URL"):
|
||||
browser_steps = FieldList(FormField(SingleBrowserStep), min_entries=10)
|
||||
text_should_not_be_present = StringListField('Block change-detection while text matches', [validators.Optional(), ValidateListRegex()])
|
||||
webdriver_js_execute_code = TextAreaField('Execute JavaScript before change detection', render_kw={"rows": "5"}, validators=[validators.Optional()])
|
||||
text_should_not_be_present = StringListField(_l('Block change-detection while text matches'), [validators.Optional(), ValidateListRegex()])
|
||||
webdriver_js_execute_code = TextAreaField(_l('Execute JavaScript before change detection'), render_kw={"rows": "5"}, validators=[validators.Optional()])
|
||||
|
||||
save_button = SubmitField('Save', render_kw={"class": "pure-button pure-button-primary"})
|
||||
save_button = SubmitField(_l('Save'), render_kw={"class": "pure-button pure-button-primary"})
|
||||
|
||||
proxy = RadioField('Proxy')
|
||||
proxy = RadioField(_l('Proxy'))
|
||||
# filter_failure_notification_send @todo make ternary
|
||||
filter_failure_notification_send = BooleanField(
|
||||
'Send a notification when the filter can no longer be found on the page', default=False)
|
||||
notification_muted = TernaryNoneBooleanField('Notifications', default=None, yes_text="Muted", no_text="On")
|
||||
notification_screenshot = BooleanField('Attach screenshot to notification (where possible)', default=False)
|
||||
filter_failure_notification_send = BooleanField(_l('Send a notification when the filter can no longer be found on the page'), default=False)
|
||||
notification_muted = TernaryNoneBooleanField(_l('Notifications'), default=None, yes_text=_l("Muted"), no_text=_l("On"))
|
||||
notification_screenshot = BooleanField(_l('Attach screenshot to notification (where possible)'), default=False)
|
||||
|
||||
conditions_match_logic = RadioField(u'Match', choices=[('ALL', 'Match all of the following'),('ANY', 'Match any of the following')], default='ALL')
|
||||
conditions_match_logic = RadioField(_l('Match'), choices=[('ALL', _l('Match all of the following')),('ANY', _l('Match any of the following'))], default='ALL')
|
||||
conditions = FieldList(FormField(ConditionFormRow), min_entries=1) # Add rule logic here
|
||||
use_page_title_in_list = TernaryNoneBooleanField('Use page <title> in list', default=None)
|
||||
use_page_title_in_list = TernaryNoneBooleanField(_l('Use page <title> in list'), default=None)
|
||||
|
||||
def extra_tab_content(self):
|
||||
return None
|
||||
@@ -848,7 +852,7 @@ class processor_text_json_diff_form(commonSettingsForm):
|
||||
|
||||
# Fail form validation when a body is set for a GET
|
||||
if self.method.data == 'GET' and self.body.data:
|
||||
self.body.errors.append('Body must be empty when Request Method is set to GET')
|
||||
self.body.errors.append(gettext('Body must be empty when Request Method is set to GET'))
|
||||
result = False
|
||||
|
||||
# Attempt to validate jinja2 templates in the URL
|
||||
@@ -857,11 +861,11 @@ class processor_text_json_diff_form(commonSettingsForm):
|
||||
except ModuleNotFoundError as e:
|
||||
# incase jinja2_time or others is missing
|
||||
logger.error(e)
|
||||
self.url.errors.append(f'Invalid template syntax configuration: {e}')
|
||||
self.url.errors.append(gettext('Invalid template syntax configuration: %(error)s') % {'error': e})
|
||||
result = False
|
||||
except Exception as e:
|
||||
logger.error(e)
|
||||
self.url.errors.append(f'Invalid template syntax: {e}')
|
||||
self.url.errors.append(gettext('Invalid template syntax: %(error)s') % {'error': e})
|
||||
result = False
|
||||
|
||||
# Attempt to validate jinja2 templates in the body
|
||||
@@ -871,11 +875,11 @@ class processor_text_json_diff_form(commonSettingsForm):
|
||||
except ModuleNotFoundError as e:
|
||||
# incase jinja2_time or others is missing
|
||||
logger.error(e)
|
||||
self.body.errors.append(f'Invalid template syntax configuration: {e}')
|
||||
self.body.errors.append(gettext('Invalid template syntax configuration: %(error)s') % {'error': e})
|
||||
result = False
|
||||
except Exception as e:
|
||||
logger.error(e)
|
||||
self.body.errors.append(f'Invalid template syntax: {e}')
|
||||
self.body.errors.append(gettext('Invalid template syntax: %(error)s') % {'error': e})
|
||||
result = False
|
||||
|
||||
# Attempt to validate jinja2 templates in the headers
|
||||
@@ -886,11 +890,11 @@ class processor_text_json_diff_form(commonSettingsForm):
|
||||
except ModuleNotFoundError as e:
|
||||
# incase jinja2_time or others is missing
|
||||
logger.error(e)
|
||||
self.headers.errors.append(f'Invalid template syntax configuration: {e}')
|
||||
self.headers.errors.append(gettext('Invalid template syntax configuration: %(error)s') % {'error': e})
|
||||
result = False
|
||||
except Exception as e:
|
||||
logger.error(e)
|
||||
self.headers.errors.append(f'Invalid template syntax in "{header}" header: {e}')
|
||||
self.headers.errors.append(gettext('Invalid template syntax in \"%(header)s\" header: %(error)s') % {'header': header, 'error': e})
|
||||
result = False
|
||||
|
||||
return result
|
||||
@@ -914,106 +918,122 @@ class processor_text_json_diff_form(commonSettingsForm):
|
||||
|
||||
class SingleExtraProxy(Form):
|
||||
# maybe better to set some <script>var..
|
||||
proxy_name = StringField('Name', [validators.Optional()], render_kw={"placeholder": "Name"})
|
||||
proxy_url = StringField('Proxy URL', [
|
||||
proxy_name = StringField(_l('Name'), [validators.Optional()], render_kw={"placeholder": "Name"})
|
||||
proxy_url = StringField(_l('Proxy URL'), [
|
||||
validators.Optional(),
|
||||
ValidateStartsWithRegex(
|
||||
regex=r'^(https?|socks5)://', # ✅ main pattern
|
||||
flags=re.IGNORECASE, # ✅ makes it case-insensitive
|
||||
message='Proxy URLs must start with http://, https:// or socks5://',
|
||||
message=_l('Proxy URLs must start with http://, https:// or socks5://'),
|
||||
),
|
||||
ValidateSimpleURL()
|
||||
], render_kw={"placeholder": "socks5:// or regular proxy http://user:pass@...:3128", "size":50})
|
||||
|
||||
class SingleExtraBrowser(Form):
|
||||
browser_name = StringField('Name', [validators.Optional()], render_kw={"placeholder": "Name"})
|
||||
browser_connection_url = StringField('Browser connection URL', [
|
||||
browser_name = StringField(_l('Name'), [validators.Optional()], render_kw={"placeholder": "Name"})
|
||||
browser_connection_url = StringField(_l('Browser connection URL'), [
|
||||
validators.Optional(),
|
||||
ValidateStartsWithRegex(
|
||||
regex=r'^(wss?|ws)://',
|
||||
flags=re.IGNORECASE,
|
||||
message='Browser URLs must start with wss:// or ws://'
|
||||
message=_l('Browser URLs must start with wss:// or ws://')
|
||||
),
|
||||
ValidateSimpleURL()
|
||||
], render_kw={"placeholder": "wss://brightdata... wss://oxylabs etc", "size":50})
|
||||
|
||||
class DefaultUAInputForm(Form):
|
||||
html_requests = StringField('Plaintext requests', validators=[validators.Optional()], render_kw={"placeholder": "<default>"})
|
||||
html_requests = StringField(_l('Plaintext requests'), validators=[validators.Optional()], render_kw={"placeholder": "<default>"})
|
||||
if os.getenv("PLAYWRIGHT_DRIVER_URL") or os.getenv("WEBDRIVER_URL"):
|
||||
html_webdriver = StringField('Chrome requests', validators=[validators.Optional()], render_kw={"placeholder": "<default>"})
|
||||
html_webdriver = StringField(_l('Chrome requests'), validators=[validators.Optional()], render_kw={"placeholder": "<default>"})
|
||||
|
||||
# datastore.data['settings']['requests']..
|
||||
class globalSettingsRequestForm(Form):
|
||||
time_between_check = RequiredFormField(TimeBetweenCheckForm)
|
||||
time_between_check = RequiredFormField(TimeBetweenCheckForm, label=_l('Time Between Check'))
|
||||
time_schedule_limit = FormField(ScheduleLimitForm)
|
||||
proxy = RadioField('Default proxy')
|
||||
jitter_seconds = IntegerField('Random jitter seconds ± check',
|
||||
proxy = RadioField(_l('Default proxy'))
|
||||
jitter_seconds = IntegerField(_l('Random jitter seconds ± check'),
|
||||
render_kw={"style": "width: 5em;"},
|
||||
validators=[validators.NumberRange(min=0, message="Should contain zero or more seconds")])
|
||||
validators=[validators.NumberRange(min=0, message=_l("Should contain zero or more seconds"))])
|
||||
|
||||
workers = IntegerField('Number of fetch workers',
|
||||
workers = IntegerField(_l('Number of fetch workers'),
|
||||
render_kw={"style": "width: 5em;"},
|
||||
validators=[validators.NumberRange(min=1, max=50,
|
||||
message="Should be between 1 and 50")])
|
||||
message=_l("Should be between 1 and 50"))])
|
||||
|
||||
timeout = IntegerField('Requests timeout in seconds',
|
||||
timeout = IntegerField(_l('Requests timeout in seconds'),
|
||||
render_kw={"style": "width: 5em;"},
|
||||
validators=[validators.NumberRange(min=1, max=999,
|
||||
message="Should be between 1 and 999")])
|
||||
message=_l("Should be between 1 and 999"))])
|
||||
|
||||
extra_proxies = FieldList(FormField(SingleExtraProxy), min_entries=5)
|
||||
extra_browsers = FieldList(FormField(SingleExtraBrowser), min_entries=5)
|
||||
|
||||
default_ua = FormField(DefaultUAInputForm, label="Default User-Agent overrides")
|
||||
default_ua = FormField(DefaultUAInputForm, label=_l("Default User-Agent overrides"))
|
||||
|
||||
def validate_extra_proxies(self, extra_validators=None):
|
||||
for e in self.data['extra_proxies']:
|
||||
if e.get('proxy_name') or e.get('proxy_url'):
|
||||
if not e.get('proxy_name','').strip() or not e.get('proxy_url','').strip():
|
||||
self.extra_proxies.errors.append('Both a name, and a Proxy URL is required.')
|
||||
self.extra_proxies.errors.append(gettext('Both a name, and a Proxy URL is required.'))
|
||||
return False
|
||||
|
||||
class globalSettingsApplicationUIForm(Form):
|
||||
open_diff_in_new_tab = BooleanField("Open 'History' page in a new tab", default=True, validators=[validators.Optional()])
|
||||
socket_io_enabled = BooleanField('Realtime UI Updates Enabled', default=True, validators=[validators.Optional()])
|
||||
favicons_enabled = BooleanField('Favicons Enabled', default=True, validators=[validators.Optional()])
|
||||
use_page_title_in_list = BooleanField('Use page <title> in watch overview list') #BooleanField=True
|
||||
open_diff_in_new_tab = BooleanField(_l("Open 'History' page in a new tab"), default=True, validators=[validators.Optional()])
|
||||
socket_io_enabled = BooleanField(_l('Realtime UI Updates Enabled'), default=True, validators=[validators.Optional()])
|
||||
favicons_enabled = BooleanField(_l('Favicons Enabled'), default=True, validators=[validators.Optional()])
|
||||
use_page_title_in_list = BooleanField(_l('Use page <title> in watch overview list')) #BooleanField=True
|
||||
|
||||
# datastore.data['settings']['application']..
|
||||
class globalSettingsApplicationForm(commonSettingsForm):
|
||||
|
||||
api_access_token_enabled = BooleanField('API access token security check enabled', default=True, validators=[validators.Optional()])
|
||||
base_url = StringField('Notification base URL override',
|
||||
api_access_token_enabled = BooleanField(_l('API access token security check enabled'), default=True, validators=[validators.Optional()])
|
||||
base_url = StringField(_l('Notification base URL override'),
|
||||
validators=[validators.Optional()],
|
||||
render_kw={"placeholder": os.getenv('BASE_URL', 'Not set')}
|
||||
)
|
||||
empty_pages_are_a_change = BooleanField('Treat empty pages as a change?', default=False)
|
||||
fetch_backend = RadioField('Fetch Method', default="html_requests", choices=content_fetchers.available_fetchers(), validators=[ValidateContentFetcherIsReady()])
|
||||
global_ignore_text = StringListField('Ignore Text', [ValidateListRegex()])
|
||||
global_subtractive_selectors = StringListField('Remove elements', [ValidateCSSJSONXPATHInput(allow_json=False)])
|
||||
ignore_whitespace = BooleanField('Ignore whitespace')
|
||||
password = SaltyPasswordField()
|
||||
pager_size = IntegerField('Pager size',
|
||||
empty_pages_are_a_change = BooleanField(_l('Treat empty pages as a change?'), default=False)
|
||||
fetch_backend = RadioField(_l('Fetch Method'), default="html_requests", choices=content_fetchers.available_fetchers(), validators=[ValidateContentFetcherIsReady()])
|
||||
global_ignore_text = StringListField(_l('Ignore Text'), [ValidateListRegex()])
|
||||
global_subtractive_selectors = StringListField(_l('Remove elements'), [ValidateCSSJSONXPATHInput(allow_json=False)])
|
||||
ignore_whitespace = BooleanField(_l('Ignore whitespace'))
|
||||
|
||||
# Screenshot comparison settings
|
||||
min_change_percentage = FloatField(
|
||||
'Screenshot: Minimum Change Percentage',
|
||||
validators=[
|
||||
validators.Optional(),
|
||||
validators.NumberRange(min=0.0, max=100.0, message=_l('Must be between 0 and 100'))
|
||||
],
|
||||
default=0.1,
|
||||
render_kw={"placeholder": "0.1", "style": "width: 8em;"}
|
||||
)
|
||||
|
||||
password = SaltyPasswordField(_l('Password'))
|
||||
pager_size = IntegerField(_l('Pager size'),
|
||||
render_kw={"style": "width: 5em;"},
|
||||
validators=[validators.NumberRange(min=0,
|
||||
message="Should be atleast zero (disabled)")])
|
||||
message=_l("Should be atleast zero (disabled)"))])
|
||||
|
||||
rss_content_format = SelectField('RSS Content format', choices=RSS_FORMAT_TYPES)
|
||||
rss_content_format = SelectField(_l('RSS Content format'), choices=list(RSS_FORMAT_TYPES.items()))
|
||||
rss_template_type = SelectField(_l('RSS <description> body built from'), choices=list(RSS_TEMPLATE_TYPE_OPTIONS.items()))
|
||||
rss_template_override = TextAreaField(_l('RSS "System default" template override'), render_kw={"rows": "5", "placeholder": RSS_TEMPLATE_HTML_DEFAULT}, validators=[validators.Optional(), ValidateJinja2Template()])
|
||||
|
||||
removepassword_button = SubmitField('Remove password', render_kw={"class": "pure-button pure-button-primary"})
|
||||
render_anchor_tag_content = BooleanField('Render anchor tag content', default=False)
|
||||
shared_diff_access = BooleanField('Allow anonymous access to watch history page when password is enabled', default=False, validators=[validators.Optional()])
|
||||
strip_ignored_lines = BooleanField('Strip ignored lines')
|
||||
rss_hide_muted_watches = BooleanField('Hide muted watches from RSS feed', default=True,
|
||||
removepassword_button = SubmitField(_l('Remove password'), render_kw={"class": "pure-button pure-button-primary"})
|
||||
render_anchor_tag_content = BooleanField(_l('Render anchor tag content'), default=False)
|
||||
shared_diff_access = BooleanField(_l('Allow anonymous access to watch history page when password is enabled'), default=False, validators=[validators.Optional()])
|
||||
strip_ignored_lines = BooleanField(_l('Strip ignored lines'))
|
||||
rss_hide_muted_watches = BooleanField(_l('Hide muted watches from RSS feed'), default=True,
|
||||
validators=[validators.Optional()])
|
||||
|
||||
rss_reader_mode = BooleanField('RSS reader mode ', default=False,
|
||||
validators=[validators.Optional()])
|
||||
rss_reader_mode = BooleanField(_l('Enable RSS reader mode '), default=False, validators=[validators.Optional()])
|
||||
rss_diff_length = IntegerField(label=_l('Number of changes to show in watch RSS feed'),
|
||||
render_kw={"style": "width: 5em;"},
|
||||
validators=[validators.NumberRange(min=0, message=_l("Should contain zero or more attempts"))])
|
||||
|
||||
filter_failure_notification_threshold_attempts = IntegerField('Number of times the filter can be missing before sending a notification',
|
||||
filter_failure_notification_threshold_attempts = IntegerField(_l('Number of times the filter can be missing before sending a notification'),
|
||||
render_kw={"style": "width: 5em;"},
|
||||
validators=[validators.NumberRange(min=0,
|
||||
message="Should contain zero or more attempts")])
|
||||
message=_l("Should contain zero or more attempts"))])
|
||||
ui = FormField(globalSettingsApplicationUIForm)
|
||||
|
||||
|
||||
@@ -1029,9 +1049,9 @@ class globalSettingsForm(Form):
|
||||
|
||||
requests = FormField(globalSettingsRequestForm)
|
||||
application = FormField(globalSettingsApplicationForm)
|
||||
save_button = SubmitField('Save', render_kw={"class": "pure-button pure-button-primary"})
|
||||
save_button = SubmitField(_l('Save'), render_kw={"class": "pure-button pure-button-primary"})
|
||||
|
||||
|
||||
class extractDataForm(Form):
|
||||
extract_regex = StringField('RegEx to extract', validators=[validators.DataRequired(), ValidateSinglePythonRegexString()])
|
||||
extract_submit_button = SubmitField('Extract as CSV', render_kw={"class": "pure-button pure-button-primary"})
|
||||
extract_regex = StringField(_l('RegEx to extract'), validators=[validators.DataRequired(), ValidateSinglePythonRegexString()])
|
||||
extract_submit_button = SubmitField(_l('Extract as CSV'), render_kw={"class": "pure-button pure-button-primary"})
|
||||
|
||||
@@ -172,99 +172,131 @@ def elementpath_tostring(obj):
|
||||
return str(obj)
|
||||
|
||||
# Return str Utf-8 of matched rules
|
||||
def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False, is_rss=False):
|
||||
def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False, is_xml=False):
|
||||
"""
|
||||
|
||||
:param xpath_filter:
|
||||
:param html_content:
|
||||
:param append_pretty_line_formatting:
|
||||
:param is_xml: set to true if is XML or is RSS (RSS is XML)
|
||||
:return:
|
||||
"""
|
||||
from lxml import etree, html
|
||||
import elementpath
|
||||
# xpath 2.0-3.1
|
||||
from elementpath.xpath3 import XPath3Parser
|
||||
|
||||
parser = etree.HTMLParser()
|
||||
if is_rss:
|
||||
# So that we can keep CDATA for cdata_in_document_to_text() to process
|
||||
parser = etree.XMLParser(strip_cdata=False)
|
||||
|
||||
tree = html.fromstring(bytes(html_content, encoding='utf-8'), parser=parser)
|
||||
html_block = ""
|
||||
|
||||
# Build namespace map for XPath queries
|
||||
namespaces = {'re': 'http://exslt.org/regular-expressions'}
|
||||
|
||||
# Handle default namespace in documents (common in RSS/Atom feeds, but can occur in any XML)
|
||||
# XPath spec: unprefixed element names have no namespace, not the default namespace
|
||||
# Solution: Register the default namespace with empty string prefix in elementpath
|
||||
# This is primarily for RSS/Atom feeds but works for any XML with default namespace
|
||||
if hasattr(tree, 'nsmap') and tree.nsmap and None in tree.nsmap:
|
||||
# Register the default namespace with empty string prefix for elementpath
|
||||
# This allows //title to match elements in the default namespace
|
||||
namespaces[''] = tree.nsmap[None]
|
||||
|
||||
r = elementpath.select(tree, xpath_filter.strip(), namespaces=namespaces, parser=XPath3Parser)
|
||||
#@note: //title/text() now works with default namespaces (fixed by registering '' prefix)
|
||||
#@note: //title/text() wont work where <title>CDATA.. (use cdata_in_document_to_text first)
|
||||
|
||||
if type(r) != list:
|
||||
r = [r]
|
||||
|
||||
for element in r:
|
||||
# When there's more than 1 match, then add the suffix to separate each line
|
||||
# And where the matched result doesn't include something that will cause Inscriptis to add a newline
|
||||
# (This way each 'match' reliably has a new-line in the diff)
|
||||
# Divs are converted to 4 whitespaces by inscriptis
|
||||
if append_pretty_line_formatting and len(html_block) and (not hasattr( element, 'tag' ) or not element.tag in (['br', 'hr', 'div', 'p'])):
|
||||
html_block += TEXT_FILTER_LIST_LINE_SUFFIX
|
||||
|
||||
if type(element) == str:
|
||||
html_block += element
|
||||
elif issubclass(type(element), etree._Element) or issubclass(type(element), etree._ElementTree):
|
||||
html_block += etree.tostring(element, pretty_print=True).decode('utf-8')
|
||||
tree = None
|
||||
try:
|
||||
if is_xml:
|
||||
# So that we can keep CDATA for cdata_in_document_to_text() to process
|
||||
parser = etree.XMLParser(strip_cdata=False)
|
||||
# For XML/RSS content, use etree.fromstring to properly handle XML declarations
|
||||
tree = etree.fromstring(html_content.encode('utf-8') if isinstance(html_content, str) else html_content, parser=parser)
|
||||
else:
|
||||
html_block += elementpath_tostring(element)
|
||||
tree = html.fromstring(html_content, parser=parser)
|
||||
html_block = ""
|
||||
|
||||
return html_block
|
||||
# Build namespace map for XPath queries
|
||||
namespaces = {'re': 'http://exslt.org/regular-expressions'}
|
||||
|
||||
# Handle default namespace in documents (common in RSS/Atom feeds, but can occur in any XML)
|
||||
# XPath spec: unprefixed element names have no namespace, not the default namespace
|
||||
# Solution: Register the default namespace with empty string prefix in elementpath
|
||||
# This is primarily for RSS/Atom feeds but works for any XML with default namespace
|
||||
if hasattr(tree, 'nsmap') and tree.nsmap and None in tree.nsmap:
|
||||
# Register the default namespace with empty string prefix for elementpath
|
||||
# This allows //title to match elements in the default namespace
|
||||
namespaces[''] = tree.nsmap[None]
|
||||
|
||||
r = elementpath.select(tree, xpath_filter.strip(), namespaces=namespaces, parser=XPath3Parser)
|
||||
#@note: //title/text() now works with default namespaces (fixed by registering '' prefix)
|
||||
#@note: //title/text() wont work where <title>CDATA.. (use cdata_in_document_to_text first)
|
||||
|
||||
if type(r) != list:
|
||||
r = [r]
|
||||
|
||||
for element in r:
|
||||
# When there's more than 1 match, then add the suffix to separate each line
|
||||
# And where the matched result doesn't include something that will cause Inscriptis to add a newline
|
||||
# (This way each 'match' reliably has a new-line in the diff)
|
||||
# Divs are converted to 4 whitespaces by inscriptis
|
||||
if append_pretty_line_formatting and len(html_block) and (not hasattr( element, 'tag' ) or not element.tag in (['br', 'hr', 'div', 'p'])):
|
||||
html_block += TEXT_FILTER_LIST_LINE_SUFFIX
|
||||
|
||||
if type(element) == str:
|
||||
html_block += element
|
||||
elif issubclass(type(element), etree._Element) or issubclass(type(element), etree._ElementTree):
|
||||
# Use 'xml' method for RSS/XML content, 'html' for HTML content
|
||||
# parser will be XMLParser if we detected XML content
|
||||
method = 'xml' if (is_xml or isinstance(parser, etree.XMLParser)) else 'html'
|
||||
html_block += etree.tostring(element, pretty_print=True, method=method, encoding='unicode')
|
||||
else:
|
||||
html_block += elementpath_tostring(element)
|
||||
|
||||
return html_block
|
||||
finally:
|
||||
# Explicitly clear the tree to free memory
|
||||
# lxml trees can hold significant memory, especially with large documents
|
||||
if tree is not None:
|
||||
tree.clear()
|
||||
|
||||
# Return str Utf-8 of matched rules
|
||||
# 'xpath1:'
|
||||
def xpath1_filter(xpath_filter, html_content, append_pretty_line_formatting=False, is_rss=False):
|
||||
def xpath1_filter(xpath_filter, html_content, append_pretty_line_formatting=False, is_xml=False):
|
||||
from lxml import etree, html
|
||||
|
||||
parser = None
|
||||
if is_rss:
|
||||
# So that we can keep CDATA for cdata_in_document_to_text() to process
|
||||
parser = etree.XMLParser(strip_cdata=False)
|
||||
|
||||
tree = html.fromstring(bytes(html_content, encoding='utf-8'), parser=parser)
|
||||
html_block = ""
|
||||
|
||||
# Build namespace map for XPath queries
|
||||
namespaces = {'re': 'http://exslt.org/regular-expressions'}
|
||||
|
||||
# NOTE: lxml's native xpath() does NOT support empty string prefix for default namespace
|
||||
# For documents with default namespace (RSS/Atom feeds), users must use:
|
||||
# - local-name(): //*[local-name()='title']/text()
|
||||
# - Or use xpath_filter (not xpath1_filter) which supports default namespaces
|
||||
# XPath spec: unprefixed element names have no namespace, not the default namespace
|
||||
|
||||
r = tree.xpath(xpath_filter.strip(), namespaces=namespaces)
|
||||
#@note: xpath1 (lxml) does NOT automatically handle default namespaces
|
||||
#@note: Use //*[local-name()='element'] or switch to xpath_filter for default namespace support
|
||||
#@note: //title/text() wont work where <title>CDATA.. (use cdata_in_document_to_text first)
|
||||
|
||||
for element in r:
|
||||
# When there's more than 1 match, then add the suffix to separate each line
|
||||
# And where the matched result doesn't include something that will cause Inscriptis to add a newline
|
||||
# (This way each 'match' reliably has a new-line in the diff)
|
||||
# Divs are converted to 4 whitespaces by inscriptis
|
||||
if append_pretty_line_formatting and len(html_block) and (not hasattr(element, 'tag') or not element.tag in (['br', 'hr', 'div', 'p'])):
|
||||
html_block += TEXT_FILTER_LIST_LINE_SUFFIX
|
||||
|
||||
# Some kind of text, UTF-8 or other
|
||||
if isinstance(element, (str, bytes)):
|
||||
html_block += element
|
||||
tree = None
|
||||
try:
|
||||
if is_xml:
|
||||
# So that we can keep CDATA for cdata_in_document_to_text() to process
|
||||
parser = etree.XMLParser(strip_cdata=False)
|
||||
# For XML/RSS content, use etree.fromstring to properly handle XML declarations
|
||||
tree = etree.fromstring(html_content.encode('utf-8') if isinstance(html_content, str) else html_content, parser=parser)
|
||||
else:
|
||||
# Return the HTML which will get parsed as text
|
||||
html_block += etree.tostring(element, pretty_print=True).decode('utf-8')
|
||||
tree = html.fromstring(html_content, parser=parser)
|
||||
html_block = ""
|
||||
|
||||
return html_block
|
||||
# Build namespace map for XPath queries
|
||||
namespaces = {'re': 'http://exslt.org/regular-expressions'}
|
||||
|
||||
# NOTE: lxml's native xpath() does NOT support empty string prefix for default namespace
|
||||
# For documents with default namespace (RSS/Atom feeds), users must use:
|
||||
# - local-name(): //*[local-name()='title']/text()
|
||||
# - Or use xpath_filter (not xpath1_filter) which supports default namespaces
|
||||
# XPath spec: unprefixed element names have no namespace, not the default namespace
|
||||
|
||||
r = tree.xpath(xpath_filter.strip(), namespaces=namespaces)
|
||||
#@note: xpath1 (lxml) does NOT automatically handle default namespaces
|
||||
#@note: Use //*[local-name()='element'] or switch to xpath_filter for default namespace support
|
||||
#@note: //title/text() wont work where <title>CDATA.. (use cdata_in_document_to_text first)
|
||||
|
||||
for element in r:
|
||||
# When there's more than 1 match, then add the suffix to separate each line
|
||||
# And where the matched result doesn't include something that will cause Inscriptis to add a newline
|
||||
# (This way each 'match' reliably has a new-line in the diff)
|
||||
# Divs are converted to 4 whitespaces by inscriptis
|
||||
if append_pretty_line_formatting and len(html_block) and (not hasattr(element, 'tag') or not element.tag in (['br', 'hr', 'div', 'p'])):
|
||||
html_block += TEXT_FILTER_LIST_LINE_SUFFIX
|
||||
|
||||
# Some kind of text, UTF-8 or other
|
||||
if isinstance(element, (str, bytes)):
|
||||
html_block += element
|
||||
else:
|
||||
# Return the HTML/XML which will get parsed as text
|
||||
# Use 'xml' method for RSS/XML content, 'html' for HTML content
|
||||
# parser will be XMLParser if we detected XML content
|
||||
method = 'xml' if (is_xml or isinstance(parser, etree.XMLParser)) else 'html'
|
||||
html_block += etree.tostring(element, pretty_print=True, method=method, encoding='unicode')
|
||||
|
||||
return html_block
|
||||
finally:
|
||||
# Explicitly clear the tree to free memory
|
||||
# lxml trees can hold significant memory, especially with large documents
|
||||
if tree is not None:
|
||||
tree.clear()
|
||||
|
||||
# Extract/find element
|
||||
def extract_element(find='title', html_content=''):
|
||||
@@ -432,6 +464,9 @@ def strip_ignore_text(content, wordlist, mode="content"):
|
||||
ignore_regex_multiline = []
|
||||
ignored_lines = []
|
||||
|
||||
if not content:
|
||||
return ''
|
||||
|
||||
for k in wordlist:
|
||||
# Skip empty strings to avoid matching everything
|
||||
if not k or not k.strip():
|
||||
@@ -504,6 +539,18 @@ def cdata_in_document_to_text(html_content: str, render_anchor_tag_content=False
|
||||
|
||||
|
||||
def html_to_text(html_content: str, render_anchor_tag_content=False, is_rss=False, timeout=10) -> str:
|
||||
"""
|
||||
Convert HTML content to plain text using inscriptis.
|
||||
|
||||
Thread-Safety: This function uses inscriptis.get_text() which internally calls
|
||||
lxml.html.fromstring() with the default parser. Testing with 50 concurrent threads
|
||||
confirms this approach is thread-safe and produces deterministic output.
|
||||
|
||||
Alternative Approach Rejected: An explicit HTMLParser instance (thread-local or fresh)
|
||||
would also be thread-safe, but was found to break change detection logic in subtle ways
|
||||
(test_check_basic_change_detection_functionality). The default parser provides correct
|
||||
and reliable behavior.
|
||||
"""
|
||||
from inscriptis import get_text
|
||||
from inscriptis.model.config import ParserConfig
|
||||
|
||||
|
||||
113
changedetectionio/is_safe_url.py
Normal file
113
changedetectionio/is_safe_url.py
Normal file
@@ -0,0 +1,113 @@
|
||||
"""
|
||||
URL redirect validation module for preventing open redirect vulnerabilities.
|
||||
|
||||
This module provides functionality to safely validate redirect URLs, ensuring they:
|
||||
1. Point to internal routes only (no external redirects)
|
||||
2. Are properly normalized (preventing browser parsing differences)
|
||||
3. Match registered Flask routes (no fake/non-existent pages)
|
||||
4. Are fully logged for security monitoring
|
||||
|
||||
References:
|
||||
- https://flask-login.readthedocs.io/ (safe redirect patterns)
|
||||
- https://blog.miguelgrinberg.com/post/the-flask-mega-tutorial-part-v-user-logins
|
||||
- https://www.pythonkitchen.com/how-prevent-open-redirect-vulnerab-flask/
|
||||
"""
|
||||
|
||||
from urllib.parse import urlparse, urljoin
|
||||
from flask import request
|
||||
from loguru import logger
|
||||
|
||||
|
||||
def is_safe_url(target, app):
    """
    Validate that a redirect URL is safe to prevent open redirect vulnerabilities.

    The target is accepted only when all of the following hold:
    1. It is a non-empty string containing no ASCII control characters
    2. It does not start with '//' once backslashes are treated as '/'
    3. It carries no scheme (http:, javascript:, ...) and no network location
    4. Resolved against the current request host, it stays on that host
    5. Its path matches a registered route that is not a static-file endpoint

    Every rejection after the initial type/emptiness check is logged.

    Args:
        target: The URL to validate (e.g., '/settings', '/login#top')
        app: The Flask application instance (needed for route validation)

    Returns:
        bool: True if the URL is safe for redirection, False otherwise

    Examples:
        >>> is_safe_url('/settings', app)
        True
        >>> is_safe_url('//evil.com', app)
        False
        >>> is_safe_url('/settings#general', app)
        True
        >>> is_safe_url('/fake-page', app)
        False
    """
    # Empty values and non-strings (bytes, lists from repeated query args, ...) are
    # never valid targets; reject them here instead of crashing in .strip()/.replace()
    if not target or not isinstance(target, str):
        return False

    # Normalize the URL to prevent browser parsing differences
    # Strip whitespace and replace backslashes (which some browsers interpret as forward slashes)
    target = target.strip().replace('\\', '/')

    # Browsers drop tab/CR/LF while parsing a URL, so '/<TAB>/evil.com' can be read as
    # '//evil.com'. No legitimate internal path contains raw control characters.
    if any(ord(ch) < 0x20 or ord(ch) == 0x7f for ch in target):
        logger.warning(f"Blocked redirect attempt with control characters: {target!r}")
        return False

    # First, check if it starts with // or more (double-slash attack)
    if target.startswith('//'):
        logger.warning(f"Blocked redirect attempt with double-slash: {target}")
        return False

    # Parse the URL to check for scheme and netloc
    parsed = urlparse(target)

    # Block any URL with a scheme (http://, https://, javascript:, etc.)
    if parsed.scheme:
        logger.warning(f"Blocked redirect attempt with scheme: {target}")
        return False

    # Block any URL with a network location (netloc)
    if parsed.netloc:
        logger.warning(f"Blocked redirect attempt with netloc: {target}")
        return False

    # At this point, we have a relative URL with no scheme or netloc
    # Use urljoin to resolve it and verify it points to the same host
    ref_url = urlparse(request.host_url)
    test_url = urlparse(urljoin(request.host_url, target))

    # Check: ensure the resolved URL has the same netloc as current host
    if not (test_url.scheme in ('http', 'https') and ref_url.netloc == test_url.netloc):
        logger.warning(f"Blocked redirect attempt with mismatched netloc: {target}")
        return False

    # Additional validation: Check if the URL matches a registered route
    # This prevents redirects to non-existent pages or unintended endpoints
    try:
        # urlparse() already split the query string and fragment off into their own fields
        path = parsed.path

        # Create a URL adapter bound to the server name
        adapter = app.url_map.bind(ref_url.netloc)

        # Try to match the path to a registered route; a failed match raises
        endpoint, _ = adapter.match(path, return_rule=False)

        # Block redirects to static file endpoints - these are catch-all routes
        # that would match arbitrary paths, potentially allowing unintended redirects
        if endpoint in ('static_content', 'static', 'static_flags'):
            logger.warning(f"Blocked redirect to static endpoint: {target}")
            return False

        # Successfully matched a valid route
        logger.debug(f"Validated safe redirect to endpoint '{endpoint}': {target}")
        return True

    except Exception as e:
        # Deliberately broad: any failure to match the route fails closed (no redirect)
        logger.warning(f"Blocked redirect to non-existent route: {target} (error: {e})")
        return False
|
||||
110
changedetectionio/languages.py
Normal file
110
changedetectionio/languages.py
Normal file
@@ -0,0 +1,110 @@
|
||||
"""
|
||||
Language configuration for i18n support
|
||||
Automatically discovers available languages from translations directory
|
||||
"""
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def get_timeago_locale(flask_locale):
    """
    Translate a Flask-Babel locale code into the code used by the timeago library.

    Only locale codes that differ between the two are listed in the table below;
    any code not listed is returned unchanged.

    Args:
        flask_locale (str): Flask-Babel locale code (e.g., 'cs', 'zh', 'pt')

    Returns:
        str: timeago library locale code (e.g., 'en', 'zh_CN', 'pt_PT')
    """
    overrides = dict([
        ('zh', 'zh_CN'),          # Chinese Simplified
        ('zh_TW', 'zh_TW'),       # Chinese Traditional (timeago uses zh_TW)
        ('zh_Hant_TW', 'zh_TW'),  # Flask-Babel normalizes zh_TW to zh_Hant_TW, map back to timeago's zh_TW
        ('pt', 'pt_PT'),          # Portuguese (Portugal)
        ('sv', 'sv_SE'),          # Swedish
        ('no', 'nb_NO'),          # Norwegian Bokmål
        ('hi', 'in_HI'),          # Hindi
        ('cs', 'en'),             # Czech: falls back to English
        ('en_GB', 'en'),          # British English - timeago uses 'en'
        ('en_US', 'en'),          # American English - timeago uses 'en'
    ])

    if flask_locale in overrides:
        return overrides[flask_locale]
    return flask_locale
|
||||
|
||||
# Language metadata: flag icon CSS class and native name
|
||||
# Using flag-icons library: https://flagicons.lipis.dev/
|
||||
LANGUAGE_DATA = {
|
||||
'en_GB': {'flag': 'fi fi-gb fis', 'name': 'English (UK)'},
|
||||
'en_US': {'flag': 'fi fi-us fis', 'name': 'English (US)'},
|
||||
'de': {'flag': 'fi fi-de fis', 'name': 'Deutsch'},
|
||||
'fr': {'flag': 'fi fi-fr fis', 'name': 'Français'},
|
||||
'ko': {'flag': 'fi fi-kr fis', 'name': '한국어'},
|
||||
'cs': {'flag': 'fi fi-cz fis', 'name': 'Čeština'},
|
||||
'es': {'flag': 'fi fi-es fis', 'name': 'Español'},
|
||||
'pt': {'flag': 'fi fi-pt fis', 'name': 'Português'},
|
||||
'it': {'flag': 'fi fi-it fis', 'name': 'Italiano'},
|
||||
'ja': {'flag': 'fi fi-jp fis', 'name': '日本語'},
|
||||
'zh': {'flag': 'fi fi-cn fis', 'name': '中文 (简体)'},
|
||||
'zh_Hant_TW': {'flag': 'fi fi-tw fis', 'name': '繁體中文'},
|
||||
'ru': {'flag': 'fi fi-ru fis', 'name': 'Русский'},
|
||||
'pl': {'flag': 'fi fi-pl fis', 'name': 'Polski'},
|
||||
'nl': {'flag': 'fi fi-nl fis', 'name': 'Nederlands'},
|
||||
'sv': {'flag': 'fi fi-se fis', 'name': 'Svenska'},
|
||||
'da': {'flag': 'fi fi-dk fis', 'name': 'Dansk'},
|
||||
'no': {'flag': 'fi fi-no fis', 'name': 'Norsk'},
|
||||
'fi': {'flag': 'fi fi-fi fis', 'name': 'Suomi'},
|
||||
'tr': {'flag': 'fi fi-tr fis', 'name': 'Türkçe'},
|
||||
'ar': {'flag': 'fi fi-sa fis', 'name': 'العربية'},
|
||||
'hi': {'flag': 'fi fi-in fis', 'name': 'हिन्दी'},
|
||||
}
|
||||
|
||||
|
||||
def get_available_languages():
    """
    Discover available languages by scanning the translations directory.

    A language is included when a sub-directory of ``translations`` (located next
    to this module) is named after a key of LANGUAGE_DATA and contains
    ``LC_MESSAGES/messages.po``. Directories are visited in sorted order so the
    returned dict has the same ordering on every filesystem.

    Returns:
        dict: language code -> its LANGUAGE_DATA metadata entry. ``en_GB`` is
        added as the default when neither ``en_GB`` nor ``en_US`` was discovered.
    """
    translations_dir = Path(__file__).parent / 'translations'

    available = {}

    # is_dir() rather than exists(): a stray *file* with this name must not make iterdir() raise
    if translations_dir.is_dir():
        # iterdir() yields entries in arbitrary order; sort for a deterministic result
        for lang_dir in sorted(translations_dir.iterdir()):
            if lang_dir.is_dir() and lang_dir.name in LANGUAGE_DATA:
                # Only offer the language when its message catalog is present
                po_file = lang_dir / 'LC_MESSAGES' / 'messages.po'
                if po_file.exists():
                    available[lang_dir.name] = LANGUAGE_DATA[lang_dir.name]

    # If no English variants found, fall back to adding en_GB as default
    if 'en_GB' not in available and 'en_US' not in available:
        available['en_GB'] = LANGUAGE_DATA['en_GB']

    return available
|
||||
|
||||
|
||||
def get_language_codes():
    """Return the codes of all currently available languages as a list."""
    languages = get_available_languages()
    return [code for code in languages]
|
||||
|
||||
|
||||
def get_flag_for_locale(locale):
    """
    Return the 'flag' value registered for a locale in LANGUAGE_DATA.

    For known locales this is a flag-icons CSS class string (e.g. 'fi fi-de fis');
    unknown locales get the '🌐' globe character instead.
    """
    entry = LANGUAGE_DATA.get(locale, {})
    return entry.get('flag', '🌐')
|
||||
|
||||
|
||||
def get_name_for_locale(locale):
    """
    Return the native display name registered for a locale in LANGUAGE_DATA.

    When the locale has no registered name, its upper-cased code is returned.
    """
    entry = LANGUAGE_DATA.get(locale, {})
    # The fallback is computed unconditionally, exactly as a default argument would be
    fallback = locale.upper()
    return entry.get('name', fallback)
|
||||
@@ -1,7 +1,7 @@
|
||||
from os import getenv
|
||||
from copy import deepcopy
|
||||
|
||||
from changedetectionio.blueprint.rss import RSS_FORMAT_TYPES
|
||||
from changedetectionio.blueprint.rss import RSS_FORMAT_TYPES, RSS_CONTENT_FORMAT_DEFAULT
|
||||
|
||||
from changedetectionio.notification import (
|
||||
default_notification_body,
|
||||
@@ -37,6 +37,8 @@ class model(dict):
|
||||
},
|
||||
'application': {
|
||||
# Custom notification content
|
||||
'all_paused': False,
|
||||
'all_muted': False,
|
||||
'api_access_token_enabled': True,
|
||||
'base_url' : None,
|
||||
'empty_pages_are_a_change': False,
|
||||
@@ -46,6 +48,7 @@ class model(dict):
|
||||
'global_subtractive_selectors': [],
|
||||
'ignore_whitespace': True,
|
||||
'ignore_status_codes': False, #@todo implement, as ternary.
|
||||
'ssim_threshold': '0.96', # Default SSIM threshold for screenshot comparison
|
||||
'notification_body': default_notification_body,
|
||||
'notification_format': default_notification_format,
|
||||
'notification_title': default_notification_title,
|
||||
@@ -54,7 +57,10 @@ class model(dict):
|
||||
'password': False,
|
||||
'render_anchor_tag_content': False,
|
||||
'rss_access_token': None,
|
||||
'rss_content_format': RSS_FORMAT_TYPES[0][0],
|
||||
'rss_content_format': RSS_CONTENT_FORMAT_DEFAULT,
|
||||
'rss_template_type': 'system_default',
|
||||
'rss_template_override': None,
|
||||
'rss_diff_length': 5,
|
||||
'rss_hide_muted_watches': True,
|
||||
'rss_reader_mode': False,
|
||||
'scheduler_timezone_default': None, # Default IANA timezone name
|
||||
@@ -81,7 +87,7 @@ class model(dict):
|
||||
|
||||
def parse_headers_from_text_file(filepath):
|
||||
headers = {}
|
||||
with open(filepath, 'r') as f:
|
||||
with open(filepath, 'r', encoding='utf-8') as f:
|
||||
for l in f.readlines():
|
||||
l = l.strip()
|
||||
if not l.startswith('#') and ':' in l:
|
||||
|
||||
@@ -10,15 +10,70 @@ from pathlib import Path
|
||||
from loguru import logger
|
||||
|
||||
from .. import jinja2_custom as safe_jinja
|
||||
from ..diff import ADDED_PLACEMARKER_OPEN
|
||||
from ..html_tools import TRANSLATE_WHITESPACE_TABLE
|
||||
|
||||
FAVICON_RESAVE_THRESHOLD_SECONDS=86400
|
||||
|
||||
BROTLI_COMPRESS_SIZE_THRESHOLD = int(os.getenv('SNAPSHOT_BROTLI_COMPRESSION_THRESHOLD', 1024))
|
||||
|
||||
minimum_seconds_recheck_time = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 3))
|
||||
mtable = {'seconds': 1, 'minutes': 60, 'hours': 3600, 'days': 86400, 'weeks': 86400 * 7}
|
||||
|
||||
def _brotli_save(contents, filepath, mode=None, fallback_uncompressed=False):
    """
    Compress ``contents`` with brotli and write the result to ``filepath``.

    gc.collect() is run after a successful save to release the compression buffers.

    Args:
        contents: data to compress (str or bytes); str is encoded as UTF-8
        filepath: destination file path
        mode: brotli compression mode (e.g., brotli.MODE_TEXT)
        fallback_uncompressed: if True, save uncompressed on failure; if False, raise exception

    Returns:
        str: actual filepath saved. On fallback this is ``filepath`` with a trailing
        '.br' suffix removed (unchanged if it has no such suffix).

    Raises:
        Exception: if compression or writing fails and fallback_uncompressed is False
    """
    import brotli
    import gc

    # Ensure contents are bytes
    if isinstance(contents, str):
        contents = contents.encode('utf-8')

    # Tracks whether filepath may have been created/truncated by this call, so the
    # error path never deletes a file this call did not touch
    write_started = False

    try:
        logger.debug(f"Starting brotli compression of {len(contents)} bytes.")

        if mode is not None:
            compressed_data = brotli.compress(contents, mode=mode)
        else:
            compressed_data = brotli.compress(contents)

        write_started = True
        with open(filepath, 'wb') as f:
            f.write(compressed_data)

        logger.debug(f"Finished brotli compression - From {len(contents)} to {len(compressed_data)} bytes.")

        # Force garbage collection to prevent memory buildup
        gc.collect()

        return filepath

    except Exception as e:
        logger.error(f"Brotli compression error: {e}")

        # A failed write can leave a truncated .br file behind; readers that prefer an
        # existing '.br' file over the plain one would then pick up corrupt data
        if write_started:
            try:
                if os.path.isfile(filepath):
                    os.unlink(filepath)
            except OSError as cleanup_error:
                logger.warning(f"Could not remove partial file {filepath}: {cleanup_error}")

        if fallback_uncompressed:
            logger.warning(f"Brotli compression failed for {filepath}, saving uncompressed")
            # Strip only a trailing '.br'; str.replace() would also mangle any '.br'
            # occurring earlier in the path (e.g. inside a directory name)
            fallback_path = os.fspath(filepath)
            if fallback_path.endswith('.br'):
                fallback_path = fallback_path[:-len('.br')]
            with open(fallback_path, 'wb') as f:
                f.write(contents)
            return fallback_path

        raise Exception(f"Brotli compression failed for {filepath}: {e}") from e
|
||||
|
||||
|
||||
class model(watch_base):
|
||||
__newest_history_key = None
|
||||
__history_n = 0
|
||||
@@ -93,11 +148,29 @@ class model(watch_base):
|
||||
domain = parsed.hostname
|
||||
return domain
|
||||
|
||||
@property
def history_index_filename(self):
    """
    Name of the history index file for this watch.

    Each non-default processor gets its own index file so that histories produced
    by different 'diff' setups are never mixed up when viewed.

    :return: 'history.txt' when no processor is set or it is 'text_json_diff',
             otherwise 'history-<processor>.txt'
    """
    processor = self.get('processor')
    uses_default_index = not processor or processor == 'text_json_diff'
    return 'history.txt' if uses_default_index else f'history-{processor}.txt'
|
||||
|
||||
def clear_watch(self):
|
||||
import pathlib
|
||||
|
||||
# Get list of processor config files to preserve
|
||||
from changedetectionio.processors import find_processors
|
||||
processor_names = [name for cls, name in find_processors()]
|
||||
processor_config_files = {f"{name}.json" for name in processor_names}
|
||||
|
||||
# JSON Data, Screenshots, Textfiles (history index and snapshots), HTML in the future etc
|
||||
# But preserve processor config files (they're configuration, not history data)
|
||||
for item in pathlib.Path(str(self.watch_data_dir)).rglob("*.*"):
|
||||
# Skip processor config files
|
||||
if item.name in processor_config_files:
|
||||
continue
|
||||
os.unlink(item)
|
||||
|
||||
# Force the attr to recalculate
|
||||
@@ -185,22 +258,25 @@ class model(watch_base):
|
||||
return []
|
||||
|
||||
# Read the history file as a dict
|
||||
fname = os.path.join(self.watch_data_dir, "history.txt")
|
||||
fname = os.path.join(self.watch_data_dir, self.history_index_filename)
|
||||
if os.path.isfile(fname):
|
||||
logger.debug(f"Reading watch history index for {self.get('uuid')}")
|
||||
with open(fname, "r") as f:
|
||||
with open(fname, "r", encoding='utf-8') as f:
|
||||
for i in f.readlines():
|
||||
if ',' in i:
|
||||
k, v = i.strip().split(',', 2)
|
||||
|
||||
# The index history could contain a relative path, so we need to make the fullpath
|
||||
# so that python can read it
|
||||
if not '/' in v and not '\'' in v:
|
||||
# Cross-platform: check for any path separator (works on Windows and Unix)
|
||||
if os.sep not in v and '/' not in v and '\\' not in v:
|
||||
# Relative filename only, no path separators
|
||||
v = os.path.join(self.watch_data_dir, v)
|
||||
else:
|
||||
# It's possible that they moved the datadir on older versions
|
||||
# So the snapshot exists but is in a different path
|
||||
snapshot_fname = v.split('/')[-1]
|
||||
# Cross-platform: use os.path.basename instead of split('/')
|
||||
snapshot_fname = os.path.basename(v)
|
||||
proposed_new_path = os.path.join(self.watch_data_dir, snapshot_fname)
|
||||
if not os.path.exists(v) and os.path.exists(proposed_new_path):
|
||||
v = proposed_new_path
|
||||
@@ -218,7 +294,7 @@ class model(watch_base):
|
||||
|
||||
@property
|
||||
def has_history(self):
|
||||
fname = os.path.join(self.watch_data_dir, "history.txt")
|
||||
fname = os.path.join(self.watch_data_dir, self.history_index_filename)
|
||||
return os.path.isfile(fname)
|
||||
|
||||
@property
|
||||
@@ -276,65 +352,120 @@ class model(watch_base):
|
||||
# When the 'last viewed' timestamp is less than the oldest snapshot, return oldest
|
||||
return sorted_keys[-1]
|
||||
|
||||
def get_history_snapshot(self, timestamp):
|
||||
def get_history_snapshot(self, timestamp=None, filepath=None):
|
||||
"""
|
||||
Accepts either timestamp or filepath
|
||||
:param timestamp:
|
||||
:param filepath:
|
||||
:return:
|
||||
"""
|
||||
import brotli
|
||||
filepath = self.history[timestamp]
|
||||
|
||||
# See if a brotli versions exists and switch to that
|
||||
if not filepath.endswith('.br') and os.path.isfile(f"{filepath}.br"):
|
||||
filepath = f"{filepath}.br"
|
||||
if not filepath:
|
||||
filepath = self.history[timestamp]
|
||||
|
||||
# OR in the backup case that the .br does not exist, but the plain one does
|
||||
if filepath.endswith('.br') and not os.path.isfile(filepath):
|
||||
if os.path.isfile(filepath.replace('.br', '')):
|
||||
filepath = filepath.replace('.br', '')
|
||||
# Check if binary file (image, PDF, etc.)
|
||||
# Binary files are NEVER saved with .br compression, only text files are
|
||||
binary_extensions = ('.png', '.jpg', '.jpeg', '.gif', '.webp', '.pdf', '.bin', '.jfif')
|
||||
is_binary = any(filepath.endswith(ext) for ext in binary_extensions)
|
||||
|
||||
# Only look for .br versions for text files
|
||||
if not is_binary:
|
||||
# See if a brotli version exists and switch to that (text files only)
|
||||
if not filepath.endswith('.br') and os.path.isfile(f"{filepath}.br"):
|
||||
filepath = f"{filepath}.br"
|
||||
|
||||
# OR in the backup case that the .br does not exist, but the plain one does
|
||||
if filepath.endswith('.br') and not os.path.isfile(filepath):
|
||||
if os.path.isfile(filepath.replace('.br', '')):
|
||||
filepath = filepath.replace('.br', '')
|
||||
|
||||
# Handle .br compressed text files
|
||||
if filepath.endswith('.br'):
|
||||
# Brotli doesnt have a fileheader to detect it, so we rely on filename
|
||||
# https://www.rfc-editor.org/rfc/rfc7932
|
||||
# Note: .br should ONLY exist for text files, never binary
|
||||
with open(filepath, 'rb') as f:
|
||||
return(brotli.decompress(f.read()).decode('utf-8'))
|
||||
return brotli.decompress(f.read()).decode('utf-8')
|
||||
|
||||
# Binary file - return raw bytes
|
||||
if is_binary:
|
||||
with open(filepath, 'rb') as f:
|
||||
return f.read()
|
||||
|
||||
# Text file - decode to string
|
||||
with open(filepath, 'r', encoding='utf-8', errors='ignore') as f:
|
||||
return f.read()
|
||||
|
||||
# Save some text file to the appropriate path and bump the history
|
||||
# result_obj from fetch_site_status.run()
|
||||
def save_history_text(self, contents, timestamp, snapshot_id):
|
||||
import brotli
|
||||
import tempfile
|
||||
logger.trace(f"{self.get('uuid')} - Updating history.txt with timestamp {timestamp}")
|
||||
|
||||
self.ensure_data_dir_exists()
|
||||
|
||||
threshold = int(os.getenv('SNAPSHOT_BROTLI_COMPRESSION_THRESHOLD', 1024))
|
||||
skip_brotli = strtobool(os.getenv('DISABLE_BROTLI_TEXT_SNAPSHOT', 'False'))
|
||||
|
||||
# Decide on snapshot filename and destination path
|
||||
if not skip_brotli and len(contents) > threshold:
|
||||
snapshot_fname = f"{snapshot_id}.txt.br"
|
||||
encoded_data = brotli.compress(contents.encode('utf-8'), mode=brotli.MODE_TEXT)
|
||||
else:
|
||||
snapshot_fname = f"{snapshot_id}.txt"
|
||||
encoded_data = contents.encode('utf-8')
|
||||
|
||||
dest = os.path.join(self.watch_data_dir, snapshot_fname)
|
||||
|
||||
# Write snapshot file atomically if it doesn't exist
|
||||
def _write_atomic(self, dest, data):
|
||||
"""Write data atomically to dest using a temp file"""
|
||||
if not os.path.exists(dest):
|
||||
import tempfile
|
||||
with tempfile.NamedTemporaryFile('wb', delete=False, dir=self.watch_data_dir) as tmp:
|
||||
tmp.write(encoded_data)
|
||||
tmp.write(data)
|
||||
tmp.flush()
|
||||
os.fsync(tmp.fileno())
|
||||
tmp_path = tmp.name
|
||||
os.rename(tmp_path, dest)
|
||||
os.replace(tmp_path, dest)
|
||||
|
||||
# Save some text file to the appropriate path and bump the history
|
||||
# result_obj from fetch_site_status.run()
|
||||
def save_history_blob(self, contents, timestamp, snapshot_id):
|
||||
|
||||
logger.trace(f"{self.get('uuid')} - Updating {self.history_index_filename} with timestamp {timestamp}")
|
||||
|
||||
self.ensure_data_dir_exists()
|
||||
|
||||
skip_brotli = strtobool(os.getenv('DISABLE_BROTLI_TEXT_SNAPSHOT', 'False'))
|
||||
|
||||
# Binary data - detect file type and save without compression
|
||||
if isinstance(contents, bytes):
|
||||
try:
|
||||
import puremagic
|
||||
detections = puremagic.magic_string(contents[:2048])
|
||||
ext = detections[0].extension if detections else 'bin'
|
||||
# Strip leading dot if present (puremagic returns extensions like '.jfif')
|
||||
ext = ext.lstrip('.')
|
||||
if detections:
|
||||
logger.trace(f"Detected file type: {detections[0].mime_type} -> extension: {ext}")
|
||||
except Exception as e:
|
||||
logger.warning(f"puremagic detection failed: {e}, using 'bin' extension")
|
||||
ext = 'bin'
|
||||
|
||||
snapshot_fname = f"{snapshot_id}.{ext}"
|
||||
dest = os.path.join(self.watch_data_dir, snapshot_fname)
|
||||
self._write_atomic(dest, contents)
|
||||
logger.trace(f"Saved binary snapshot as {snapshot_fname} ({len(contents)} bytes)")
|
||||
|
||||
# Text data - use brotli compression if enabled and above threshold
|
||||
else:
|
||||
if not skip_brotli and len(contents) > BROTLI_COMPRESS_SIZE_THRESHOLD:
|
||||
# Compressed text
|
||||
import brotli
|
||||
snapshot_fname = f"{snapshot_id}.txt.br"
|
||||
dest = os.path.join(self.watch_data_dir, snapshot_fname)
|
||||
|
||||
if not os.path.exists(dest):
|
||||
try:
|
||||
actual_dest = _brotli_save(contents, dest, mode=brotli.MODE_TEXT, fallback_uncompressed=True)
|
||||
if actual_dest != dest:
|
||||
snapshot_fname = os.path.basename(actual_dest)
|
||||
except Exception as e:
|
||||
logger.error(f"{self.get('uuid')} - Brotli compression failed: {e}")
|
||||
# Fallback to uncompressed
|
||||
snapshot_fname = f"{snapshot_id}.txt"
|
||||
dest = os.path.join(self.watch_data_dir, snapshot_fname)
|
||||
self._write_atomic(dest, contents.encode('utf-8'))
|
||||
else:
|
||||
# Plain text
|
||||
snapshot_fname = f"{snapshot_id}.txt"
|
||||
dest = os.path.join(self.watch_data_dir, snapshot_fname)
|
||||
self._write_atomic(dest, contents.encode('utf-8'))
|
||||
|
||||
# Append to history.txt atomically
|
||||
index_fname = os.path.join(self.watch_data_dir, "history.txt")
|
||||
index_fname = os.path.join(self.watch_data_dir, self.history_index_filename)
|
||||
index_line = f"{timestamp},{snapshot_fname}\n"
|
||||
|
||||
# Lets try force flush here since it's usually a very small file
|
||||
# If this still fails in the future then try reading all to memory first, re-writing etc
|
||||
with open(index_fname, 'a', encoding='utf-8') as f:
|
||||
f.write(index_line)
|
||||
f.flush()
|
||||
@@ -382,7 +513,7 @@ class model(watch_base):
|
||||
# Compare each lines (set) against each history text file (set) looking for something new..
|
||||
existing_history = set({})
|
||||
for k, v in self.history.items():
|
||||
content = self.get_history_snapshot(k)
|
||||
content = self.get_history_snapshot(filepath=v)
|
||||
|
||||
if ignore_whitespace:
|
||||
alist = set([line.translate(TRANSLATE_WHITESPACE_TABLE).lower() for line in content.splitlines()])
|
||||
@@ -586,7 +717,7 @@ class model(watch_base):
|
||||
"""Return the text saved from a previous request that resulted in a non-200 error"""
|
||||
fname = os.path.join(self.watch_data_dir, "last-error.txt")
|
||||
if os.path.isfile(fname):
|
||||
with open(fname, 'r') as f:
|
||||
with open(fname, 'r', encoding='utf-8') as f:
|
||||
return f.read()
|
||||
return False
|
||||
|
||||
@@ -639,7 +770,7 @@ class model(watch_base):
|
||||
for k, fname in self.history.items():
|
||||
if os.path.isfile(fname):
|
||||
if True:
|
||||
contents = self.get_history_snapshot(k)
|
||||
contents = self.get_history_snapshot(timestamp=k)
|
||||
res = re.findall(regex, contents, re.MULTILINE)
|
||||
if res:
|
||||
if not csv_writer:
|
||||
@@ -732,7 +863,7 @@ class model(watch_base):
|
||||
# If a previous attempt doesnt yet exist, just snarf the previous snapshot instead
|
||||
dates = list(self.history.keys())
|
||||
if len(dates):
|
||||
return self.get_history_snapshot(dates[-1])
|
||||
return self.get_history_snapshot(timestamp=dates[-1])
|
||||
else:
|
||||
return ''
|
||||
|
||||
@@ -742,25 +873,13 @@ class model(watch_base):
|
||||
def save_last_text_fetched_before_filters(self, contents):
    """Hand ``contents`` to ``_brotli_save`` for storage as ``last-fetched.br`` in the watch data dir.

    Text mode is requested and the uncompressed fallback is disabled.
    """
    import brotli

    target = os.path.join(self.watch_data_dir, 'last-fetched.br')
    _brotli_save(contents, target, mode=brotli.MODE_TEXT, fallback_uncompressed=False)
|
||||
|
||||
def save_last_fetched_html(self, timestamp, contents):
    """Save the fetched HTML for ``timestamp`` as ``<timestamp>.html.br`` and prune old copies.

    The data directory is ensured first; writing is delegated to
    ``_brotli_save`` with the uncompressed fallback enabled.
    """
    self.ensure_data_dir_exists()

    target = os.path.join(self.watch_data_dir, f"{timestamp}.html.br")
    _brotli_save(contents, target, mode=None, fallback_uncompressed=True)

    self._prune_last_fetched_html_snapshots()
|
||||
|
||||
def get_fetched_html(self, timestamp):
|
||||
@@ -818,6 +937,7 @@ class model(watch_base):
|
||||
# has app+request context, we can use url_for()
|
||||
if has_app_context:
|
||||
if last_error:
|
||||
last_error = safe_jinja.render_fully_escaped(last_error)
|
||||
if '403' in last_error:
|
||||
if has_proxies:
|
||||
output.append(str(Markup(f"{last_error} - <a href=\"{url_for('settings.settings_page', uuid=self.get('uuid'))}\">Try other proxies/location</a> '")))
|
||||
@@ -827,7 +947,9 @@ class model(watch_base):
|
||||
output.append(str(Markup(last_error)))
|
||||
|
||||
if self.get('last_notification_error'):
|
||||
output.append(str(Markup(f"<div class=\"notification-error\"><a href=\"{url_for('settings.notification_logs')}\">{ self.get('last_notification_error') }</a></div>")))
|
||||
txt = safe_jinja.render_fully_escaped(self.get('last_notification_error'))
|
||||
result = f'<div class="notification-error"><a href="{url_for("settings.notification_logs")}">{txt}</a></div>'
|
||||
output.append(result)
|
||||
|
||||
else:
|
||||
# Lo_Fi version - no app context, cant rely on Jinja2 Markup
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
|
||||
import time
|
||||
import re
|
||||
import apprise
|
||||
from apprise import NotifyFormat
|
||||
from loguru import logger
|
||||
@@ -11,11 +12,10 @@ from ..diff import HTML_REMOVED_STYLE, REMOVED_PLACEMARKER_OPEN, REMOVED_PLACEMA
|
||||
CHANGED_PLACEMARKER_CLOSED, HTML_CHANGED_STYLE, HTML_CHANGED_INTO_STYLE
|
||||
import re
|
||||
|
||||
from ..notification_service import NotificationContextData
|
||||
from ..notification_service import NotificationContextData, add_rendered_diff_to_notification_vars
|
||||
|
||||
newline_re = re.compile(r'\r\n|\r|\n')
|
||||
|
||||
|
||||
def markup_text_links_to_html(body):
|
||||
"""
|
||||
Convert plaintext to HTML with clickable links.
|
||||
@@ -79,6 +79,24 @@ def notification_format_align_with_apprise(n_format : str):
|
||||
|
||||
return n_format
|
||||
|
||||
|
||||
def apply_html_color_to_body(n_body: str):
    """Replace the diff placemarker tokens in ``n_body`` with inline-styled ``<span>`` tags.

    Replacements are applied in a fixed order: removed, added, changed,
    changed-into (each open marker followed by its closing marker).
    """
    # https://github.com/dgtlmoon/changedetection.io/issues/821#issuecomment-1241837050
    span_close = '</span>'
    substitutions = (
        (REMOVED_PLACEMARKER_OPEN,
         f'<span style="{HTML_REMOVED_STYLE}" role="deletion" aria-label="Removed text" title="Removed text">'),
        (REMOVED_PLACEMARKER_CLOSED, span_close),
        (ADDED_PLACEMARKER_OPEN,
         f'<span style="{HTML_ADDED_STYLE}" role="insertion" aria-label="Added text" title="Added text">'),
        (ADDED_PLACEMARKER_CLOSED, span_close),
        # Changed/replaced lines (old -> new)
        (CHANGED_PLACEMARKER_OPEN,
         f'<span style="{HTML_CHANGED_STYLE}" role="note" aria-label="Changed text" title="Changed text">'),
        (CHANGED_PLACEMARKER_CLOSED, span_close),
        (CHANGED_INTO_PLACEMARKER_OPEN,
         f'<span style="{HTML_CHANGED_INTO_STYLE}" role="note" aria-label="Changed into" title="Changed into">'),
        (CHANGED_INTO_PLACEMARKER_CLOSED, span_close),
    )

    for marker, html in substitutions:
        n_body = n_body.replace(marker, html)

    return n_body
|
||||
|
||||
def apply_discord_markdown_to_body(n_body):
|
||||
"""
|
||||
Discord does not support <del> but it supports non-standard ~~strikethrough~~
|
||||
@@ -187,6 +205,8 @@ def replace_placemarkers_in_text(text, url, requested_output_format):
|
||||
|
||||
def apply_service_tweaks(url, n_body, n_title, requested_output_format):
|
||||
|
||||
logger.debug(f"Applying markup in '{requested_output_format}' mode")
|
||||
|
||||
# Re 323 - Limit discord length to their 2000 char limit total or it wont send.
|
||||
# Because different notifications may require different pre-processing, run each sequentially :(
|
||||
# 2000 bytes minus -
|
||||
@@ -331,6 +351,16 @@ def process_notification(n_object: NotificationContextData, datastore):
|
||||
if not n_object.get('notification_urls'):
|
||||
return None
|
||||
|
||||
n_object.update(add_rendered_diff_to_notification_vars(
|
||||
notification_scan_text=n_object.get('notification_body', '')+n_object.get('notification_title', ''),
|
||||
current_snapshot=n_object.get('current_snapshot'),
|
||||
prev_snapshot=n_object.get('prev_snapshot'),
|
||||
# Should always be false for 'text' mode or its too hard to read
|
||||
# But otherwise, this could be some setting
|
||||
word_diff=False if requested_output_format_original == 'text' else True,
|
||||
)
|
||||
)
|
||||
|
||||
with (apprise.LogCapture(level=apprise.logging.DEBUG) as logs):
|
||||
for url in n_object['notification_urls']:
|
||||
|
||||
@@ -397,11 +427,15 @@ def process_notification(n_object: NotificationContextData, datastore):
|
||||
apprise_input_format = NotifyFormat.TEXT.value
|
||||
requested_output_format = NotifyFormat.TEXT.value
|
||||
|
||||
|
||||
#@todo on null:// (only if its a 1 url with null) probably doesnt need to actually .add/setup/etc
|
||||
sent_objs.append({'title': n_title,
|
||||
'body': n_body,
|
||||
'url': url})
|
||||
apobj.add(url)
|
||||
'url': url,
|
||||
# So that we can do a null:// call and get back exactly what would have been sent
|
||||
'original_context': n_object })
|
||||
|
||||
if not url.startswith('null://'):
|
||||
apobj.add(url)
|
||||
|
||||
# Since the output is always based on the plaintext of the 'diff' engine, wrap it nicely.
|
||||
# It should always be similar to the 'history' part of the UI.
|
||||
@@ -409,15 +443,16 @@ def process_notification(n_object: NotificationContextData, datastore):
|
||||
if not '<pre' in n_body and not '<body' in n_body: # No custom HTML-ish body was setup already
|
||||
n_body = as_monospaced_html_email(content=n_body, title=n_title)
|
||||
|
||||
apobj.notify(
|
||||
title=n_title,
|
||||
body=n_body,
|
||||
# `body_format` Tell apprise what format the INPUT is in, specify a wrong/bad type and it will force skip conversion in apprise
|
||||
# &format= in URL Tell apprise what format the OUTPUT should be in (it can convert between)
|
||||
body_format=apprise_input_format,
|
||||
# False is not an option for AppRise, must be type None
|
||||
attach=n_object.get('screenshot', None)
|
||||
)
|
||||
if not url.startswith('null://'):
|
||||
apobj.notify(
|
||||
title=n_title,
|
||||
body=n_body,
|
||||
# `body_format` Tell apprise what format the INPUT is in, specify a wrong/bad type and it will force skip conversion in apprise
|
||||
# &format= in URL Tell apprise what format the OUTPUT should be in (it can convert between)
|
||||
body_format=apprise_input_format,
|
||||
# False is not an option for AppRise, must be type None
|
||||
attach=n_object.get('screenshot', None)
|
||||
)
|
||||
|
||||
# Returns empty string if nothing found, multi-line string otherwise
|
||||
log_value = logs.getvalue()
|
||||
@@ -436,6 +471,8 @@ def create_notification_parameters(n_object: NotificationContextData, datastore)
|
||||
if not isinstance(n_object, NotificationContextData):
|
||||
raise TypeError(f"Expected NotificationContextData, got {type(n_object)}")
|
||||
|
||||
ext_base_url = datastore.data['settings']['application'].get('active_base_url').strip('/')+'/'
|
||||
|
||||
watch = datastore.data['watching'].get(n_object['uuid'])
|
||||
if watch:
|
||||
watch_title = datastore.data['watching'][n_object['uuid']].label
|
||||
@@ -449,20 +486,29 @@ def create_notification_parameters(n_object: NotificationContextData, datastore)
|
||||
watch_title = 'Change Detection'
|
||||
watch_tag = ''
|
||||
|
||||
# Create URLs to customise the notification with
|
||||
# active_base_url - set in store.py data property
|
||||
base_url = datastore.data['settings']['application'].get('active_base_url')
|
||||
|
||||
watch_url = n_object['watch_url']
|
||||
|
||||
diff_url = "{}/diff/{}".format(base_url, n_object['uuid'])
|
||||
preview_url = "{}/preview/{}".format(base_url, n_object['uuid'])
|
||||
# Build URLs manually instead of using url_for() to avoid requiring a request context
|
||||
# This allows notifications to be processed in background threads
|
||||
uuid = n_object['uuid']
|
||||
|
||||
if n_object.get('timestamp_from') and n_object.get('timestamp_to'):
|
||||
# Include a link to the diff page with specific versions
|
||||
diff_url = f"{ext_base_url}diff/{uuid}?from_version={n_object['timestamp_from']}&to_version={n_object['timestamp_to']}"
|
||||
else:
|
||||
diff_url = f"{ext_base_url}diff/{uuid}"
|
||||
|
||||
preview_url = f"{ext_base_url}preview/{uuid}"
|
||||
edit_url = f"{ext_base_url}edit/{uuid}"
|
||||
|
||||
# @todo test that preview_url is correct when running in not-null mode?
|
||||
# if not, first time app loads i think it can set a flask context
|
||||
n_object.update(
|
||||
{
|
||||
'base_url': base_url,
|
||||
'base_url': ext_base_url,
|
||||
'diff_url': diff_url,
|
||||
'preview_url': preview_url,
|
||||
'preview_url': preview_url, #@todo include 'version='
|
||||
'edit_url': edit_url, #@todo also pause, also mute link
|
||||
'watch_tag': watch_tag if watch_tag is not None else '',
|
||||
'watch_title': watch_title if watch_title is not None else '',
|
||||
'watch_url': watch_url,
|
||||
|
||||
@@ -5,13 +5,54 @@ Notification Service Module
|
||||
Extracted from update_worker.py to provide standalone notification functionality
|
||||
for both sync and async workers
|
||||
"""
|
||||
import datetime
|
||||
|
||||
import pytz
|
||||
from loguru import logger
|
||||
import time
|
||||
|
||||
from changedetectionio.notification import default_notification_format, valid_notification_formats
|
||||
|
||||
|
||||
def _check_cascading_vars(datastore, var_name, watch):
    """Resolve a notification setting by falling through watch -> tags -> global -> defaults.

    Args:
        datastore: Object exposing ``data['settings']['application']`` and
            ``get_all_tags_for_watch(uuid=...)``.
        var_name: Name of the setting to look up (e.g. 'notification_format').
        watch: The watch whose settings are consulted first.

    Returns:
        The first truthy value found on a non-muted watch, then on a non-muted
        tag, then in the application settings; otherwise the built-in default
        for format/body/title, or None for any other name.
    """
    from changedetectionio.notification import (
        USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH,
        default_notification_body,
        default_notification_title
    )

    # 1. The watch itself (ignored when the watch is muted)
    watch_value = watch.get(var_name)
    if watch_value and not watch.get('notification_muted'):
        wants_system_format = (
            var_name == 'notification_format'
            and watch_value == USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH
        )
        if wants_system_format:
            return datastore.data['settings']['application'].get('notification_format')
        return watch_value

    # 2. Any tag on the watch (muted tags are skipped)
    tags = datastore.get_all_tags_for_watch(uuid=watch.get('uuid'))
    if tags:
        for _tag_uuid, tag in tags.items():
            tag_value = tag.get(var_name)
            if tag_value and not tag.get('notification_muted'):
                return tag_value

    # 3. Global application settings
    global_value = datastore.data['settings']['application'].get(var_name)
    if global_value:
        return global_value

    # 4. Built-in defaults; unknown names resolve to None
    fallbacks = {
        'notification_format': USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH,
        'notification_body': default_notification_body,
        'notification_title': default_notification_title,
    }
    return fallbacks.get(var_name)
|
||||
|
||||
|
||||
# What is passed around as notification context, also used as the complete list of valid {{ tokens }}
|
||||
class NotificationContextData(dict):
|
||||
@@ -34,6 +75,8 @@ class NotificationContextData(dict):
|
||||
'preview_url': None,
|
||||
'screenshot': None,
|
||||
'triggered_text': None,
|
||||
'timestamp_from': None,
|
||||
'timestamp_to': None,
|
||||
'uuid': 'XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX', # Converted to 'watch_uuid' in create_notification_parameters
|
||||
'watch_mime_type': None,
|
||||
'watch_tag': None,
|
||||
@@ -72,23 +115,90 @@ class NotificationContextData(dict):
|
||||
|
||||
super().__setitem__(key, value)
|
||||
|
||||
def timestamp_to_localtime(timestamp):
    """Format a Unix timestamp as a local-time string including the timezone name.

    Args:
        timestamp: Seconds since the epoch; anything ``int()`` accepts.

    Returns:
        str: ``YYYY-MM-DD HH:MM:SS TZ`` in the system's local timezone, or the
        ISO-8601 representation if ``strftime`` fails.
    """
    # Epoch seconds are UTC, so build an aware UTC datetime first.
    # fromtimestamp() without tz already returns *local* wall-clock time;
    # labelling that as UTC and converting to local again applies the UTC
    # offset twice.
    utc_dt = datetime.datetime.fromtimestamp(int(timestamp), tz=datetime.timezone.utc)

    # astimezone() with no argument converts to the system local timezone
    local_dt = utc_dt.astimezone()

    try:
        return local_dt.strftime('%Y-%m-%d %H:%M:%S %Z')
    except Exception:
        # Fallback if the platform's strftime rejects the format/locale
        return local_dt.isoformat()
|
||||
|
||||
def add_rendered_diff_to_notification_vars(notification_scan_text:str, prev_snapshot:str, current_snapshot:str, word_diff:bool):
    """
    Render only those diff placeholders that the notification text actually mentions.

    The text is scanned (case-insensitively, as whole tokens) for each diff
    placeholder name - diff, diff_added, diff_clean, etc. - and render_diff()
    is called just for the ones found. Calls go through an LRU cache keyed on
    the keyword arguments.

    Args:
        notification_scan_text: Notification template text to scan for placeholder names
        prev_snapshot: Previous content, passed as the first argument to render_diff()
        current_snapshot: Current content, passed as the second argument to render_diff()
        word_diff: Forwarded to render_diff() as word_diff for every variant

    Returns:
        dict: placeholder name -> rendered diff, only for names found in notification_scan_text
    """
    from changedetectionio import diff
    import re
    from functools import lru_cache

    started = time.time()

    # Per-variant extras layered on top of the shared word_diff setting
    variant_extras = {
        'diff': {},
        'diff_clean': {'include_change_type_prefix': False},
        'diff_added': {'include_removed': False},
        'diff_added_clean': {'include_removed': False, 'include_change_type_prefix': False},
        'diff_full': {'include_equal': True},
        'diff_full_clean': {'include_equal': True, 'include_change_type_prefix': False},
        'diff_patch': {'patch_format': True},
        'diff_removed': {'include_added': False},
        'diff_removed_clean': {'include_added': False, 'include_change_type_prefix': False},
    }
    diff_specs = {name: {'word_diff': word_diff, **extra} for name, extra in variant_extras.items()}

    # Memoised wrapper: the argument is a hashable tuple of (kwarg, value) pairs
    @lru_cache(maxsize=4)
    def render_once(frozen_kwargs):
        return diff.render_diff(prev_snapshot, current_snapshot, **dict(frozen_kwargs))

    rendered = {}
    # Walk the known notification tokens so only recognised diff keys are considered
    for name in NotificationContextData().keys():
        if not (name.startswith('diff') and name in diff_specs):
            continue

        # Whole-token match: not preceded or followed by a word character
        token_re = rf"(?<![A-Za-z0-9_]){re.escape(name)}(?![A-Za-z0-9_])"
        if not re.search(token_re, notification_scan_text, re.IGNORECASE):
            continue

        frozen = tuple(sorted(diff_specs[name].items()))
        rendered[name] = render_once(frozen)

    if rendered:
        logger.trace(f"Rendered {len(rendered)} diff placeholder(s) {sorted(rendered.keys())} in {time.time() - started:.3f}s")

    return rendered
|
||||
|
||||
def set_basic_notification_vars(current_snapshot, prev_snapshot, watch, triggered_text, timestamp_changed=None):
|
||||
|
||||
n_object = {
|
||||
'current_snapshot': snapshot_contents,
|
||||
'diff': diff.render_diff(prev_snapshot, current_snapshot),
|
||||
'diff_clean': diff.render_diff(prev_snapshot, current_snapshot, include_change_type_prefix=False),
|
||||
'diff_added': diff.render_diff(prev_snapshot, current_snapshot, include_removed=False),
|
||||
'diff_added_clean': diff.render_diff(prev_snapshot, current_snapshot, include_removed=False, include_change_type_prefix=False),
|
||||
'diff_full': diff.render_diff(prev_snapshot, current_snapshot, include_equal=True),
|
||||
'diff_full_clean': diff.render_diff(prev_snapshot, current_snapshot, include_equal=True, include_change_type_prefix=False),
|
||||
'diff_patch': diff.render_diff(prev_snapshot, current_snapshot, patch_format=True),
|
||||
'diff_removed': diff.render_diff(prev_snapshot, current_snapshot, include_added=False),
|
||||
'diff_removed_clean': diff.render_diff(prev_snapshot, current_snapshot, include_added=False, include_change_type_prefix=False),
|
||||
'current_snapshot': current_snapshot,
|
||||
'prev_snapshot': prev_snapshot,
|
||||
'screenshot': watch.get_screenshot() if watch and watch.get('notification_screenshot') else None,
|
||||
'change_datetime': timestamp_to_localtime(timestamp_changed) if timestamp_changed else None,
|
||||
'triggered_text': triggered_text,
|
||||
'uuid': watch.get('uuid') if watch else None,
|
||||
'watch_url': watch.get('url') if watch else None,
|
||||
@@ -101,7 +211,6 @@ def set_basic_notification_vars(snapshot_contents, current_snapshot, prev_snapsh
|
||||
if watch:
|
||||
n_object.update(watch.extra_notification_token_values())
|
||||
|
||||
logger.trace(f"Main rendered notification placeholders (diff_added etc) calculated in {time.time() - now:.3f}s")
|
||||
return n_object
|
||||
|
||||
class NotificationService:
|
||||
@@ -114,7 +223,7 @@ class NotificationService:
|
||||
self.datastore = datastore
|
||||
self.notification_q = notification_q
|
||||
|
||||
def queue_notification_for_watch(self, n_object: NotificationContextData, watch):
|
||||
def queue_notification_for_watch(self, n_object: NotificationContextData, watch, date_index_from=-2, date_index_to=-1):
|
||||
"""
|
||||
Queue a notification for a watch with full diff rendering and template variables
|
||||
"""
|
||||
@@ -133,7 +242,7 @@ class NotificationService:
|
||||
|
||||
# Add text that was triggered
|
||||
if len(dates):
|
||||
snapshot_contents = watch.get_history_snapshot(dates[-1])
|
||||
snapshot_contents = watch.get_history_snapshot(timestamp=dates[-1])
|
||||
else:
|
||||
snapshot_contents = "No snapshot/history available, the watch should fetch atleast once."
|
||||
|
||||
@@ -154,57 +263,22 @@ class NotificationService:
|
||||
current_snapshot = "Example text: example test\nExample text: change detection is fantastic\nExample text: even more examples\nExample text: a lot more examples"
|
||||
|
||||
if len(dates) > 1:
|
||||
prev_snapshot = watch.get_history_snapshot(dates[-2])
|
||||
current_snapshot = watch.get_history_snapshot(dates[-1])
|
||||
prev_snapshot = watch.get_history_snapshot(timestamp=dates[date_index_from])
|
||||
current_snapshot = watch.get_history_snapshot(timestamp=dates[date_index_to])
|
||||
|
||||
|
||||
n_object.update(set_basic_notification_vars(snapshot_contents=snapshot_contents,
|
||||
current_snapshot=current_snapshot,
|
||||
n_object.update(set_basic_notification_vars(current_snapshot=current_snapshot,
|
||||
prev_snapshot=prev_snapshot,
|
||||
watch=watch,
|
||||
triggered_text=triggered_text))
|
||||
triggered_text=triggered_text,
|
||||
timestamp_changed=dates[date_index_to]))
|
||||
|
||||
logger.debug("Queued notification for sending")
|
||||
self.notification_q.put(n_object)
|
||||
|
||||
def _check_cascading_vars(self, var_name, watch):
|
||||
"""
|
||||
Check notification variables in cascading priority:
|
||||
Individual watch settings > Tag settings > Global settings
|
||||
"""
|
||||
from changedetectionio.notification import (
|
||||
USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH,
|
||||
default_notification_body,
|
||||
default_notification_title
|
||||
)
|
||||
|
||||
# Would be better if this was some kind of Object where Watch can reference the parent datastore etc
|
||||
v = watch.get(var_name)
|
||||
if v and not watch.get('notification_muted'):
|
||||
if var_name == 'notification_format' and v == USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH:
|
||||
return self.datastore.data['settings']['application'].get('notification_format')
|
||||
|
||||
return v
|
||||
|
||||
tags = self.datastore.get_all_tags_for_watch(uuid=watch.get('uuid'))
|
||||
if tags:
|
||||
for tag_uuid, tag in tags.items():
|
||||
v = tag.get(var_name)
|
||||
if v and not tag.get('notification_muted'):
|
||||
return v
|
||||
|
||||
if self.datastore.data['settings']['application'].get(var_name):
|
||||
return self.datastore.data['settings']['application'].get(var_name)
|
||||
|
||||
# Otherwise could be defaults
|
||||
if var_name == 'notification_format':
|
||||
return USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH
|
||||
if var_name == 'notification_body':
|
||||
return default_notification_body
|
||||
if var_name == 'notification_title':
|
||||
return default_notification_title
|
||||
|
||||
return None
|
||||
if self.notification_q:
|
||||
logger.debug("Queued notification for sending")
|
||||
self.notification_q.put(n_object)
|
||||
else:
|
||||
logger.debug("Not queued, no queue defined. Just returning processed data")
|
||||
return n_object
|
||||
|
||||
def send_content_changed_notification(self, watch_uuid):
|
||||
"""
|
||||
@@ -227,10 +301,11 @@ class NotificationService:
|
||||
# Should be a better parent getter in the model object
|
||||
|
||||
# Prefer - Individual watch settings > Tag settings > Global settings (in that order)
|
||||
n_object['notification_urls'] = self._check_cascading_vars('notification_urls', watch)
|
||||
n_object['notification_title'] = self._check_cascading_vars('notification_title', watch)
|
||||
n_object['notification_body'] = self._check_cascading_vars('notification_body', watch)
|
||||
n_object['notification_format'] = self._check_cascading_vars('notification_format', watch)
|
||||
# this change probably not needed?
|
||||
n_object['notification_urls'] = _check_cascading_vars(self.datastore, 'notification_urls', watch)
|
||||
n_object['notification_title'] = _check_cascading_vars(self.datastore,'notification_title', watch)
|
||||
n_object['notification_body'] = _check_cascading_vars(self.datastore,'notification_body', watch)
|
||||
n_object['notification_format'] = _check_cascading_vars(self.datastore,'notification_format', watch)
|
||||
|
||||
# (Individual watch) Only prepare to notify if the rules above matched
|
||||
queued = False
|
||||
@@ -269,7 +344,7 @@ Thanks - Your omniscient changedetection.io installation.
|
||||
n_object = NotificationContextData({
|
||||
'notification_title': 'Changedetection.io - Alert - CSS/xPath filter was not present in the page',
|
||||
'notification_body': body,
|
||||
'notification_format': self._check_cascading_vars('notification_format', watch),
|
||||
'notification_format': _check_cascading_vars(self.datastore, 'notification_format', watch),
|
||||
})
|
||||
n_object['markup_text_links_to_html_links'] = n_object.get('notification_format').startswith('html')
|
||||
|
||||
|
||||
@@ -2,6 +2,7 @@ import pluggy
|
||||
import os
|
||||
import importlib
|
||||
import sys
|
||||
from loguru import logger
|
||||
|
||||
# Global plugin namespace for changedetection.io
|
||||
PLUGIN_NAMESPACE = "changedetectionio"
|
||||
@@ -16,15 +17,94 @@ class ChangeDetectionSpec:
|
||||
@hookspec
def ui_edit_stats_extras(watch):
    """Hook: supply extra HTML for the stats tab of the edit view.

    Args:
        watch: The watch object being edited

    Returns:
        str: HTML content to insert into the stats tab
    """
|
||||
|
||||
@hookspec
def register_content_fetcher(self):
    """Hook: register a content fetcher supplied by a plugin.

    The name should begin with 'html_' and the class should inherit from
    changedetectionio.content_fetchers.base.Fetcher.

    Returns:
        tuple: (fetcher_name: str, fetcher_class: class)
    """
|
||||
|
||||
@hookspec
def fetcher_status_icon(fetcher_name):
    """Hook: supply status icon HTML for a content fetcher.

    Args:
        fetcher_name: Name of the fetcher (e.g., 'html_webdriver', 'html_js_zyte')

    Returns:
        str: HTML string with <img> tags or other status icon elements;
             empty string when no custom status icon is needed
    """
|
||||
|
||||
@hookspec
def plugin_static_path(self):
    """Hook: report where the plugin keeps its static files.

    Returns:
        str: Absolute path to the plugin's static directory, or None if it has no static files
    """
|
||||
|
||||
@hookspec
def get_itemprop_availability_override(self, content, fetcher_name, fetcher_instance, url):
    """Hook: custom get_itemprop_availability implementation for a specific fetcher.

    Lets a plugin do its own product availability detection when its fetcher
    is in use. Called as a fallback when the built-in method doesn't find
    good data.

    Args:
        content: The HTML/text content to parse
        fetcher_name: Name of the fetcher in use (e.g., 'html_js_zyte')
        fetcher_instance: The fetcher instance that produced the content
        url: The URL being watched/checked

    Returns:
        dict or None: Availability data of the form:
            {
                'price': float or None,
                'availability': str or None,  # e.g., 'in stock', 'out of stock'
                'currency': str or None,      # e.g., 'USD', 'EUR'
            }
            None when the plugin doesn't handle this fetcher or couldn't extract data
    """
|
||||
|
||||
@hookspec
def plugin_settings_tab(self):
    """Hook: describe a settings tab that this plugin adds to the settings page.

    Settings will be saved to a separate JSON file in the datastore directory.

    Returns:
        dict or None: Dictionary with settings tab information:
            {
                'plugin_id': str,      # Unique identifier (e.g., 'zyte_fetcher')
                'tab_label': str,      # Display name for tab (e.g., 'Zyte Fetcher')
                'form_class': Form,    # WTForms Form class for the settings
                'template_path': str,  # Optional: path to Jinja2 template (relative to plugin)
                                       # If not provided, a default form renderer will be used
            }
        Or None if this plugin doesn't provide settings
    """
|
||||
|
||||
|
||||
# Set up Plugin Manager
|
||||
plugin_manager = pluggy.PluginManager(PLUGIN_NAMESPACE)
|
||||
@@ -65,18 +145,334 @@ load_plugins_from_directories()
|
||||
# Discover installed plugins from external packages (if any)
|
||||
plugin_manager.load_setuptools_entrypoints(PLUGIN_NAMESPACE)
|
||||
|
||||
# Function to inject datastore into plugins that need it
|
||||
def inject_datastore_into_plugins(datastore):
    """Hand the global datastore to every registered plugin that is waiting for one.

    A plugin opts in by exposing a ``datastore`` attribute whose value is
    ``None``; plugins without that attribute, or with it already set, are left
    untouched. Call this after plugins are loaded and the datastore exists.

    Args:
        datastore: The global ChangeDetectionStore instance
    """
    for plugin_name, plugin_obj in plugin_manager.list_name_plugin():
        awaiting_datastore = hasattr(plugin_obj, 'datastore') and plugin_obj.datastore is None
        if not awaiting_datastore:
            continue

        plugin_obj.datastore = datastore
        logger.debug(f"Injected datastore into plugin: {plugin_name}")
|
||||
|
||||
# Function to register built-in fetchers - called later from content_fetchers/__init__.py
|
||||
def register_builtin_fetchers():
    """Register the built-in content fetchers as internal plugins.

    Invoked from content_fetchers/__init__.py once all fetchers have been
    imported, which is why the import below is deferred (avoids a circular
    import).
    """
    from changedetectionio.content_fetchers import requests, playwright, puppeteer, webdriver_selenium

    # (fetcher module, attribute holding its plugin object, name to register it under)
    builtin_sources = (
        (requests, 'requests_plugin', 'builtin_requests'),
        (playwright, 'playwright_plugin', 'builtin_playwright'),
        (puppeteer, 'puppeteer_plugin', 'builtin_puppeteer'),
        (webdriver_selenium, 'webdriver_selenium_plugin', 'builtin_webdriver_selenium'),
    )

    for fetcher_module, plugin_attribute, registered_name in builtin_sources:
        # A fetcher module that does not expose a plugin object is simply skipped
        if hasattr(fetcher_module, plugin_attribute):
            plugin_manager.register(getattr(fetcher_module, plugin_attribute), registered_name)
|
||||
|
||||
# Helper function to collect UI stats extras from all plugins
|
||||
def collect_ui_edit_stats_extras(watch):
    """Collect and combine HTML content from all plugins that implement ui_edit_stats_extras.

    Args:
        watch: The watch object being edited; forwarded to every hook implementation

    Returns:
        str: The non-empty hook results joined with newlines, or "" when no
             plugin contributed anything
    """
    # Ask every plugin that implements the ui_edit_stats_extras hook
    results = plugin_manager.hook.ui_edit_stats_extras(watch=watch)

    # Keep only truthy results (skips None / empty strings)
    extras_content = [result for result in (results or []) if result]

    # Joining an empty list already yields "", so a single return covers both cases
    return "\n".join(extras_content)
|
||||
|
||||
def collect_fetcher_status_icons(fetcher_name):
    """Ask the plugins for status icon data for a fetcher and return the first usable answer.

    NOTE(review): the ``fetcher_status_icon`` hookspec documents an HTML string
    return value, but only non-empty ``dict`` results are accepted here -
    confirm which contract is intended.

    Args:
        fetcher_name: The name of the fetcher (e.g., 'html_webdriver', 'html_js_zyte')

    Returns:
        dict or None: Icon data dictionary from first matching plugin, or None
    """
    results = plugin_manager.hook.fetcher_status_icon(fetcher_name=fetcher_name)

    usable = (result for result in (results or ()) if result and isinstance(result, dict))
    return next(usable, None)
|
||||
|
||||
def get_itemprop_availability_from_plugin(content, fetcher_name, fetcher_instance, url):
    """Fallback availability lookup that delegates to the plugins.

    Used when the built-in get_itemprop_availability doesn't find good data.

    Args:
        content: The HTML/text content to parse
        fetcher_name: The name of the fetcher being used (e.g., 'html_js_zyte')
        fetcher_instance: The fetcher instance that generated the content
        url: The URL being watched (watch.link - includes Jinja2 evaluation)

    Returns:
        dict or None: Availability data dictionary from first matching plugin, or None
    """
    results = plugin_manager.hook.get_itemprop_availability_override(
        content=content,
        fetcher_name=fetcher_name,
        fetcher_instance=fetcher_instance,
        url=url
    )

    for result in results or ():
        if not (result and isinstance(result, dict)):
            continue

        # Only accept an answer that carries a price or an availability value
        has_price = result.get('price') is not None
        if has_price or result.get('availability'):
            return result

    return None
|
||||
|
||||
|
||||
def get_active_plugins():
    """List the registered non-built-in plugins with a display name and description.

    Returns:
        list: List of dictionaries with plugin information:
            [
                {'name': 'plugin_name', 'description': 'Plugin description'},
                ...
            ]
    """
    active_plugins = []

    for plugin_name, plugin_obj in plugin_manager.list_name_plugin():
        # Built-in plugins are registered under a 'builtin_' prefix and are not listed
        if plugin_name.startswith('builtin_'):
            continue

        # Description: first line of the docstring, else a 'description' attribute
        docstring = getattr(plugin_obj, '__doc__', None)
        if docstring:
            description = docstring.strip().split('\n')[0]
        else:
            description = getattr(plugin_obj, 'description', None)

        active_plugins.append({
            # A plugin-supplied 'name' attribute wins over the registration name
            'name': getattr(plugin_obj, 'name', plugin_name),
            'description': description or 'No description available'
        })

    return active_plugins
|
||||
|
||||
|
||||
def get_fetcher_capabilities(watch, datastore):
    """Get capability flags for a watch's fetcher.

    The fetcher is looked up first among the attributes of
    ``changedetectionio.content_fetchers`` and then among the fetchers that
    plugins announce through the ``register_content_fetcher`` hook.

    Args:
        watch: The watch object/dict
        datastore: The datastore to resolve 'system' fetcher

    Returns:
        dict: Dictionary with capability flags:
            {
                'supports_browser_steps': bool,
                'supports_screenshots': bool,
                'supports_xpath_element_data': bool
            }
        All flags are False when the fetcher cannot be found.
    """
    capability_flags = ('supports_browser_steps', 'supports_screenshots', 'supports_xpath_element_data')

    def _capabilities_of(fetcher_class):
        # A flag the class does not define counts as unsupported
        return {flag: getattr(fetcher_class, flag, False) for flag in capability_flags}

    # An unset/empty backend is treated like 'system' (same rule call_browser applies);
    # passing None on to hasattr() below would raise TypeError.
    fetcher_name = watch.get('fetch_backend') or 'system'

    # Resolve 'system' to the application-wide fetcher
    if fetcher_name == 'system':
        fetcher_name = datastore.data['settings']['application'].get('fetch_backend') or 'html_requests'

    from changedetectionio import content_fetchers

    # Built-in fetchers first
    if hasattr(content_fetchers, fetcher_name):
        return _capabilities_of(getattr(content_fetchers, fetcher_name))

    # Then fetchers provided by plugins
    for fetcher_registration in plugin_manager.hook.register_content_fetcher():
        if not fetcher_registration:
            continue
        name, fetcher_class = fetcher_registration
        if name == fetcher_name:
            return _capabilities_of(fetcher_class)

    # Default: no capabilities
    return dict.fromkeys(capability_flags, False)
|
||||
|
||||
|
||||
def get_plugin_settings_tabs():
    """Gather the settings tab specs that plugins provide.

    Dict results lacking any of 'plugin_id', 'tab_label' or 'form_class' are
    dropped with a warning; falsy or non-dict results are ignored silently.

    Returns:
        list: List of dictionaries with plugin settings tab information:
            [
                {
                    'plugin_id': str,
                    'tab_label': str,
                    'form_class': Form,
                    'description': str
                },
                ...
            ]
    """
    required_fields = ('plugin_id', 'tab_label', 'form_class')
    tabs = []

    for result in plugin_manager.hook.plugin_settings_tab():
        if not (result and isinstance(result, dict)):
            continue

        if all(field in result for field in required_fields):
            tabs.append(result)
        else:
            logger.warning(f"Invalid plugin settings tab spec: {result}")

    return tabs
|
||||
|
||||
|
||||
def load_plugin_settings(datastore_path, plugin_id):
    """Read a plugin's settings from ``<datastore_path>/<plugin_id>.json``.

    Args:
        datastore_path: Path to the datastore directory
        plugin_id: Unique identifier for the plugin (e.g., 'zyte_fetcher')

    Returns:
        dict: Plugin settings, or empty dict if the file doesn't exist or
              cannot be read/parsed (the failure is logged)
    """
    import json

    settings_file = os.path.join(datastore_path, f"{plugin_id}.json")

    if os.path.exists(settings_file):
        try:
            with open(settings_file, 'r', encoding='utf-8') as f:
                loaded = json.load(f)
        except Exception as e:
            logger.error(f"Failed to load settings for plugin '{plugin_id}': {e}")
        else:
            return loaded

    return {}
|
||||
|
||||
|
||||
def save_plugin_settings(datastore_path, plugin_id, settings):
    """Save settings for a specific plugin to ``<datastore_path>/<plugin_id>.json``.

    The JSON is written to a sibling ``.tmp`` file and then moved over the
    real file with ``os.replace``. A serialisation error or an interrupted
    write therefore leaves the previously saved settings intact instead of a
    truncated file.

    Args:
        datastore_path: Path to the datastore directory
        plugin_id: Unique identifier for the plugin (e.g., 'zyte_fetcher')
        settings: Dictionary of settings to save

    Returns:
        bool: True if save was successful, False otherwise
    """
    import json

    settings_file = os.path.join(datastore_path, f"{plugin_id}.json")
    temp_file = f"{settings_file}.tmp"

    try:
        with open(temp_file, 'w', encoding='utf-8') as f:
            json.dump(settings, f, indent=2, ensure_ascii=False)
        # Swap the finished file into place in a single step
        os.replace(temp_file, settings_file)
        logger.info(f"Saved settings for plugin '{plugin_id}' to {settings_file}")
        return True
    except Exception as e:
        logger.error(f"Failed to save settings for plugin '{plugin_id}': {e}")
        # Best-effort cleanup of a partially written temp file; it may not exist
        try:
            os.remove(temp_file)
        except OSError:
            pass
        return False
|
||||
|
||||
|
||||
def get_plugin_template_paths():
    """Build the list of template directories handed to the Jinja2 loader.

    Three sources are scanned, in this order: the base ``processors/templates``
    directory next to this file, a ``templates`` directory beside each built-in
    processor module, and a ``templates`` directory beside each registered
    pluggy plugin. Only directories that exist are included.

    Returns:
        list: List of absolute paths to plugin template directories
    """
    template_paths = []

    # 1) Base processors/templates directory (as absolute path)
    processors_templates_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'processors', 'templates')
    if os.path.isdir(processors_templates_dir):
        template_paths.append(processors_templates_dir)
        logger.debug(f"Added base processors template path: {processors_templates_dir}")

    # 2) Built-in processor plugins - each one is a module that may ship templates
    from changedetectionio.processors import find_processors
    for processor_module, processor_name in find_processors():
        processor_file = getattr(processor_module, '__file__', None)
        if not processor_file:
            continue

        # e.g. processors/image_ssim_diff/templates
        templates_dir = os.path.join(os.path.dirname(os.path.abspath(processor_file)), 'templates')
        if os.path.isdir(templates_dir):
            template_paths.append(templates_dir)
            logger.debug(f"Added processor template path: {templates_dir}")

    # 3) Registered pluggy plugins
    for plugin_name, plugin_obj in plugin_manager.list_name_plugin():
        if hasattr(plugin_obj, '__file__'):
            plugin_file = plugin_obj.__file__
        elif hasattr(plugin_obj, '__module__'):
            # Not a module itself: fall back to the file of the module that defines it
            module = sys.modules.get(plugin_obj.__module__)
            if not (module and hasattr(module, '__file__')):
                continue
            plugin_file = module.__file__
        else:
            continue

        if not plugin_file:
            continue

        templates_dir = os.path.join(os.path.dirname(os.path.abspath(plugin_file)), 'templates')
        if os.path.isdir(templates_dir):
            template_paths.append(templates_dir)
            logger.debug(f"Added plugin template path: {templates_dir}")

    return template_paths
|
||||
@@ -1,180 +1,10 @@
|
||||
from abc import abstractmethod
|
||||
from changedetectionio.content_fetchers.base import Fetcher
|
||||
from changedetectionio.strtobool import strtobool
|
||||
from copy import deepcopy
|
||||
from functools import lru_cache
|
||||
from loguru import logger
|
||||
import hashlib
|
||||
from flask_babel import gettext
|
||||
import importlib
|
||||
import inspect
|
||||
import os
|
||||
import pkgutil
|
||||
import re
|
||||
|
||||
class difference_detection_processor():
    """Base class for change-detection processors.

    ``call_browser()`` works out which content fetcher, proxy, headers and
    request options apply to the watch and then awaits the fetcher.
    ``run_changedetection()`` is the hook subclasses override to do the actual
    comparison; the version here only returns placeholder data.
    """

    browser_steps = None
    datastore = None
    fetcher = None
    screenshot = None
    watch = None
    xpath_data = None
    preferred_proxy = None

    def __init__(self, *args, datastore, watch_uuid, **kwargs):
        """Keep the datastore and take a deep copy of the watch stored under ``watch_uuid``."""
        super().__init__(*args, **kwargs)
        self.datastore = datastore
        self.watch = deepcopy(self.datastore.data['watching'].get(watch_uuid))
        # Generic fetcher that should be extended (requests, playwright etc)
        self.fetcher = Fetcher()

    @staticmethod
    def _without_brotli(accept_encoding):
        """Return an Accept-Encoding value with every ``br`` coding removed.

        Handles any position and spacing ('br, gzip', 'gzip,br', 'br;q=1.0').
        Falls back to 'identity' when nothing else is left, so the header
        never ends up empty.
        """
        kept = []
        for coding in accept_encoding.split(','):
            coding = coding.strip()
            # Compare only the coding name, ignoring parameters such as ';q=0.8'
            if coding and coding.split(';')[0].strip().lower() != 'br':
                kept.append(coding)
        return ', '.join(kept) or 'identity'

    async def call_browser(self, preferred_proxy_id=None):
        """Select and configure the fetcher for this watch, then fetch the watch URL.

        Args:
            preferred_proxy_id: Proxy key to use; when falsy the datastore's
                preferred proxy for the watch is used instead.

        Raises:
            Exception: for ``file:`` URLs unless the ALLOW_FILE_URI environment
                variable is set to a true value.
        """
        from requests.structures import CaseInsensitiveDict

        url = self.watch.link

        # Protect against file:, file:/, file:// access, check the real "link" without any meta "source:" etc prepended.
        if re.search(r'^file:', url.strip(), re.IGNORECASE):
            if not strtobool(os.getenv('ALLOW_FILE_URI', 'false')):
                raise Exception(
                    "file:// type access is denied for security reasons."
                )

        # Requests, playwright, other browser via wss:// etc, fetch_extra_something
        prefer_fetch_backend = self.watch.get('fetch_backend', 'system')

        # Proxy ID "key"
        preferred_proxy_id = preferred_proxy_id if preferred_proxy_id else self.datastore.get_preferred_proxy_for_watch(uuid=self.watch.get('uuid'))

        # Pluggable content self.fetcher
        if not prefer_fetch_backend or prefer_fetch_backend == 'system':
            prefer_fetch_backend = self.datastore.data['settings']['application'].get('fetch_backend')

        # No backend configured anywhere: use the same default as the "doesn't exist"
        # fallback further down, instead of failing on None.startswith()
        if not prefer_fetch_backend:
            prefer_fetch_backend = 'html_requests'

        # In the case that the preferred fetcher was a browser config with custom connection URL..
        # @todo - on save watch, if its extra_browser_ then it should be obvious it will use playwright (like if its requests now..)
        custom_browser_connection_url = None
        if prefer_fetch_backend.startswith('extra_browser_'):
            (t, key) = prefer_fetch_backend.split('extra_browser_')
            connection = list(
                filter(lambda s: (s['browser_name'] == key), self.datastore.data['settings']['requests'].get('extra_browsers', [])))
            if connection:
                prefer_fetch_backend = 'html_webdriver'
                custom_browser_connection_url = connection[0].get('browser_connection_url')

        # PDF should be html_requests because playwright will serve it up (so far) in a embedded page
        # @todo https://github.com/dgtlmoon/changedetection.io/issues/2019
        # @todo needs test to or a fix
        if self.watch.is_pdf:
            prefer_fetch_backend = "html_requests"

        # Grab the right kind of 'fetcher', (playwright, requests, etc)
        from changedetectionio import content_fetchers
        if hasattr(content_fetchers, prefer_fetch_backend):
            # @todo TEMPORARY HACK - SWITCH BACK TO PLAYWRIGHT FOR BROWSERSTEPS
            if prefer_fetch_backend == 'html_webdriver' and self.watch.has_browser_steps:
                # This is never supported in selenium anyway
                logger.warning("Using playwright fetcher override for possible puppeteer request in browsersteps, because puppetteer:browser steps is incomplete.")
                from changedetectionio.content_fetchers.playwright import fetcher as playwright_fetcher
                fetcher_obj = playwright_fetcher
            else:
                fetcher_obj = getattr(content_fetchers, prefer_fetch_backend)
        else:
            # What it referenced doesnt exist, Just use a default
            fetcher_obj = getattr(content_fetchers, "html_requests")

        proxy_url = None
        if preferred_proxy_id:
            # Custom browser endpoints should NOT have a proxy added
            # NOTE(review): when a custom browser matched above, prefer_fetch_backend was
            # already rewritten to 'html_webdriver', so this test only skips the proxy for
            # unmatched 'extra_browser_*' names - confirm whether
            # custom_browser_connection_url should be checked here instead.
            if not prefer_fetch_backend.startswith('extra_browser_'):
                proxy_url = self.datastore.proxy_list.get(preferred_proxy_id).get('url')
                logger.debug(f"Selected proxy key '{preferred_proxy_id}' as proxy URL '{proxy_url}' for {url}")
            else:
                logger.debug("Skipping adding proxy data when custom Browser endpoint is specified. ")

        logger.debug(f"Using proxy '{proxy_url}' for {self.watch['uuid']}")

        # Now call the fetcher (playwright/requests/etc) with arguments that only a fetcher would need.
        # When browser_connection_url is None, it method should default to working out whats the best defaults (os env vars etc)
        self.fetcher = fetcher_obj(proxy_override=proxy_url,
                                   custom_browser_connection_url=custom_browser_connection_url
                                   )

        if self.watch.has_browser_steps:
            self.fetcher.browser_steps = self.watch.get('browser_steps', [])
            self.fetcher.browser_steps_screenshot_path = os.path.join(self.datastore.datastore_path, self.watch.get('uuid'))

        # Tweak the base config with the per-watch ones
        from changedetectionio.jinja2_custom import render as jinja_render
        request_headers = CaseInsensitiveDict()

        ua = self.datastore.data['settings']['requests'].get('default_ua')
        if ua and ua.get(prefer_fetch_backend):
            request_headers.update({'User-Agent': ua.get(prefer_fetch_backend)})

        # Later updates override earlier ones for the same header name
        request_headers.update(self.watch.get('headers', {}))
        request_headers.update(self.datastore.get_all_base_headers())
        request_headers.update(self.datastore.get_all_headers_in_textfile_for_watch(uuid=self.watch.get('uuid')))

        # https://github.com/psf/requests/issues/4525
        # Requests doesnt yet support brotli encoding, so don't put 'br' here, be totally sure that the user cannot
        # do this by accident.
        if 'Accept-Encoding' in request_headers and "br" in request_headers['Accept-Encoding'].lower():
            request_headers['Accept-Encoding'] = self._without_brotli(request_headers['Accept-Encoding'])

        # Header values may contain Jinja2 templates; iterate over a snapshot of the names
        for header_name in list(request_headers):
            request_headers.update({header_name: jinja_render(template_str=request_headers.get(header_name))})

        timeout = self.datastore.data['settings']['requests'].get('timeout')

        request_body = self.watch.get('body')
        if request_body:
            request_body = jinja_render(template_str=self.watch.get('body'))

        request_method = self.watch.get('method')
        ignore_status_codes = self.watch.get('ignore_status_codes', False)

        # Configurable per-watch or global extra delay before extracting text (for webDriver types)
        system_webdriver_delay = self.datastore.data['settings']['application'].get('webdriver_delay', None)
        if self.watch.get('webdriver_delay'):
            self.fetcher.render_extract_delay = self.watch.get('webdriver_delay')
        elif system_webdriver_delay is not None:
            self.fetcher.render_extract_delay = system_webdriver_delay

        if self.watch.get('webdriver_js_execute_code') is not None and self.watch.get('webdriver_js_execute_code').strip():
            self.fetcher.webdriver_js_execute_code = self.watch.get('webdriver_js_execute_code')

        # Requests for PDF's, images etc should be passed the is_binary flag
        is_binary = self.watch.is_pdf

        # And here we go! call the right browser with browser-specific settings
        empty_pages_are_a_change = self.datastore.data['settings']['application'].get('empty_pages_are_a_change', False)
        # All fetchers are now async
        await self.fetcher.run(
            current_include_filters=self.watch.get('include_filters'),
            empty_pages_are_a_change=empty_pages_are_a_change,
            fetch_favicon=self.watch.favicon_is_expired(),
            ignore_status_codes=ignore_status_codes,
            is_binary=is_binary,
            request_body=request_body,
            request_headers=request_headers,
            request_method=request_method,
            timeout=timeout,
            url=url,
        )

        #@todo .quit here could go on close object, so we can run JS if change-detected
        self.fetcher.quit(watch=self.watch)

    # After init, call run_changedetection() which will do the actual change-detection

    @abstractmethod
    def run_changedetection(self, watch):
        """Placeholder implementation that subclasses are expected to override.

        Returns:
            tuple: (changed_detected, update_obj, contents) - here always
                   ``False``, a dict carrying the MD5 of dummy data, and
                   empty bytes.
        """
        update_obj = {'last_notification_error': False, 'last_error': False}
        some_data = 'xxxxx'
        update_obj["previous_md5"] = hashlib.md5(some_data.encode('utf-8')).hexdigest()
        changed_detected = False
        return changed_detected, update_obj, ''.encode('utf-8')
|
||||
|
||||
|
||||
def find_sub_packages(package_name):
|
||||
"""
|
||||
@@ -198,6 +28,7 @@ def find_processors():
|
||||
|
||||
processors = []
|
||||
sub_packages = find_sub_packages(package_name)
|
||||
from changedetectionio.processors.base import difference_detection_processor
|
||||
|
||||
for sub_package in sub_packages:
|
||||
module_name = f"{package_name}.{sub_package}.processor"
|
||||
@@ -206,8 +37,12 @@ def find_processors():
|
||||
|
||||
# Iterate through all classes in the module
|
||||
for name, obj in inspect.getmembers(module, inspect.isclass):
|
||||
if issubclass(obj, difference_detection_processor) and obj is not difference_detection_processor:
|
||||
# Only register classes that are actually defined in this module (not imported)
|
||||
if (issubclass(obj, difference_detection_processor) and
|
||||
obj is not difference_detection_processor and
|
||||
obj.__module__ == module.__name__):
|
||||
processors.append((module, sub_package))
|
||||
break # Only need one processor per module
|
||||
except (ModuleNotFoundError, ImportError) as e:
|
||||
logger.warning(f"Failed to import module {module_name}: {e} (find_processors())")
|
||||
|
||||
@@ -242,17 +77,205 @@ def get_custom_watch_obj_for_processor(processor_name):
|
||||
return watch_class
|
||||
|
||||
|
||||
def find_processor_module(processor_name):
    """
    Look up a processor by its machine name.

    Args:
        processor_name: Processor machine name (e.g., 'image_ssim_diff')

    Returns:
        module: The processor's parent module, or None if not found
    """
    for processor_tuple in find_processors():
        if processor_tuple[1] == processor_name:
            # Hand back the parent module (the package containing processor.py)
            return get_parent_module(processor_tuple[0])

    return None
|
||||
|
||||
|
||||
def available_processors():
    """
    Get a list of processors by name and description for the UI elements.
    Can be filtered via ALLOWED_PROCESSORS environment variable (comma-separated list).

    Descriptions are passed through gettext. Entries are ordered by processor
    weight (lower weight first, default 0).

    :return: A list of (processor_name, description) tuples
    """
    processor_classes = find_processors()

    # NOTE(review): the original comment here said the filter is disabled "for now",
    # yet when ALLOWED_PROCESSORS is unset the default below restricts the list to
    # text_json_diff and restock_diff - confirm which behaviour is intended.
    allowed_processors_env = os.getenv('ALLOWED_PROCESSORS', 'text_json_diff, restock_diff').strip()
    allowed_processors = None
    if allowed_processors_env:
        # Parse comma-separated list and strip whitespace
        allowed_processors = [p.strip() for p in allowed_processors_env.split(',') if p.strip()]
        logger.info(f"ALLOWED_PROCESSORS set, filtering to: {allowed_processors}")

    available = []
    for module, sub_package_name in processor_classes:
        # Filter by allowed processors if set
        if allowed_processors and sub_package_name not in allowed_processors:
            logger.debug(f"Skipping processor '{sub_package_name}' (not in ALLOWED_PROCESSORS)")
            continue

        # Try to get the 'name' attribute from the processor module first
        if hasattr(module, 'name'):
            description = gettext(module.name)
        else:
            # Fall back to processor_description from parent module's __init__.py
            parent_module = get_parent_module(module)
            if parent_module and hasattr(parent_module, 'processor_description'):
                description = gettext(parent_module.processor_description)
            else:
                # Final fallback to a readable name
                description = sub_package_name.replace('_', ' ').title()

        # Get weight for sorting (lower weight = higher in list)
        weight = 0  # Default weight for processors without explicit weight

        # Check processor module itself first
        if hasattr(module, 'processor_weight'):
            weight = module.processor_weight
        else:
            # Fall back to parent module (package __init__.py)
            parent_module = get_parent_module(module)
            if parent_module and hasattr(parent_module, 'processor_weight'):
                weight = parent_module.processor_weight

        available.append((sub_package_name, description, weight))

    # Sort by weight (lower weight = appears first); list.sort is stable, so
    # processors with equal weight keep their discovery order
    available.sort(key=lambda x: x[2])

    # Return as tuples without weight (for backwards compatibility)
    return [(name, desc) for name, desc, weight in available]
|
||||
|
||||
|
||||
def get_processor_badge_texts():
    """
    Map each processor name to its translated list_badge_text.

    The text is read from the processor module, falling back to the parent
    module's __init__.py. Processors that define it in neither place are left
    out of the result. Translations are applied via gettext.

    :return: A dict mapping processor name to badge text (e.g., {'text_json_diff': 'Text', 'restock_diff': 'Restock'})
    """
    badge_texts = {}

    for module, sub_package_name in find_processors():
        if hasattr(module, 'list_badge_text'):
            raw_text = module.list_badge_text
        else:
            parent_module = get_parent_module(module)
            if not (parent_module and hasattr(parent_module, 'list_badge_text')):
                continue
            raw_text = parent_module.list_badge_text

        badge_texts[sub_package_name] = gettext(raw_text)

    return badge_texts
|
||||
|
||||
|
||||
def get_processor_descriptions():
    """
    Map each processor name to a human-readable description.

    Lookup order: the processor module's 'name', then its 'description', then
    the parent module's 'processor_description', then the parent module's
    'name'. If none exist, a title-cased version of the processor name is used
    (untranslated). All other values are passed through gettext.

    :return: A dict mapping processor name to description (e.g., {'text_json_diff': 'Webpage Text/HTML, JSON and PDF changes'})
    """
    descriptions = {}

    for module, sub_package_name in find_processors():
        if hasattr(module, 'name'):
            label = gettext(module.name)
        elif hasattr(module, 'description'):
            label = gettext(module.description)
        else:
            # Nothing on the processor module itself: consult the package __init__.py
            parent_module = get_parent_module(module)
            if parent_module and hasattr(parent_module, 'processor_description'):
                label = gettext(parent_module.processor_description)
            elif parent_module and hasattr(parent_module, 'name'):
                label = gettext(parent_module.name)
            else:
                label = sub_package_name.replace('_', ' ').title()

        descriptions[sub_package_name] = label

    return descriptions
|
||||
|
||||
|
||||
def generate_processor_badge_colors(processor_name):
    """
    Derive a stable badge colour scheme from a processor's name.

    The first 8 hex digits of the MD5 of the name are used as a seed, so the
    same name always yields the same colours. The hue and the
    saturation/lightness offsets are all computed from that seed.

    :param processor_name: The processor name (e.g., 'text_json_diff')
    :return: A dict with 'light' and 'dark' color schemes, each containing 'bg' and 'color'
    """
    import hashlib

    digest = hashlib.md5(processor_name.encode('utf-8')).hexdigest()
    hash_int = int(digest[:8], 16)

    hue = hash_int % 360  # 0-359

    # Light mode: pastel background with darker text
    light_bg = f'hsl({hue}, {60 + (hash_int % 25)}%, {85 + (hash_int % 10)}%)'  # S 60-84%, L 85-94%
    light_text = f'hsl({hue}, 50%, {25 + (hash_int % 15)}%)'                    # L 25-39%

    # Dark mode: stronger background with white text
    dark_bg = f'hsl({hue}, {55 + (hash_int % 20)}%, {45 + (hash_int % 15)}%)'   # S 55-74%, L 45-59%

    light_scheme = {'bg': light_bg, 'color': light_text}
    dark_scheme = {'bg': dark_bg, 'color': '#fff'}
    return {'light': light_scheme, 'dark': dark_scheme}
|
||||
|
||||
|
||||
@lru_cache(maxsize=1)
def get_processor_badge_css():
    """
    Build the stylesheet text for every discovered processor badge.

    For each processor returned by find_processors() two rules are produced:
    one for the default (light) theme and one scoped to
    html[data-darkmode="true"]. Colours come from
    generate_processor_badge_colors(). The result is memoised by lru_cache,
    so it is only built once until the cache is cleared.

    :return: A string containing CSS rules for all processor badges
    """

    def _badge_rule(selector, scheme):
        # One CSS rule setting the badge background and text colour
        return (
            f"{selector} {{\n"
            f" background-color: {scheme['bg']};\n"
            f" color: {scheme['color']};\n"
            f"}}"
        )

    rules = []
    for _module, sub_package_name in find_processors():
        palette = generate_processor_badge_colors(sub_package_name)
        badge_class = f".processor-badge-{sub_package_name}"

        # Light mode rule first, then its dark mode counterpart
        rules.append(_badge_rule(badge_class, palette['light']))
        rules.append(_badge_rule(f"html[data-darkmode=\"true\"] {badge_class}", palette['dark']))

    return '\n\n'.join(rules)
|
||||
|
||||
|
||||
259
changedetectionio/processors/base.py
Normal file
259
changedetectionio/processors/base.py
Normal file
@@ -0,0 +1,259 @@
|
||||
import re
|
||||
import hashlib
|
||||
from changedetectionio.content_fetchers.base import Fetcher
|
||||
from changedetectionio.strtobool import strtobool
|
||||
from copy import deepcopy
|
||||
from abc import abstractmethod
|
||||
import os
|
||||
from loguru import logger
|
||||
|
||||
SCREENSHOT_FORMAT_JPEG = 'JPEG'
SCREENSHOT_FORMAT_PNG = 'PNG'


class difference_detection_processor():
    """
    Base class for change-detection processors.

    An instance holds a deep copy of one watch, selects and configures the
    content fetcher for it (``call_browser``) and offers helpers to read and
    write small per-watch JSON config files. The actual comparison lives in
    ``run_changedetection``.
    """

    browser_steps = None
    datastore = None
    fetcher = None
    screenshot = None
    watch = None
    xpath_data = None
    preferred_proxy = None
    screenshot_format = SCREENSHOT_FORMAT_JPEG

    def __init__(self, datastore, watch_uuid):
        """
        :param datastore: The datastore holding ``data['watching']`` and the settings
        :param watch_uuid: UUID of the watch this processor works on
        """
        self.datastore = datastore
        self.watch_uuid = watch_uuid
        # Deep copy of the stored watch (None if the UUID is unknown)
        self.watch = deepcopy(self.datastore.data['watching'].get(watch_uuid))
        # Generic fetcher that should be extended (requests, playwright etc)
        self.fetcher = Fetcher()

    async def call_browser(self, preferred_proxy_id=None):
        """
        Pick the fetcher for this watch, configure it and run the fetch.

        The backend is resolved in this order: the per-watch setting, the
        system default, then a fall back to ``html_requests``. A proxy,
        request headers, body, method and webdriver options are then applied
        before ``self.fetcher.run()`` and ``self.fetcher.quit()`` are awaited.

        :param preferred_proxy_id: Optional proxy key; when falsy the datastore's
            preferred proxy for this watch is used
        :raises Exception: for ``file:`` URLs unless ALLOW_FILE_URI is enabled
        """
        from requests.structures import CaseInsensitiveDict

        extra_browser_prefix = 'extra_browser_'
        url = self.watch.link

        # Protect against file:, file:/, file:// access, check the real "link" without any meta "source:" etc prepended.
        if re.search(r'^file:', url.strip(), re.IGNORECASE):
            if not strtobool(os.getenv('ALLOW_FILE_URI', 'false')):
                raise Exception(
                    "file:// type access is denied for security reasons."
                )

        # Requests, playwright, other browser via wss:// etc, fetch_extra_something
        prefer_fetch_backend = self.watch.get('fetch_backend', 'system')

        # Proxy ID "key"
        preferred_proxy_id = preferred_proxy_id if preferred_proxy_id else self.datastore.get_preferred_proxy_for_watch(
            uuid=self.watch.get('uuid'))

        # Pluggable content self.fetcher
        if not prefer_fetch_backend or prefer_fetch_backend == 'system':
            prefer_fetch_backend = self.datastore.data['settings']['application'].get('fetch_backend')

        # The system setting may be missing/empty; use the same default the
        # unknown-backend branch below falls back to, instead of crashing on None.
        if not prefer_fetch_backend:
            prefer_fetch_backend = 'html_requests'

        # In the case that the preferred fetcher was a browser config with custom connection URL..
        # @todo - on save watch, if its extra_browser_ then it should be obvious it will use playwright (like if its requests now..)
        custom_browser_connection_url = None
        if prefer_fetch_backend.startswith(extra_browser_prefix):
            # Strip only the leading prefix so a browser name that itself contains
            # the prefix text cannot break the lookup.
            key = prefer_fetch_backend[len(extra_browser_prefix):]
            extra_browsers = self.datastore.data['settings']['requests'].get('extra_browsers', [])
            connection = [s for s in extra_browsers if s['browser_name'] == key]
            if connection:
                prefer_fetch_backend = 'html_webdriver'
                custom_browser_connection_url = connection[0].get('browser_connection_url')

        # PDF should be html_requests because playwright will serve it up (so far) in a embedded page
        # @todo https://github.com/dgtlmoon/changedetection.io/issues/2019
        # @todo needs test to or a fix
        if self.watch.is_pdf:
            prefer_fetch_backend = "html_requests"

        # Grab the right kind of 'fetcher', (playwright, requests, etc)
        from changedetectionio import content_fetchers
        if hasattr(content_fetchers, prefer_fetch_backend):
            # @todo TEMPORARY HACK - SWITCH BACK TO PLAYWRIGHT FOR BROWSERSTEPS
            if prefer_fetch_backend == 'html_webdriver' and self.watch.has_browser_steps:
                # This is never supported in selenium anyway
                logger.warning(
                    "Using playwright fetcher override for possible puppeteer request in browsersteps, because puppetteer:browser steps is incomplete.")
                from changedetectionio.content_fetchers.playwright import fetcher as playwright_fetcher
                fetcher_obj = playwright_fetcher
            else:
                fetcher_obj = getattr(content_fetchers, prefer_fetch_backend)
        else:
            # What it referenced doesnt exist, Just use a default
            fetcher_obj = getattr(content_fetchers, "html_requests")

        proxy_url = None
        if preferred_proxy_id:
            # Custom browser endpoints should NOT have a proxy added
            # NOTE(review): when a matching extra browser was found above, the backend
            # name has already been rewritten to 'html_webdriver', so this check is
            # False and the proxy IS applied to the custom endpoint. Behaviour is left
            # unchanged here because silently dropping a configured proxy could expose
            # users; confirm the intended behaviour before changing it.
            if not prefer_fetch_backend.startswith(extra_browser_prefix):
                proxy_url = self.datastore.proxy_list.get(preferred_proxy_id).get('url')
                logger.debug(f"Selected proxy key '{preferred_proxy_id}' as proxy URL '{proxy_url}' for {url}")
            else:
                logger.debug("Skipping adding proxy data when custom Browser endpoint is specified. ")

        logger.debug(f"Using proxy '{proxy_url}' for {self.watch['uuid']}")

        # Now call the fetcher (playwright/requests/etc) with arguments that only a fetcher would need.
        # When browser_connection_url is None, it method should default to working out whats the best defaults (os env vars etc)
        self.fetcher = fetcher_obj(proxy_override=proxy_url,
                                   custom_browser_connection_url=custom_browser_connection_url,
                                   screenshot_format=self.screenshot_format
                                   )

        if self.watch.has_browser_steps:
            self.fetcher.browser_steps = self.watch.get('browser_steps', [])
            self.fetcher.browser_steps_screenshot_path = os.path.join(self.datastore.datastore_path, self.watch.get('uuid'))

        # Tweak the base config with the per-watch ones
        from changedetectionio.jinja2_custom import render as jinja_render
        request_headers = CaseInsensitiveDict()

        ua = self.datastore.data['settings']['requests'].get('default_ua')
        if ua and ua.get(prefer_fetch_backend):
            request_headers.update({'User-Agent': ua.get(prefer_fetch_backend)})

        # Later updates win: per-watch headers, then base headers, then the headers text file
        request_headers.update(self.watch.get('headers', {}))
        request_headers.update(self.datastore.get_all_base_headers())
        request_headers.update(self.datastore.get_all_headers_in_textfile_for_watch(uuid=self.watch.get('uuid')))

        # https://github.com/psf/requests/issues/4525
        # Requests doesnt yet support brotli encoding, so don't put 'br' here, be totally sure that the user cannot
        # do this by accident.
        # Drop the 'br' coding wherever it appears in the list (first, last, without a
        # space, with a q-value); the previous ", br" text replace missed those forms.
        accept_encoding = request_headers.get('Accept-Encoding')
        if accept_encoding:
            codings = [c.strip() for c in accept_encoding.split(',') if c.strip()]
            allowed = [c for c in codings if c.split(';', 1)[0].strip().lower() != 'br']
            if len(allowed) != len(codings):
                if allowed:
                    request_headers['Accept-Encoding'] = ', '.join(allowed)
                else:
                    # 'br' was the only coding requested: send no explicit preference
                    del request_headers['Accept-Encoding']

        # Header values may contain Jinja2 templates; render each one in place
        for header_name in request_headers:
            request_headers.update({header_name: jinja_render(template_str=request_headers.get(header_name))})

        timeout = self.datastore.data['settings']['requests'].get('timeout')

        request_body = self.watch.get('body')
        if request_body:
            request_body = jinja_render(template_str=self.watch.get('body'))

        request_method = self.watch.get('method')
        ignore_status_codes = self.watch.get('ignore_status_codes', False)

        # Configurable per-watch or global extra delay before extracting text (for webDriver types)
        system_webdriver_delay = self.datastore.data['settings']['application'].get('webdriver_delay', None)
        if self.watch.get('webdriver_delay'):
            self.fetcher.render_extract_delay = self.watch.get('webdriver_delay')
        elif system_webdriver_delay is not None:
            self.fetcher.render_extract_delay = system_webdriver_delay

        js_code = self.watch.get('webdriver_js_execute_code')
        if js_code is not None and js_code.strip():
            self.fetcher.webdriver_js_execute_code = js_code

        # Requests for PDF's, images etc should be passwd the is_binary flag
        is_binary = self.watch.is_pdf

        # And here we go! call the right browser with browser-specific settings
        empty_pages_are_a_change = self.datastore.data['settings']['application'].get('empty_pages_are_a_change', False)
        # All fetchers are now async
        await self.fetcher.run(
            current_include_filters=self.watch.get('include_filters'),
            empty_pages_are_a_change=empty_pages_are_a_change,
            fetch_favicon=self.watch.favicon_is_expired(),
            ignore_status_codes=ignore_status_codes,
            is_binary=is_binary,
            request_body=request_body,
            request_headers=request_headers,
            request_method=request_method,
            screenshot_format=self.screenshot_format,
            timeout=timeout,
            url=url,
            watch_uuid=self.watch_uuid,
        )

        # @todo .quit here could go on close object, so we can run JS if change-detected
        await self.fetcher.quit(watch=self.watch)

        # After init, call run_changedetection() which will do the actual change-detection

    def get_extra_watch_config(self, filename):
        """
        Read processor-specific JSON config file from watch data directory.

        Args:
            filename: Name of JSON file (e.g., "visual_ssim_score.json")

        Returns:
            dict: Parsed JSON data, or empty dict if the watch or file doesn't
            exist, the file cannot be read/parsed, or it does not hold a JSON object
        """
        import json

        # Looked up live from the datastore (not self.watch): the watch may have
        # been removed since this processor was created.
        watch = self.datastore.data['watching'].get(self.watch_uuid)
        watch_data_dir = watch.watch_data_dir if watch is not None else None

        if not watch_data_dir:
            return {}

        filepath = os.path.join(watch_data_dir, filename)

        if not os.path.isfile(filepath):
            return {}

        try:
            with open(filepath, 'r', encoding='utf-8') as f:
                config = json.load(f)
        except (ValueError, OSError) as e:
            # ValueError covers json.JSONDecodeError and undecodable bytes
            logger.warning(f"Failed to read extra watch config {filename}: {e}")
            return {}

        if not isinstance(config, dict):
            # Keep the documented contract: callers always get a dict back
            logger.warning(f"Failed to read extra watch config {filename}: expected a JSON object")
            return {}

        return config

    def update_extra_watch_config(self, filename, data, merge=True):
        """
        Write processor-specific JSON config file to watch data directory.

        The file is written to a temporary sibling and then moved into place,
        so a failed write does not leave a truncated config behind.

        Args:
            filename: Name of JSON file (e.g., "visual_ssim_score.json")
            data: Dictionary to serialize as JSON
            merge: If True, merge with existing data; if False, overwrite completely

        Raises:
            TypeError/ValueError: if ``data`` cannot be serialized as JSON
                (raised before the existing file is touched)
        """
        import json

        watch = self.datastore.data['watching'].get(self.watch_uuid)
        watch_data_dir = watch.watch_data_dir if watch is not None else None

        if not watch_data_dir:
            logger.warning(f"Cannot save extra watch config {filename}: no watch_data_dir")
            return

        # Ensure directory exists
        watch.ensure_data_dir_exists()

        filepath = os.path.join(watch_data_dir, filename)

        data_to_save = data
        if merge:
            # If merge is enabled, read existing data first
            existing_data = {}
            if os.path.isfile(filepath):
                try:
                    with open(filepath, 'r', encoding='utf-8') as f:
                        loaded = json.load(f)
                    if isinstance(loaded, dict):
                        existing_data = loaded
                    else:
                        # A list/scalar cannot be merged into; start from empty
                        logger.warning("Failed to read existing config for merge: expected a JSON object")
                except (ValueError, OSError) as e:
                    logger.warning(f"Failed to read existing config for merge: {e}")

            # Merge new data with existing
            existing_data.update(data)
            data_to_save = existing_data

        # Serialize before opening anything for writing, so unserializable data
        # can never truncate the existing config.
        payload = json.dumps(data_to_save, indent=2)

        tmp_path = f"{filepath}.tmp"
        try:
            with open(tmp_path, 'w', encoding='utf-8') as f:
                f.write(payload)
            os.replace(tmp_path, filepath)
        except OSError as e:
            logger.error(f"Failed to write extra watch config {filename}: {e}")
            # Best-effort cleanup of the partial temp file; the error is already logged
            try:
                if os.path.exists(tmp_path):
                    os.remove(tmp_path)
            except OSError:
                pass

    # This class does not use ABCMeta, so @abstractmethod is only a marker here
    # and does not prevent instantiating the base class.
    @abstractmethod
    def run_changedetection(self, watch):
        """
        Placeholder implementation of the change-detection step.

        :param watch: The watch to check (unused by this placeholder)
        :return: Tuple of (changed_detected, update_obj, contents-as-bytes)
        """
        update_obj = {'last_notification_error': False, 'last_error': False}
        some_data = 'xxxxx'
        update_obj["previous_md5"] = hashlib.md5(some_data.encode('utf-8')).hexdigest()
        changed_detected = False
        return changed_detected, update_obj, ''.encode('utf-8')
|
||||
132
changedetectionio/processors/extract.py
Normal file
132
changedetectionio/processors/extract.py
Normal file
@@ -0,0 +1,132 @@
|
||||
"""
|
||||
Base data extraction module for all processors.
|
||||
|
||||
This module handles extracting data from watch history using regex patterns
|
||||
and exporting to CSV format. This is the default extractor that all processors
|
||||
(text_json_diff, restock_diff, etc.) can use by default or override.
|
||||
"""
|
||||
|
||||
import os
|
||||
from flask_babel import gettext
|
||||
from loguru import logger
|
||||
|
||||
|
||||
def render_form(watch, datastore, request, url_for, render_template, flash, redirect, extract_form=None):
    """
    Build the HTML page containing the data extraction form.

    Args:
        watch: The watch object
        datastore: The ChangeDetectionStore instance
        request: Flask request object
        url_for: Flask url_for function
        render_template: Flask render_template function
        flash: Flask flash function (not used here)
        redirect: Flask redirect function (not used here)
        extract_form: Optional pre-built extract form (for error cases)

    Returns:
        Rendered HTML response with the extraction form
    """
    from changedetectionio import forms

    uuid = watch.get('uuid')

    # Build a fresh form from the submitted data unless the caller passed one in
    if extract_form is None:
        extract_form = forms.extractDataForm(
            formdata=request.form,
            data={'extract_regex': request.form.get('extract_regex', '')}
        )

    # Get error information for the template
    screenshot_url = watch.get_screenshot()

    app_settings = datastore.data['settings']['application']
    system_uses_webdriver = app_settings['fetch_backend'] == 'html_webdriver'

    # A watch counts as webdriver-based when it is set to it explicitly, inherits
    # it from the system default, or uses one of the extra_browser_ backends
    watch_backend = watch.get('fetch_backend', '')
    is_html_webdriver = bool(
        (watch_backend == 'system' and system_uses_webdriver)
        or watch_backend == 'html_webdriver'
        or watch_backend.startswith('extra_browser_')
    )

    password_is_set = bool(app_settings.get('password') or os.getenv("SALTED_PASS", False))
    password_enabled_and_share_is_off = password_is_set and not app_settings.get('shared_diff_access')

    # Use the shared default template from processors/templates/
    # Processors can override this by creating their own extract.py with custom template logic
    template_context = {
        'uuid': uuid,
        'extract_form': extract_form,
        'watch_a': watch,
        'last_error': watch['last_error'],
        'last_error_screenshot': watch.get_error_snapshot(),
        'last_error_text': watch.get_error_text(),
        'screenshot': screenshot_url,
        'is_html_webdriver': is_html_webdriver,
        'password_enabled_and_share_is_off': password_enabled_and_share_is_off,
        'extra_title': f" - {watch.label} - Extract Data",
        'extra_stylesheets': [url_for('static_content', group='styles', filename='diff.css')],
        'pure_menu_fixed': False,
    }

    return render_template("extract.html", **template_context)
|
||||
|
||||
|
||||
def process_extraction(watch, datastore, request, url_for, make_response, send_from_directory, flash, redirect, extract_form=None):
    """
    Run the submitted regex over the watch history and send the result as CSV.

    Args:
        watch: The watch object
        datastore: The ChangeDetectionStore instance
        request: Flask request object
        url_for: Flask url_for function
        make_response: Flask make_response function
        send_from_directory: Flask send_from_directory function
        flash: Flask flash function
        redirect: Flask redirect function
        extract_form: Optional pre-built extract form

    Returns:
        CSV file download response, the re-rendered form when validation
        fails, or a redirect back to the form when nothing matched
    """
    from changedetectionio import forms

    uuid = watch.get('uuid')

    # Build a fresh form from the submitted data unless the caller passed one in
    if extract_form is None:
        extract_form = forms.extractDataForm(
            formdata=request.form,
            data={'extract_regex': request.form.get('extract_regex', '')}
        )

    form_is_valid = extract_form.validate()
    if not form_is_valid:
        flash(gettext("An error occurred, please see below."), "error")
        # render_template needs to be imported from Flask for this to work
        from flask import render_template as flask_render_template
        return render_form(
            watch=watch,
            datastore=datastore,
            request=request,
            url_for=url_for,
            render_template=flask_render_template,
            flash=flash,
            redirect=redirect,
            extract_form=extract_form
        )

    pattern = request.form.get('extract_regex', '').strip()
    csv_filename = watch.extract_regex_from_all_history(pattern)

    # Nothing matched anywhere in the history: tell the user and go back to the form
    if not csv_filename:
        flash(gettext('No matches found while scanning all of the watch history for that RegEx.'), 'error')
        return redirect(url_for('ui.ui_diff.diff_history_page_extract_GET', uuid=uuid))

    watch_dir = os.path.join(datastore.datastore_path, uuid)
    response = make_response(send_from_directory(directory=watch_dir, path=csv_filename, as_attachment=True))

    # Serve as CSV and forbid caching of the download
    for header_name, header_value in (
        ('Content-type', 'text/csv'),
        ('Cache-Control', 'no-cache, no-store, must-revalidate'),
        ('Pragma', 'no-cache'),
        ('Expires', "0"),
    ):
        response.headers[header_name] = header_value

    return response
|
||||
210
changedetectionio/processors/image_ssim_diff/README.md
Normal file
210
changedetectionio/processors/image_ssim_diff/README.md
Normal file
@@ -0,0 +1,210 @@
|
||||
# Fast Screenshot Comparison Processor
|
||||
|
||||
Visual/screenshot change detection using ultra-fast image comparison algorithms.
|
||||
|
||||
## Overview
|
||||
|
||||
This processor uses **OpenCV** by default for screenshot comparison, providing **50-100x faster** performance compared to the previous SSIM implementation while still detecting meaningful visual changes.
|
||||
|
||||
## Current Features
|
||||
|
||||
- **Ultra-fast OpenCV comparison**: cv2.absdiff with Gaussian blur for noise reduction
|
||||
- **MD5 pre-check**: Fast identical image detection before expensive comparison
|
||||
- **Configurable sensitivity**: Threshold-based change detection
|
||||
- **Three-panel diff view**: Previous | Current | Difference (with red highlights)
|
||||
- **Direct image support**: Works with browser screenshots AND direct image URLs
|
||||
- **Visual selector support**: Compare specific page regions using CSS/XPath selectors
|
||||
- **Download images**: Download any of the three comparison images directly from the diff view
|
||||
|
||||
## Performance
|
||||
|
||||
- **OpenCV (default)**: 50-100x faster than SSIM
|
||||
- **Large screenshots**: Automatic downscaling for diff visualization (configurable via `MAX_DIFF_HEIGHT`/`MAX_DIFF_WIDTH`)
|
||||
- **Memory efficient**: Explicit cleanup of large objects for long-running processes
|
||||
- **JPEG diff images**: Smaller file sizes, faster rendering
|
||||
|
||||
## How It Works
|
||||
|
||||
1. **Fetch**: Screenshot captured via browser OR direct image URL fetched
|
||||
2. **MD5 Check**: Quick hash comparison - if identical, skip comparison
|
||||
3. **Region Selection** (optional): Crop to specific page region if visual selector is configured
|
||||
4. **OpenCV Comparison**: Fast pixel-level difference detection with Gaussian blur
|
||||
5. **Change Detection**: Percentage of changed pixels above threshold = change detected
|
||||
6. **Visualization**: Generate diff image with red-highlighted changed regions
|
||||
|
||||
## Architecture
|
||||
|
||||
### Default Method: OpenCV
|
||||
|
||||
The processor uses OpenCV's `cv2.absdiff()` for ultra-fast pixel-level comparison:
|
||||
|
||||
```python
|
||||
# Convert to grayscale
|
||||
gray_from = cv2.cvtColor(image_from, cv2.COLOR_RGB2GRAY)
|
||||
gray_to = cv2.cvtColor(image_to, cv2.COLOR_RGB2GRAY)
|
||||
|
||||
# Apply Gaussian blur (reduces noise, controlled by OPENCV_BLUR_SIGMA env var)
|
||||
gray_from = cv2.GaussianBlur(gray_from, (0, 0), sigmaX=3.0)
|
||||
gray_to = cv2.GaussianBlur(gray_to, (0, 0), sigmaX=3.0)
|
||||
|
||||
# Calculate absolute difference
|
||||
diff = cv2.absdiff(gray_from, gray_to)
|
||||
|
||||
# Apply threshold (default: 30)
|
||||
_, thresh = cv2.threshold(diff, threshold, 255, cv2.THRESH_BINARY)
|
||||
|
||||
# Count changed pixels
|
||||
change_percentage = (changed_pixels / total_pixels) * 100
|
||||
```
|
||||
|
||||
### Optional: Pixelmatch
|
||||
|
||||
For users who need better anti-aliasing detection (especially for text-heavy screenshots), **pixelmatch** can be optionally installed:
|
||||
|
||||
```bash
|
||||
pip install pybind11-pixelmatch>=0.1.3
|
||||
```
|
||||
|
||||
**Note**: Pixelmatch uses a C++17 implementation via pybind11 and may have build issues on some platforms (particularly Alpine/musl systems with symbolic link security restrictions). The application will automatically fall back to OpenCV if pixelmatch is not available.
|
||||
|
||||
To use pixelmatch instead of OpenCV, set the environment variable:
|
||||
```bash
|
||||
COMPARISON_METHOD=pixelmatch
|
||||
```
|
||||
|
||||
#### When to use pixelmatch:
|
||||
- Screenshots with lots of text and anti-aliasing
|
||||
- Need to ignore minor font rendering differences between browser versions
|
||||
- 10-20x faster than SSIM (but slower than OpenCV)
|
||||
|
||||
#### When to stick with OpenCV (default):
|
||||
- General webpage monitoring
|
||||
- Maximum performance (50-100x faster than SSIM)
|
||||
- Simple pixel-level change detection
|
||||
- Avoid build dependencies (Alpine/musl systems)
|
||||
|
||||
## Configuration
|
||||
|
||||
### Environment Variables
|
||||
|
||||
```bash
|
||||
# Comparison method (opencv or pixelmatch)
|
||||
COMPARISON_METHOD=opencv # Default
|
||||
|
||||
# OpenCV threshold (0-255, lower = more sensitive)
|
||||
COMPARISON_THRESHOLD_OPENCV=30 # Default
|
||||
|
||||
# Pixelmatch threshold (0-100, mapped to 0-1 scale)
|
||||
COMPARISON_THRESHOLD_PIXELMATCH=10 # Default
|
||||
|
||||
# Gaussian blur sigma for OpenCV (0 = no blur, higher = more blur)
|
||||
OPENCV_BLUR_SIGMA=3.0 # Default
|
||||
|
||||
# Minimum change percentage to trigger detection
|
||||
OPENCV_MIN_CHANGE_PERCENT=0.1 # Default (0.1%)
|
||||
PIXELMATCH_MIN_CHANGE_PERCENT=0.1 # Default
|
||||
|
||||
# Diff visualization image size limits (pixels)
|
||||
MAX_DIFF_HEIGHT=8000 # Default
|
||||
MAX_DIFF_WIDTH=900 # Default
|
||||
```
|
||||
|
||||
### Per-Watch Configuration
|
||||
|
||||
- **Comparison Threshold**: Can be configured per-watch in the edit form
|
||||
  - Low sensitivity (200) - Only major changes
  - Medium sensitivity (80) - Moderate changes (recommended)
  - High sensitivity (20) - Small changes
  - Very high sensitivity (0) - Any change
|
||||
|
||||
### Visual Selector (Region Comparison)
|
||||
|
||||
Use the "Include filters" field with CSS selectors or XPath to compare only specific page regions:
|
||||
|
||||
```
|
||||
.content-area
|
||||
//div[@id='main']
|
||||
```
|
||||
|
||||
The processor will automatically crop both screenshots to the bounding box of the first matched element.
|
||||
|
||||
## Dependencies
|
||||
|
||||
### Required
|
||||
- `opencv-python-headless>=4.8.0.76` - Fast image comparison
|
||||
- `Pillow (PIL)` - Image loading and manipulation
|
||||
- `numpy` - Array operations
|
||||
|
||||
### Optional
|
||||
- `pybind11-pixelmatch>=0.1.3` - Alternative comparison method with anti-aliasing detection
|
||||
|
||||
## Change Detection Interpretation
|
||||
|
||||
- **0%** = Identical images (or below minimum change threshold)
|
||||
- **0.1-1%** = Minor differences (anti-aliasing, slight rendering differences)
|
||||
- **1-5%** = Noticeable changes (text updates, small content changes)
|
||||
- **5-20%** = Significant changes (layout shifts, content additions)
|
||||
- **>20%** = Major differences (page redesign, large content changes)
|
||||
|
||||
## Technical Notes
|
||||
|
||||
### Memory Management
|
||||
```python
|
||||
# Explicit cleanup for long-running processes
|
||||
img.close() # Close PIL Images
|
||||
buffer.close() # Close BytesIO buffers
|
||||
del large_array # Mark numpy arrays for GC
|
||||
```
|
||||
|
||||
### Diff Image Generation
|
||||
- Format: JPEG (quality=85, optimized)
|
||||
- Highlight: Red overlay (50% blend with original)
|
||||
- Auto-downscaling: Large screenshots downscaled for faster rendering
|
||||
- Base64 embedded: For direct template rendering
|
||||
|
||||
### OpenCV Blur Parameters
|
||||
The Gaussian blur reduces sensitivity to:
|
||||
- Font rendering differences
|
||||
- Anti-aliasing variations
|
||||
- JPEG compression artifacts
|
||||
- Minor pixel shifts (1-2 pixels)
|
||||
|
||||
Increase `OPENCV_BLUR_SIGMA` to make comparison more tolerant of these differences.
|
||||
|
||||
## Comparison: OpenCV vs Pixelmatch vs SSIM
|
||||
|
||||
| Feature | OpenCV | Pixelmatch | SSIM (old) |
|
||||
|---------|--------|------------|------------|
|
||||
| **Speed** | 50-100x faster | 10-20x faster | Baseline |
|
||||
| **Anti-aliasing** | Via blur | Built-in detection | Built-in |
|
||||
| **Text sensitivity** | High | Medium (AA-aware) | Medium |
|
||||
| **Dependencies** | opencv-python-headless | pybind11-pixelmatch + C++ compiler | scikit-image |
|
||||
| **Alpine/musl support** | ✅ Yes | ⚠️ Build issues | ✅ Yes |
|
||||
| **Memory usage** | Low | Low | High |
|
||||
| **Best for** | General use, max speed | Text-heavy screenshots | Deprecated |
|
||||
|
||||
## Migration from SSIM
|
||||
|
||||
If you're upgrading from the old SSIM-based processor:
|
||||
|
||||
1. **Thresholds are different**: SSIM used 0-1 scale (higher = more similar), OpenCV uses 0-255 pixel difference (lower = more similar)
|
||||
2. **Default threshold**: Start with 30 for OpenCV, adjust based on your needs
|
||||
3. **Performance**: Expect dramatically faster comparisons, especially for large screenshots
|
||||
4. **Accuracy**: OpenCV is more sensitive to pixel-level changes; increase `OPENCV_BLUR_SIGMA` if you're getting false positives
|
||||
|
||||
## Future Enhancements
|
||||
|
||||
Potential features for future consideration:
|
||||
|
||||
- **Change region detection**: Highlight specific areas that changed with bounding boxes
|
||||
- **Perceptual hashing**: Pre-screening filter for even faster checks
|
||||
- **Ignore regions**: Exclude specific page areas (ads, timestamps) from comparison
|
||||
- **Text extraction**: OCR-based text comparison for semantic changes
|
||||
- **Adaptive thresholds**: Different sensitivity for different page regions
|
||||
|
||||
## Resources
|
||||
|
||||
- [OpenCV Documentation](https://docs.opencv.org/)
|
||||
- [pybind11-pixelmatch GitHub](https://github.com/whtsky/pybind11-pixelmatch)
|
||||
- [Pixelmatch (original JS library)](https://github.com/mapbox/pixelmatch)
|
||||
32
changedetectionio/processors/image_ssim_diff/__init__.py
Normal file
32
changedetectionio/processors/image_ssim_diff/__init__.py
Normal file
@@ -0,0 +1,32 @@
|
||||
"""
Visual/screenshot change detection using fast image comparison algorithms.

This processor compares screenshots using OpenCV (cv2.absdiff),
which is 10-100x faster than SSIM while still detecting meaningful visual changes.
"""

import os
from pathlib import Path

# Human-readable description and internal name of this processor
processor_description = "Visual/Screenshot change detection (Fast)"
processor_name = "image_ssim_diff"
processor_weight = 2  # Lower weight = appears at top, heavier weight = appears lower (bottom)

# Name of the per-watch JSON config file, derived from this package's directory name
PROCESSOR_CONFIG_NAME = f"{Path(__file__).parent.name}.json"

# Subprocess timeout settings
# Maximum time to wait for subprocess operations (seconds)
# Read once at import time; a non-integer OPENCV_SUBPROCESS_TIMEOUT raises ValueError here
POLL_TIMEOUT_ABSOLUTE = int(os.getenv('OPENCV_SUBPROCESS_TIMEOUT', '20'))

# Template tracking filename
CROPPED_IMAGE_TEMPLATE_FILENAME = 'cropped_image_template.png'

# (value, label) pairs for the threshold choice; a lower value means higher sensitivity
SCREENSHOT_COMPARISON_THRESHOLD_OPTIONS = [
    ('200', 'Low sensitivity (only major changes)'),
    ('80', 'Medium sensitivity (moderate changes - recommended)'),
    ('20', 'High sensitivity (small changes)'),
    ('0', 'Very high sensitivity (any change)')
]

# NOTE(review): 0.999 is not one of the option values above. difference.py passes
# this fallback through int(), which truncates it to 0 (the "any change" level).
# It looks like a leftover from a 0-1 similarity scale - confirm the intended default.
SCREENSHOT_COMPARISON_THRESHOLD_OPTIONS_DEFAULT=0.999
# Gaussian blur sigma for comparisons; overridable via the OPENCV_BLUR_SIGMA env var
OPENCV_BLUR_SIGMA=float(os.getenv("OPENCV_BLUR_SIGMA", "3.0"))
|
||||
441
changedetectionio/processors/image_ssim_diff/difference.py
Normal file
441
changedetectionio/processors/image_ssim_diff/difference.py
Normal file
@@ -0,0 +1,441 @@
|
||||
"""
|
||||
Screenshot diff visualization for fast image comparison processor.
|
||||
|
||||
All image operations now use ImageDiffHandler abstraction for clean separation
|
||||
of concerns and easy backend swapping (LibVIPS, OpenCV, PIL, etc.).
|
||||
"""
|
||||
|
||||
import os
|
||||
import json
|
||||
import time
|
||||
from flask_babel import gettext
|
||||
from loguru import logger
|
||||
|
||||
from changedetectionio.processors.image_ssim_diff import SCREENSHOT_COMPARISON_THRESHOLD_OPTIONS_DEFAULT, PROCESSOR_CONFIG_NAME, \
|
||||
OPENCV_BLUR_SIGMA
|
||||
|
||||
# All image operations now use OpenCV via isolated_opencv subprocess handler
|
||||
# No direct handler imports needed - subprocess isolation handles everything
|
||||
|
||||
# Maximum dimensions for diff visualization (can be overridden via environment variable)
|
||||
# Large screenshots don't need full resolution for visual inspection
|
||||
# Defaults cap the diff image at 8000px high and 900px wide to limit memory usage
|
||||
MAX_DIFF_HEIGHT = int(os.getenv('MAX_DIFF_HEIGHT', '8000'))
|
||||
MAX_DIFF_WIDTH = int(os.getenv('MAX_DIFF_WIDTH', '900'))
|
||||
|
||||
|
||||
def get_asset(asset_name, watch, datastore, request):
|
||||
"""
|
||||
Get processor-specific binary assets for streaming.
|
||||
|
||||
Uses ImageDiffHandler for all image operations - no more multiprocessing needed
|
||||
as LibVIPS handles threading/memory internally.
|
||||
|
||||
Supported assets:
|
||||
- 'before': The previous/from screenshot
|
||||
- 'after': The current/to screenshot
|
||||
- 'rendered_diff': The generated diff visualization with red highlights
|
||||
|
||||
Args:
|
||||
asset_name: Name of the asset to retrieve ('before', 'after', 'rendered_diff')
|
||||
watch: Watch object
|
||||
datastore: Datastore object
|
||||
request: Flask request (for from_version/to_version query params)
|
||||
|
||||
Returns:
|
||||
tuple: (binary_data, content_type, cache_control_header) or None if not found
|
||||
"""
|
||||
# Get version parameters from query string
|
||||
versions = list(watch.history.keys())
|
||||
|
||||
if len(versions) < 2:
|
||||
return None
|
||||
|
||||
from_version = request.args.get('from_version', versions[-2] if len(versions) >= 2 else versions[0])
|
||||
to_version = request.args.get('to_version', versions[-1])
|
||||
|
||||
# Validate versions exist
|
||||
if from_version not in versions:
|
||||
from_version = versions[-2] if len(versions) >= 2 else versions[0]
|
||||
if to_version not in versions:
|
||||
to_version = versions[-1]
|
||||
|
||||
try:
|
||||
if asset_name == 'before':
|
||||
# Return the 'from' screenshot with bounding box if configured
|
||||
img_bytes = watch.get_history_snapshot(timestamp=from_version)
|
||||
img_bytes = _draw_bounding_box_if_configured(img_bytes, watch, datastore)
|
||||
mime_type = _detect_mime_type(img_bytes)
|
||||
return (img_bytes, mime_type, 'public, max-age=3600')
|
||||
|
||||
elif asset_name == 'after':
|
||||
# Return the 'to' screenshot with bounding box if configured
|
||||
img_bytes = watch.get_history_snapshot(timestamp=to_version)
|
||||
img_bytes = _draw_bounding_box_if_configured(img_bytes, watch, datastore)
|
||||
mime_type = _detect_mime_type(img_bytes)
|
||||
return (img_bytes, mime_type, 'public, max-age=3600')
|
||||
|
||||
elif asset_name == 'rendered_diff':
|
||||
# Generate diff in isolated subprocess to prevent memory leaks
|
||||
# Subprocess provides complete memory isolation
|
||||
from .image_handler import isolated_opencv as process_screenshot_handler
|
||||
|
||||
img_bytes_from = watch.get_history_snapshot(timestamp=from_version)
|
||||
img_bytes_to = watch.get_history_snapshot(timestamp=to_version)
|
||||
|
||||
# Get pixel difference threshold sensitivity (per-watch > global)
|
||||
# This controls how different a pixel must be (0-255 scale) to count as "changed"
|
||||
from changedetectionio import processors
|
||||
processor_instance = processors.difference_detection_processor(datastore, watch.get('uuid'))
|
||||
processor_config = processor_instance.get_extra_watch_config(PROCESSOR_CONFIG_NAME)
|
||||
|
||||
pixel_difference_threshold_sensitivity = processor_config.get('pixel_difference_threshold_sensitivity')
|
||||
if not pixel_difference_threshold_sensitivity:
|
||||
pixel_difference_threshold_sensitivity = datastore.data['settings']['application'].get(
|
||||
'pixel_difference_threshold_sensitivity', SCREENSHOT_COMPARISON_THRESHOLD_OPTIONS_DEFAULT)
|
||||
try:
|
||||
pixel_difference_threshold_sensitivity = int(pixel_difference_threshold_sensitivity)
|
||||
except (ValueError, TypeError):
|
||||
logger.warning(
|
||||
f"Invalid pixel_difference_threshold_sensitivity value '{pixel_difference_threshold_sensitivity}', using default")
|
||||
pixel_difference_threshold_sensitivity = SCREENSHOT_COMPARISON_THRESHOLD_OPTIONS_DEFAULT
|
||||
|
||||
logger.debug(f"Pixel difference threshold sensitivity is {pixel_difference_threshold_sensitivity}")
|
||||
|
||||
|
||||
# Generate diff in isolated subprocess (async-safe)
|
||||
import asyncio
|
||||
import threading
|
||||
|
||||
# Async-safe wrapper: runs coroutine in new thread with its own event loop
|
||||
def run_async_in_thread():
|
||||
return asyncio.run(
|
||||
process_screenshot_handler.generate_diff_isolated(
|
||||
img_bytes_from,
|
||||
img_bytes_to,
|
||||
pixel_difference_threshold=int(pixel_difference_threshold_sensitivity),
|
||||
blur_sigma=OPENCV_BLUR_SIGMA,
|
||||
max_width=MAX_DIFF_WIDTH,
|
||||
max_height=MAX_DIFF_HEIGHT
|
||||
)
|
||||
)
|
||||
|
||||
# Run in thread to avoid blocking event loop if called from async context
|
||||
result_container = [None]
|
||||
exception_container = [None]
|
||||
|
||||
def thread_target():
|
||||
try:
|
||||
result_container[0] = run_async_in_thread()
|
||||
except Exception as e:
|
||||
exception_container[0] = e
|
||||
|
||||
thread = threading.Thread(target=thread_target, daemon=True, name="ImageDiff-Asset")
|
||||
thread.start()
|
||||
thread.join(timeout=60)
|
||||
|
||||
if exception_container[0]:
|
||||
raise exception_container[0]
|
||||
|
||||
diff_image_bytes = result_container[0]
|
||||
|
||||
if diff_image_bytes:
|
||||
# Note: Bounding box drawing on diff not yet implemented
|
||||
return (diff_image_bytes, 'image/jpeg', 'public, max-age=300')
|
||||
else:
|
||||
logger.error("Failed to generate diff in subprocess")
|
||||
return None
|
||||
|
||||
else:
|
||||
# Unknown asset
|
||||
return None
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to get asset '{asset_name}': {e}")
|
||||
import traceback
|
||||
logger.error(traceback.format_exc())
|
||||
return None
|
||||
|
||||
|
||||
def _detect_mime_type(img_bytes):
    """
    Detect the MIME type of image bytes using puremagic (same as Watch.py).

    Only the first 2048 bytes are inspected. Every failure mode - puremagic
    not importable, input that cannot be sliced, no signature match, or a
    match that carries an empty MIME type - falls back to 'image/png' so the
    caller always receives a usable Content-Type value.

    Args:
        img_bytes: Image bytes

    Returns:
        str: MIME type (e.g., 'image/png', 'image/jpeg')
    """
    fallback = 'image/png'
    try:
        import puremagic
        detections = puremagic.magic_string(img_bytes[:2048])

        # A match may carry an empty mime_type; never hand '' back as a Content-Type.
        mime_type = detections[0].mime_type if detections else None
        if mime_type:
            logger.trace(f"Detected MIME type: {mime_type}")
            return mime_type

        logger.trace("No MIME type detected, using 'image/png' fallback")
        return fallback
    except Exception as e:
        logger.warning(f"puremagic detection failed: {e}, using 'image/png' fallback")
        return fallback
|
||||
|
||||
|
||||
def _draw_bounding_box_if_configured(img_bytes, watch, datastore):
    """
    Draw a blue bounding box on the image if one is configured for the watch.

    The drawing itself is delegated to isolated_opencv.draw_bounding_box_isolated,
    run via asyncio.run() on a helper thread so this function can be called
    from both sync and async contexts.

    Supports two modes (processor config key 'selection_mode'):
    - "element": use the first include_filter and look up its box in the
      xpath data saved with the latest history entry
    - anything else ("draw" by default): use the manually drawn
      'bounding_box' string ("x,y,width,height") from the processor config

    This is best-effort: on any error, on a missing/invalid box, or when the
    drawing does not finish within 15 seconds, the original bytes are returned.

    Args:
        img_bytes: Image bytes (PNG)
        watch: Watch object
        datastore: Datastore object

    Returns:
        Image bytes (possibly with bounding box drawn)
    """
    try:
        # Get processor configuration
        from changedetectionio import processors
        processor_instance = processors.difference_detection_processor(datastore, watch.get('uuid'))
        processor_name = watch.get('processor', 'default')
        config_filename = f'{processor_name}.json'
        processor_config = processor_instance.get_extra_watch_config(config_filename)

        if not processor_config:
            return img_bytes

        selection_mode = processor_config.get('selection_mode', 'draw')
        x, y, width, height = None, None, None, None

        # Mode 1: Select by element (use include_filter + xpath_data)
        if selection_mode == 'element':
            include_filters = watch.get('include_filters', [])

            if include_filters:
                first_filter = include_filters[0].strip()

                # Only the xpath data file is needed here; the snapshot image
                # itself is not loaded (it was previously read and discarded).
                history_keys = list(watch.history.keys())
                if history_keys:
                    xpath_data_path = watch.get_xpath_data_filepath(timestamp=history_keys[-1])

                    try:
                        import gzip
                        with gzip.open(xpath_data_path, 'rt') as f:
                            xpath_data = json.load(f)

                        # Find matching element
                        for element in xpath_data.get('size_pos', []):
                            if element.get('xpath') == first_filter and element.get('highlight_as_custom_filter'):
                                x = element.get('left', 0)
                                y = element.get('top', 0)
                                width = element.get('width', 0)
                                height = element.get('height', 0)
                                logger.debug(f"Found element bbox for filter '{first_filter}': x={x}, y={y}, w={width}, h={height}")
                                break
                    except Exception as e:
                        logger.warning(f"Failed to load xpath_data for element selection: {e}")

        # Mode 2: Draw area (use manually configured bbox)
        else:
            bounding_box = processor_config.get('bounding_box')
            if bounding_box:
                # Parse bounding box: "x,y,width,height"
                # (a non-integer part raises ValueError, handled by the outer except)
                parts = [int(p.strip()) for p in bounding_box.split(',')]
                if len(parts) == 4:
                    x, y, width, height = parts
                else:
                    logger.warning(f"Invalid bounding box format: {bounding_box}")

        # If no bbox found, return original image
        if x is None or y is None or width is None or height is None:
            return img_bytes

        from .image_handler import isolated_opencv
        import asyncio
        import threading

        # Async-safe wrapper: runs coroutine in new thread with its own event loop
        # This prevents blocking when called from async context (update worker)
        def run_async_in_thread():
            return asyncio.run(
                isolated_opencv.draw_bounding_box_isolated(
                    img_bytes, x, y, width, height,
                    color=(255, 0, 0),  # Blue in BGR format
                    thickness=3
                )
            )

        # Single-slot lists let the worker thread hand back a result or an exception
        result_container = [None]
        exception_container = [None]

        def thread_target():
            try:
                result_container[0] = run_async_in_thread()
            except Exception as e:
                exception_container[0] = e

        thread = threading.Thread(target=thread_target, daemon=True, name="ImageDiff-BoundingBox")
        thread.start()
        thread.join(timeout=15)

        # join() returns silently on timeout, so check explicitly and say why
        # the box is missing instead of looking like a failed subprocess.
        if thread.is_alive():
            logger.warning("Timed out after 15s drawing bounding box, returning original image")
            return img_bytes

        if exception_container[0]:
            raise exception_container[0]

        result = result_container[0]

        # Return result or original if subprocess failed
        return result if result else img_bytes

    except Exception as e:
        logger.warning(f"Failed to draw bounding box: {e}")
        import traceback
        logger.debug(traceback.format_exc())
        return img_bytes
|
||||
|
||||
|
||||
def render(watch, datastore, request, url_for, render_template, flash, redirect):
    """
    Render the screenshot comparison diff page.

    Computes the change percentage between two history snapshots via
    isolated_opencv.calculate_change_percentage_isolated (run with
    asyncio.run() on a helper thread) and renders 'image_ssim_diff/diff.html'.
    The images themselves are served by separate asset endpoints.

    Args:
        watch: Watch object
        datastore: Datastore object
        request: Flask request (from_version/to_version query params)
        url_for: Flask url_for function
        render_template: Flask render_template function
        flash: Flask flash function
        redirect: Flask redirect function

    Returns:
        Rendered template, or a redirect to the watch list (with a flashed
        error) when there is not enough history, the screenshots cannot be
        loaded, or the calculation fails or times out.
    """
    versions = list(watch.history.keys())

    if len(versions) < 2:
        flash(gettext("Not enough history to compare. Need at least 2 snapshots."), "error")
        return redirect(url_for('watchlist.index'))

    # Default: compare latest two versions (at least two exist past this point).
    # Unknown versions supplied in the query string fall back to the same defaults.
    from_version = request.args.get('from_version', versions[-2])
    if from_version not in versions:
        from_version = versions[-2]

    to_version = request.args.get('to_version', versions[-1])
    if to_version not in versions:
        to_version = versions[-1]

    # Pixel difference threshold sensitivity:
    #   per-watch processor config > watch attribute > global setting > default.
    # The per-watch value is saved with the processor config, which is also
    # where the rendered_diff asset reads it from, so both agree.
    pixel_difference_threshold_sensitivity = None
    try:
        from changedetectionio import processors
        processor_instance = processors.difference_detection_processor(datastore, watch.get('uuid'))
        processor_config = processor_instance.get_extra_watch_config(PROCESSOR_CONFIG_NAME) or {}
        pixel_difference_threshold_sensitivity = processor_config.get('pixel_difference_threshold_sensitivity')
    except Exception as e:
        logger.warning(f"Could not read per-watch processor config, falling back to watch/global threshold: {e}")

    # Compare against None/'' explicitly: 0 is a valid threshold, not "unset".
    if pixel_difference_threshold_sensitivity in (None, ''):
        pixel_difference_threshold_sensitivity = watch.get('pixel_difference_threshold_sensitivity')
    if pixel_difference_threshold_sensitivity in (None, ''):
        pixel_difference_threshold_sensitivity = datastore.data['settings']['application'].get(
            'pixel_difference_threshold_sensitivity', SCREENSHOT_COMPARISON_THRESHOLD_OPTIONS_DEFAULT)

    # Convert to appropriate type
    try:
        pixel_difference_threshold_sensitivity = float(pixel_difference_threshold_sensitivity)
    except (ValueError, TypeError):
        logger.warning(f"Invalid pixel_difference_threshold_sensitivity value '{pixel_difference_threshold_sensitivity}', using default")
        pixel_difference_threshold_sensitivity = float(SCREENSHOT_COMPARISON_THRESHOLD_OPTIONS_DEFAULT)

    # Get blur sigma
    blur_sigma = OPENCV_BLUR_SIGMA

    # Load screenshots from history
    try:
        img_bytes_from = watch.get_history_snapshot(timestamp=from_version)
        img_bytes_to = watch.get_history_snapshot(timestamp=to_version)
    except Exception as e:
        logger.error(f"Failed to load screenshots: {e}")
        flash(gettext("Failed to load screenshots: {}").format(e), "error")
        return redirect(url_for('watchlist.index'))

    # Calculate change percentage (async-safe)
    now = time.time()
    try:
        from .image_handler import isolated_opencv as process_screenshot_handler
        import asyncio
        import threading

        # Async-safe wrapper: runs coroutine in new thread with its own event loop
        def run_async_in_thread():
            return asyncio.run(
                process_screenshot_handler.calculate_change_percentage_isolated(
                    img_bytes_from,
                    img_bytes_to,
                    pixel_difference_threshold=int(pixel_difference_threshold_sensitivity),
                    blur_sigma=blur_sigma,
                    max_width=MAX_DIFF_WIDTH,
                    max_height=MAX_DIFF_HEIGHT
                )
            )

        # Single-slot lists let the worker thread hand back a result or an exception
        result_container = [None]
        exception_container = [None]

        def thread_target():
            try:
                result_container[0] = run_async_in_thread()
            except Exception as e:
                exception_container[0] = e

        thread = threading.Thread(target=thread_target, daemon=True, name="ImageDiff-ChangePercentage")
        thread.start()
        thread.join(timeout=60)

        # join() returns silently on timeout; without this check the page would
        # be rendered with change_percentage=None.
        if thread.is_alive():
            raise TimeoutError("Change percentage calculation timed out after 60 seconds")

        if exception_container[0]:
            raise exception_container[0]

        change_percentage = result_container[0]

        method_display = f"{process_screenshot_handler.IMPLEMENTATION_NAME} (pixel_diff_threshold: {pixel_difference_threshold_sensitivity:.0f})"
        logger.debug(f"Done change percentage calculation in {time.time() - now:.2f}s")

    except Exception as e:
        logger.error(f"Failed to calculate change percentage: {e}")
        import traceback
        logger.error(traceback.format_exc())
        flash(gettext("Failed to calculate diff: {}").format(e), "error")
        return redirect(url_for('watchlist.index'))

    # Load historical data if available (for charts/visualization)
    comparison_data = {}
    comparison_config_path = os.path.join(watch.watch_data_dir, "visual_comparison_data.json")
    if os.path.isfile(comparison_config_path):
        try:
            with open(comparison_config_path, 'r') as f:
                comparison_data = json.load(f)
        except Exception as e:
            logger.warning(f"Failed to load comparison history data: {e}")

    # Render custom template
    # Template path is namespaced to avoid conflicts with other processors
    # Images are now served via separate /processor-asset/ endpoints instead of base64
    return render_template(
        'image_ssim_diff/diff.html',
        change_percentage=change_percentage,
        comparison_data=comparison_data,  # Full history for charts/visualization
        comparison_method=method_display,
        current_diff_url=watch['url'],
        from_version=from_version,
        percentage_different=change_percentage,
        threshold=pixel_difference_threshold_sensitivity,
        to_version=to_version,
        uuid=watch.get('uuid'),
        versions=versions,
        watch=watch,
    )
|
||||
151
changedetectionio/processors/image_ssim_diff/edit_hook.py
Normal file
151
changedetectionio/processors/image_ssim_diff/edit_hook.py
Normal file
@@ -0,0 +1,151 @@
|
||||
"""
|
||||
Optional hook called when processor settings are saved in edit page.
|
||||
|
||||
This hook analyzes the selected region to determine if template matching
|
||||
should be enabled for tracking content movement.
|
||||
|
||||
Template matching is controlled via ENABLE_TEMPLATE_TRACKING env var (default: False).
|
||||
"""
|
||||
|
||||
import io
|
||||
import os
|
||||
from loguru import logger
|
||||
from changedetectionio import strtobool
|
||||
from . import CROPPED_IMAGE_TEMPLATE_FILENAME
|
||||
|
||||
# Template matching is opt-in through the environment and is off unless requested.
# Export ENABLE_TEMPLATE_TRACKING=True to switch it on.
TEMPLATE_MATCHING_ENABLED = strtobool(os.environ.get('ENABLE_TEMPLATE_TRACKING', 'False'))

# Explanation text embedded in the warnings logged when tracking is unavailable
# (despite the name, it does not describe an import failure).
IMPORT_ERROR = "Template matching disabled (set ENABLE_TEMPLATE_TRACKING=True to enable)"
|
||||
|
||||
|
||||
def on_config_save(watch, processor_config, datastore):
    """
    Hook invoked after the processor config has been saved from the edit page.

    Decides whether 'auto_track_region' should be switched on for the
    configured bounding box by asking analyze_region_features() whether the
    region of the latest screenshot is distinctive enough. When it is, the
    template is saved via save_template_to_file(); when it is not, any
    previously stored template file is deleted.

    Tracking is forced off when ENABLE_TEMPLATE_TRACKING is not enabled, when
    no bounding box is set, when there is no usable screenshot, when the box
    is not four comma-separated integers, or when anything raises.

    Args:
        watch: Watch object
        processor_config: Dict of processor-specific config (mutated in place)
        datastore: Datastore object

    Returns:
        dict: The same processor_config with 'auto_track_region' set
    """
    def _tracking_off():
        # Shared exit path: record that tracking is disabled and hand the config back.
        processor_config['auto_track_region'] = False
        return processor_config

    # Global kill switch (environment variable)
    if not TEMPLATE_MATCHING_ENABLED:
        logger.debug("Template tracking disabled via ENABLE_TEMPLATE_TRACKING env var")
        return _tracking_off()

    bounding_box = processor_config.get('bounding_box')
    if not bounding_box:
        # Nothing selected, so there is nothing to track
        _tracking_off()
        logger.debug("No bounding box set, disabled auto-tracking")
        return processor_config

    try:
        # The newest history entry is the screenshot to analyse
        timestamps = list(watch.history.keys())
        if not timestamps:
            logger.warning("No screenshot history available yet, cannot analyze for tracking")
            return _tracking_off()

        screenshot_bytes = watch.get_history_snapshot(timestamp=timestamps[-1])
        if not screenshot_bytes:
            logger.warning("Could not load screenshot for analysis")
            return _tracking_off()

        # "x,y,width,height" -> four integers
        coords = [int(chunk.strip()) for chunk in bounding_box.split(',')]
        if len(coords) != 4:
            logger.warning("Invalid bounding box format")
            return _tracking_off()

        x, y, width, height = coords

        if not analyze_region_features(screenshot_bytes, x, y, width, height):
            logger.info("Region lacks distinctive features - disabling auto_track_region")
            processor_config['auto_track_region'] = False

            # A template left over from an earlier configuration is now stale
            stale_template = os.path.join(watch.watch_data_dir, CROPPED_IMAGE_TEMPLATE_FILENAME)
            if os.path.exists(stale_template):
                os.remove(stale_template)
                logger.debug(f"Removed old template file: {stale_template}")
        else:
            logger.info("Region has sufficient features for tracking - enabling auto_track_region")
            processor_config['auto_track_region'] = True

            # Persist the region as the template in the watch data directory
            save_template_to_file(watch, screenshot_bytes, x, y, width, height)

        return processor_config

    except Exception as e:
        logger.error(f"Error analyzing region for tracking: {e}")
        return _tracking_off()
|
||||
|
||||
|
||||
def analyze_region_features(screenshot_bytes, x, y, width, height):
    """
    Report whether a region is distinctive enough for template matching.

    Currently a stub: no analysis is performed and the answer is always
    False. A warning is logged when template matching is disabled.

    Notes for a future implementation: the original version cropped the
    region with the LibVIPS handler and ran OpenCV feature detection on it
    (cv2.goodFeaturesToTrack, cv2.Canny, np.var). A re-implementation should
    use OpenCV for both the cropping and the analysis.

    Args:
        screenshot_bytes: Full screenshot as bytes
        x, y, width, height: Bounding box coordinates

    Returns:
        bool: Always False for now
    """
    if not TEMPLATE_MATCHING_ENABLED:
        logger.warning(f"Cannot analyze region features: {IMPORT_ERROR}")

    # Enabled or not, there is no feature detection behind this yet.
    return False
|
||||
|
||||
|
||||
def save_template_to_file(watch, screenshot_bytes, x, y, width, height):
    """
    Store the bounding-box region of a screenshot as the watch's template image.

    Currently a stub: nothing is written. A warning is logged when template
    matching is disabled.

    Notes for a future implementation: the original version cropped and saved
    the region with the LibVIPS handler. A re-implementation should use
    OpenCV (cv2.imdecode, array slicing for the crop, cv2.imwrite).

    Args:
        watch: Watch object
        screenshot_bytes: Full screenshot as bytes
        x, y, width, height: Bounding box coordinates

    Returns:
        None
    """
    if not TEMPLATE_MATCHING_ENABLED:
        logger.warning(f"Cannot save template: {IMPORT_ERROR}")

    # No template writing is implemented yet.
    return None
|
||||
120
changedetectionio/processors/image_ssim_diff/forms.py
Normal file
120
changedetectionio/processors/image_ssim_diff/forms.py
Normal file
@@ -0,0 +1,120 @@
|
||||
"""
|
||||
Configuration forms for fast screenshot comparison processor.
|
||||
"""
|
||||
|
||||
from wtforms import SelectField, StringField, validators, ValidationError, IntegerField
|
||||
from flask_babel import lazy_gettext as _l
|
||||
from changedetectionio.forms import processor_text_json_diff_form
|
||||
import re
|
||||
|
||||
from changedetectionio.processors.image_ssim_diff import SCREENSHOT_COMPARISON_THRESHOLD_OPTIONS
|
||||
|
||||
|
||||
def validate_bounding_box(form, field):
    """
    WTForms validator for a bounding box given as "x,y,width,height".

    An empty value is accepted (the field is optional). Otherwise the whole
    value must be exactly four comma-separated runs of ASCII digits, each
    no greater than 10000.

    Args:
        form: The form being validated (unused)
        field: The field whose .data is checked

    Raises:
        ValidationError: If the value is too long, malformed, or too large
    """
    if not field.data:
        return  # Optional field

    if len(field.data) > 100:
        raise ValidationError(_l('Bounding box value is too long'))

    # fullmatch + [0-9]: unlike match() with '$', this rejects a trailing
    # newline, and unlike '\d' it rejects non-ASCII digit characters.
    if not re.fullmatch(r'[0-9]+,[0-9]+,[0-9]+,[0-9]+', field.data):
        raise ValidationError(_l('Bounding box must be in format: x,y,width,height (integers only)'))

    # The pattern admits no sign, so values cannot be negative here; only the
    # upper bound needs checking.
    for part in field.data.split(','):
        if int(part) > 10000:  # Reasonable max screen dimension
            raise ValidationError(_l('Bounding box values are too large'))
|
||||
|
||||
|
||||
def validate_selection_mode(form, field):
    """
    WTForms validator: the selection mode, when given, must be 'element' or 'draw'.

    Args:
        form: The form being validated (unused)
        field: The field whose .data is checked

    Raises:
        ValidationError: If a non-empty value is neither 'element' nor 'draw'
    """
    mode = field.data
    # Empty means "not provided", which is fine for this optional field.
    if mode and mode not in ('element', 'draw'):
        raise ValidationError(_l('Selection mode must be either "element" or "draw"'))
|
||||
|
||||
|
||||
class processor_settings_form(processor_text_json_diff_form):
    """
    Form for fast image comparison processor settings.

    Fields named processor_config_* are processor-specific: per the note on
    extra_form_content(), they are saved to the watch's own
    image_ssim_diff.json rather than to the watch itself.
    """

    # Whole-number percentage; the message now states the range that
    # NumberRange actually enforces (1-100).
    processor_config_min_change_percentage = IntegerField(
        _l('Minimum Change Percentage'),
        validators=[
            validators.Optional(),
            validators.NumberRange(min=1, max=100, message=_l('Must be between 1 and 100'))
        ],
        render_kw={"placeholder": "Use global default (0.1)"}
    )

    # The empty choice means "inherit the global setting"
    processor_config_pixel_difference_threshold_sensitivity = SelectField(
        _l('Pixel Difference Sensitivity'),
        choices=[
            ('', _l('Use global default'))
        ] + SCREENSHOT_COMPARISON_THRESHOLD_OPTIONS,
        validators=[validators.Optional()],
        default=''
    )

    # Processor-specific config fields (stored in separate JSON file).
    # Both are rendered hidden ("display: none").
    processor_config_bounding_box = StringField(
        _l('Bounding Box'),
        validators=[
            validators.Optional(),
            validators.Length(max=100, message=_l('Bounding box value is too long')),
            validate_bounding_box
        ],
        render_kw={"style": "display: none;", "id": "bounding_box"}
    )

    processor_config_selection_mode = StringField(
        _l('Selection Mode'),
        validators=[
            validators.Optional(),
            validators.Length(max=20, message=_l('Selection mode value is too long')),
            validate_selection_mode
        ],
        render_kw={"style": "display: none;", "id": "selection_mode"}
    )

    def extra_tab_content(self):
        """Tab label for processor-specific settings."""
        return _l('Screenshot Comparison')

    def extra_form_content(self):
        """Render processor-specific form fields.

        @NOTE: prepend processor_config_* to the field name so it will save into its own datadir/uuid/image_ssim_diff.json and be read at process time

        Returns:
            str: Jinja template source for the settings fieldset
        """
        return '''
        {% from '_helpers.html' import render_field %}
        <fieldset>
            <legend>Screenshot Comparison Settings</legend>

            <div class="pure-control-group">
                {{ render_field(form.processor_config_min_change_percentage) }}
                <span class="pure-form-message-inline">
                    <strong>What percentage of pixels must change to trigger a detection?</strong><br>
                    Whole numbers from 1 to 100 only. For example, <strong>1%</strong> means if 1% or more of the pixels change, it counts as a change.<br>
                    Lower values = more sensitive (detect smaller changes).<br>
                    Higher values = less sensitive (only detect larger changes).<br>
                    Leave blank to use global default (0.1%).
                </span>
            </div>

            <div class="pure-control-group">
                {{ render_field(form.processor_config_pixel_difference_threshold_sensitivity) }}
                <span class="pure-form-message-inline">
                    <strong>How different must an individual pixel be to count as "changed"?</strong><br>
                    <strong>Low sensitivity (75)</strong> = Only count pixels that changed significantly (0-255 scale).<br>
                    <strong>High sensitivity (20)</strong> = Count pixels with small changes as different.<br>
                    <strong>Very high (0)</strong> = Any pixel change counts.<br>
                    Select "Use global default" to inherit the system-wide setting.
                </span>
            </div>
        </fieldset>
        '''
|
||||
@@ -0,0 +1,242 @@
|
||||
"""
|
||||
Abstract base class for image processing operations.
|
||||
|
||||
All image operations for the image_ssim_diff processor must be implemented
|
||||
through this interface to allow different backends (libvips, OpenCV, PIL, etc.).
|
||||
"""
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Tuple, Optional, Any
|
||||
|
||||
|
||||
class ImageDiffHandler(ABC):
    """
    Backend-neutral contract for the image operations used in screenshot comparison.

    A concrete backend subclasses this and implements every operation below:
    loading/saving, cropping, resizing, grayscale/blur, differencing,
    thresholding, overlays, template matching and bounding-box drawing.
    The "image object" passed between methods is whatever type the backend
    chooses; callers treat it as opaque.
    """

    @abstractmethod
    def load_from_bytes(self, img_bytes: bytes) -> Any:
        """
        Decode encoded image data into the backend's image object.

        Args:
            img_bytes: Encoded image data (PNG, JPEG, etc.)

        Returns:
            Handler-specific image object
        """

    @abstractmethod
    def save_to_bytes(self, img: Any, format: str = 'png', quality: int = 85) -> bytes:
        """
        Encode an image object back into bytes.

        Args:
            img: Handler-specific image object
            format: Output format ('png' or 'jpeg')
            quality: JPEG quality (1-100)

        Returns:
            Encoded image data
        """

    @abstractmethod
    def crop(self, img: Any, left: int, top: int, right: int, bottom: int) -> Any:
        """
        Cut a rectangular region out of an image.

        Args:
            img: Handler-specific image object
            left: Left coordinate
            top: Top coordinate
            right: Right coordinate
            bottom: Bottom coordinate

        Returns:
            Cropped image object
        """

    @abstractmethod
    def resize(self, img: Any, max_width: int, max_height: int) -> Any:
        """
        Scale an image to fit within the given bounds, keeping its aspect ratio.

        Args:
            img: Handler-specific image object
            max_width: Maximum width in pixels
            max_height: Maximum height in pixels

        Returns:
            Resized image object
        """

    @abstractmethod
    def get_dimensions(self, img: Any) -> Tuple[int, int]:
        """
        Report the size of an image.

        Args:
            img: Handler-specific image object

        Returns:
            Tuple of (width, height)
        """

    @abstractmethod
    def to_grayscale(self, img: Any) -> Any:
        """
        Produce a grayscale version of an image.

        Args:
            img: Handler-specific image object

        Returns:
            Grayscale image object
        """

    @abstractmethod
    def gaussian_blur(self, img: Any, sigma: float) -> Any:
        """
        Blur an image with a Gaussian kernel.

        Args:
            img: Handler-specific image object
            sigma: Blur sigma value (0 = no blur)

        Returns:
            Blurred image object
        """

    @abstractmethod
    def absolute_difference(self, img1: Any, img2: Any) -> Any:
        """
        Compute the absolute difference of two images.

        Args:
            img1: First image (handler-specific object)
            img2: Second image (handler-specific object)

        Returns:
            Difference image object
        """

    @abstractmethod
    def threshold(self, img: Any, threshold_value: int) -> Tuple[float, Any]:
        """
        Threshold an image and measure how much of it exceeds the threshold.

        Args:
            img: Handler-specific image object (typically grayscale difference)
            threshold_value: Threshold value (0-255)

        Returns:
            Tuple of (change_percentage, binary_mask)
            - change_percentage: Percentage of pixels above threshold (0-100)
            - binary_mask: Handler-specific binary mask object
        """

    @abstractmethod
    def apply_red_overlay(self, img: Any, mask: Any) -> bytes:
        """
        Tint an image red wherever the mask is set.

        Args:
            img: Handler-specific image object (color)
            mask: Handler-specific binary mask object

        Returns:
            JPEG bytes with red overlay applied
        """

    @abstractmethod
    def close(self, img: Any) -> None:
        """
        Release any resources held by an image object, if the backend needs to.

        Args:
            img: Handler-specific image object
        """

    @abstractmethod
    def find_template(
        self,
        img: Any,
        template_img: Any,
        original_bbox: Tuple[int, int, int, int],
        search_tolerance: float = 0.2
    ) -> Optional[Tuple[int, int, int, int]]:
        """
        Locate a template inside an image by template matching.

        Args:
            img: Handler-specific image object to search in
            template_img: Handler-specific template image object to find
            original_bbox: Original bounding box (left, top, right, bottom)
            search_tolerance: How far to search (0.2 = ±20% of region size)

        Returns:
            New bounding box (left, top, right, bottom) or None if not found
        """

    @abstractmethod
    def save_template(
        self,
        img: Any,
        bbox: Tuple[int, int, int, int],
        output_path: str
    ) -> bool:
        """
        Crop a region and write it to disk as a template file.

        Args:
            img: Handler-specific image object
            bbox: Bounding box to crop (left, top, right, bottom)
            output_path: Where to save the template PNG

        Returns:
            True if successful, False otherwise
        """

    @abstractmethod
    def draw_bounding_box(
        self,
        img_bytes: bytes,
        x: int,
        y: int,
        width: int,
        height: int,
        color: Tuple[int, int, int] = (255, 0, 0),
        thickness: int = 3
    ) -> bytes:
        """
        Draw a rectangle outline onto an encoded image.

        Args:
            img_bytes: Image data as bytes
            x: Left coordinate
            y: Top coordinate
            width: Box width
            height: Box height
            color: BGR color tuple (default: blue)
            thickness: Line thickness in pixels

        Returns:
            Image bytes with bounding box drawn
        """
|
||||
@@ -0,0 +1,351 @@
|
||||
"""
|
||||
Subprocess-isolated image operations for memory leak prevention.
|
||||
|
||||
LibVIPS accumulates C-level memory in long-running processes that cannot be
|
||||
reclaimed by Python's GC or libvips cache management. Using subprocess isolation
|
||||
ensures complete memory cleanup when the process exits.
|
||||
|
||||
This module wraps LibvipsImageDiffHandler operations in multiprocessing for
|
||||
complete memory isolation without code duplication.
|
||||
|
||||
Research: https://github.com/libvips/pyvips/issues/234
|
||||
"""
|
||||
|
||||
import multiprocessing
|
||||
|
||||
# CRITICAL: Use 'spawn' context instead of 'fork' to avoid inheriting parent's
|
||||
# LibVIPS threading state which can cause hangs in gaussblur operations
|
||||
# https://docs.python.org/3/library/multiprocessing.html#contexts-and-start-methods
|
||||
|
||||
|
||||
def _worker_generate_diff(conn, img_bytes_from, img_bytes_to, threshold, blur_sigma, max_width, max_height):
|
||||
"""
|
||||
Worker: Generate diff visualization using LibvipsImageDiffHandler in isolated subprocess.
|
||||
|
||||
This runs in a separate process for complete memory isolation.
|
||||
Uses print() instead of loguru to avoid forking issues.
|
||||
"""
|
||||
try:
|
||||
# Import handler inside worker
|
||||
from .libvips_handler import LibvipsImageDiffHandler
|
||||
|
||||
print(f"[Worker] Initializing handler", flush=True)
|
||||
handler = LibvipsImageDiffHandler()
|
||||
|
||||
# Load images using handler
|
||||
img_from = handler.load_from_bytes(img_bytes_from)
|
||||
img_to = handler.load_from_bytes(img_bytes_to)
|
||||
|
||||
# Ensure same size
|
||||
w1, h1 = handler.get_dimensions(img_from)
|
||||
w2, h2 = handler.get_dimensions(img_to)
|
||||
if (w1, h1) != (w2, h2):
|
||||
img_from = handler.resize(img_from, w2, h2)
|
||||
|
||||
# Downscale for faster diff visualization
|
||||
img_from = handler.resize(img_from, max_width, max_height)
|
||||
img_to = handler.resize(img_to, max_width, max_height)
|
||||
|
||||
# Convert to grayscale
|
||||
gray_from = handler.to_grayscale(img_from)
|
||||
gray_to = handler.to_grayscale(img_to)
|
||||
|
||||
# Optional blur - DISABLED due to LibVIPS threading issues in fork
|
||||
# gray_from = handler.gaussian_blur(gray_from, blur_sigma)
|
||||
# gray_to = handler.gaussian_blur(gray_to, blur_sigma)
|
||||
|
||||
# Calculate difference
|
||||
diff = handler.absolute_difference(gray_from, gray_to)
|
||||
|
||||
# Threshold to get mask
|
||||
_, diff_mask = handler.threshold(diff, int(threshold))
|
||||
|
||||
# Generate diff image with red overlay
|
||||
diff_image_bytes = handler.apply_red_overlay(img_to, diff_mask)
|
||||
|
||||
print(f"[Worker] Generated diff ({len(diff_image_bytes)} bytes)", flush=True)
|
||||
conn.send(diff_image_bytes)
|
||||
|
||||
except Exception as e:
|
||||
print(f"[Worker] Error: {e}", flush=True)
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
conn.send(None)
|
||||
finally:
|
||||
conn.close()
|
||||
|
||||
|
||||
def generate_diff_isolated(img_bytes_from, img_bytes_to, threshold, blur_sigma, max_width, max_height):
    """
    Generate diff visualization in isolated subprocess for memory leak prevention.

    Starts ``_worker_generate_diff`` in a 'spawn' subprocess, waits up to 30 seconds
    for its result on a pipe, then shuts the subprocess down (join, then terminate,
    then kill).

    Args:
        img_bytes_from: Previous screenshot bytes
        img_bytes_to: Current screenshot bytes
        threshold: Pixel difference threshold
        blur_sigma: Gaussian blur sigma
        max_width: Maximum width for diff
        max_height: Maximum height for diff

    Returns:
        bytes: JPEG diff image or None on failure (worker error, timeout, or the
        worker exiting without sending a result)
    """
    ctx = multiprocessing.get_context('spawn')
    parent_conn, child_conn = ctx.Pipe()

    p = ctx.Process(
        target=_worker_generate_diff,
        args=(child_conn, img_bytes_from, img_bytes_to, threshold, blur_sigma, max_width, max_height)
    )
    p.start()

    # The child holds its own handle to this end now. Dropping the parent's copy
    # lets a crashed worker surface as EOF on recv() instead of a full 30s wait.
    child_conn.close()

    result = None
    try:
        # Wait for result (30 second timeout)
        if parent_conn.poll(30):
            result = parent_conn.recv()
        else:
            print("[Parent] Timeout waiting for result after 30s", flush=True)
    except Exception as e:
        print(f"[Parent] Error receiving result: {e}", flush=True)
    finally:
        # Always close pipe first; best-effort, but do not swallow
        # KeyboardInterrupt/SystemExit the way a bare except would
        try:
            parent_conn.close()
        except Exception:
            pass

        # Try graceful shutdown
        p.join(timeout=5)
        if p.is_alive():
            print("[Parent] Process didn't exit gracefully, terminating", flush=True)
            p.terminate()
            p.join(timeout=3)

        # Force kill if still alive
        if p.is_alive():
            print("[Parent] Process didn't terminate, killing", flush=True)
            p.kill()
            p.join(timeout=1)

    return result
|
||||
|
||||
|
||||
def _worker_calculate_change_percentage(conn, img_bytes_from, img_bytes_to, threshold, blur_sigma, max_width, max_height):
    """
    Worker: compute the change percentage in the child process and send it over ``conn``.

    Lives at module level because the 'spawn' start method pickles the process
    target, and a function nested inside another function cannot be pickled.
    Sends a float on success or 0.0 if anything raises; ``conn`` is always closed.
    """
    try:
        # Import handler inside worker
        from .libvips_handler import LibvipsImageDiffHandler

        handler = LibvipsImageDiffHandler()

        # Load images
        img_from = handler.load_from_bytes(img_bytes_from)
        img_to = handler.load_from_bytes(img_bytes_to)

        # Ensure same size
        w1, h1 = handler.get_dimensions(img_from)
        w2, h2 = handler.get_dimensions(img_to)
        if (w1, h1) != (w2, h2):
            img_from = handler.resize(img_from, w2, h2)

        # Downscale
        img_from = handler.resize(img_from, max_width, max_height)
        img_to = handler.resize(img_to, max_width, max_height)

        # Convert to grayscale
        gray_from = handler.to_grayscale(img_from)
        gray_to = handler.to_grayscale(img_to)

        # Optional blur
        gray_from = handler.gaussian_blur(gray_from, blur_sigma)
        gray_to = handler.gaussian_blur(gray_to, blur_sigma)

        # Calculate difference
        diff = handler.absolute_difference(gray_from, gray_to)

        # Threshold and get percentage (first item of the returned pair)
        change_percentage, _ = handler.threshold(diff, int(threshold))

        conn.send(float(change_percentage))

    except Exception as e:
        print(f"[Worker] Calculate error: {e}", flush=True)
        conn.send(0.0)
    finally:
        conn.close()


def calculate_change_percentage_isolated(img_bytes_from, img_bytes_to, threshold, blur_sigma, max_width, max_height):
    """
    Calculate change percentage in isolated subprocess using handler.

    Starts a 'spawn' subprocess, waits up to 30 seconds for its result on a pipe,
    then shuts the subprocess down (join, then terminate, then kill).

    Args:
        img_bytes_from: Previous screenshot bytes
        img_bytes_to: Current screenshot bytes
        threshold: Pixel difference threshold
        blur_sigma: Gaussian blur sigma
        max_width: Maximum width used when downscaling
        max_height: Maximum height used when downscaling

    Returns:
        float: Change percentage (0.0 on worker error, timeout, or the worker
        exiting without sending a result)
    """
    ctx = multiprocessing.get_context('spawn')
    parent_conn, child_conn = ctx.Pipe()

    # The target must be a module-level function: 'spawn' pickles it, and a
    # closure defined inside this function would make start() raise.
    p = ctx.Process(
        target=_worker_calculate_change_percentage,
        args=(child_conn, img_bytes_from, img_bytes_to, threshold, blur_sigma, max_width, max_height)
    )
    p.start()

    # The child holds its own handle to this end now. Dropping the parent's copy
    # lets a crashed worker surface as EOF on recv() instead of a full 30s wait.
    child_conn.close()

    result = 0.0
    try:
        if parent_conn.poll(30):
            result = parent_conn.recv()
    except Exception as e:
        print(f"[Parent] Calculate error receiving result: {e}", flush=True)
    finally:
        # Always close pipe first; best-effort, but do not swallow
        # KeyboardInterrupt/SystemExit the way a bare except would
        try:
            parent_conn.close()
        except Exception:
            pass

        # Try graceful shutdown
        p.join(timeout=5)
        if p.is_alive():
            print("[Parent] Calculate process didn't exit gracefully, terminating", flush=True)
            p.terminate()
            p.join(timeout=3)

        # Force kill if still alive
        if p.is_alive():
            print("[Parent] Calculate process didn't terminate, killing", flush=True)
            p.kill()
            p.join(timeout=1)

    return result
|
||||
|
||||
|
||||
def _worker_compare_images(conn, img_bytes_from, img_bytes_to, threshold, blur_sigma, min_change_percentage, crop_region):
    """
    Worker: compare two images in the child process and send the verdict over ``conn``.

    Lives at module level because the 'spawn' start method pickles the process
    target, and a function nested inside another function cannot be pickled.
    Sends ``(changed_detected, change_percentage)`` on success or ``(False, 0.0)``
    if anything raises; ``conn`` is always closed.
    """
    try:
        print("[Worker] Compare worker starting", flush=True)
        # Import handler inside worker
        from .libvips_handler import LibvipsImageDiffHandler

        print("[Worker] Initializing handler", flush=True)
        handler = LibvipsImageDiffHandler()

        # Load images
        print(f"[Worker] Loading images (from={len(img_bytes_from)} bytes, to={len(img_bytes_to)} bytes)", flush=True)
        img_from = handler.load_from_bytes(img_bytes_from)
        img_to = handler.load_from_bytes(img_bytes_to)
        print("[Worker] Images loaded", flush=True)

        # Crop if region specified
        if crop_region:
            print(f"[Worker] Cropping to region {crop_region}", flush=True)
            left, top, right, bottom = crop_region
            img_from = handler.crop(img_from, left, top, right, bottom)
            img_to = handler.crop(img_to, left, top, right, bottom)
            print("[Worker] Cropping completed", flush=True)

        # Ensure same size
        w1, h1 = handler.get_dimensions(img_from)
        w2, h2 = handler.get_dimensions(img_to)
        print(f"[Worker] Image dimensions: from={w1}x{h1}, to={w2}x{h2}", flush=True)
        if (w1, h1) != (w2, h2):
            print("[Worker] Resizing to match dimensions", flush=True)
            img_from = handler.resize(img_from, w2, h2)

        # Convert to grayscale
        print("[Worker] Converting to grayscale", flush=True)
        gray_from = handler.to_grayscale(img_from)
        gray_to = handler.to_grayscale(img_to)

        # Optional blur
        # NOTE: gaussblur can hang in subprocesses due to LibVIPS threading
        # Skip blur as a workaround - sigma=0.8 is subtle and comparison works without it
        if blur_sigma > 0:
            print(f"[Worker] Skipping blur (sigma={blur_sigma}) due to LibVIPS threading issues in fork", flush=True)
            # gray_from = handler.gaussian_blur(gray_from, blur_sigma)
            # gray_to = handler.gaussian_blur(gray_to, blur_sigma)

        # Calculate difference
        print("[Worker] Calculating difference", flush=True)
        diff = handler.absolute_difference(gray_from, gray_to)

        # Threshold and get percentage (first item of the returned pair)
        print(f"[Worker] Applying threshold ({threshold})", flush=True)
        change_percentage, _ = handler.threshold(diff, int(threshold))

        # Determine if change detected
        changed_detected = change_percentage > min_change_percentage

        print(f"[Worker] Comparison complete: changed={changed_detected}, percentage={change_percentage:.2f}%", flush=True)
        conn.send((changed_detected, float(change_percentage)))

    except Exception as e:
        print(f"[Worker] Compare error: {e}", flush=True)
        import traceback
        traceback.print_exc()
        conn.send((False, 0.0))
    finally:
        conn.close()


def compare_images_isolated(img_bytes_from, img_bytes_to, threshold, blur_sigma, min_change_percentage, crop_region=None):
    """
    Compare images in isolated subprocess for change detection.

    Starts a 'spawn' subprocess, waits up to 30 seconds for its result on a pipe,
    then shuts the subprocess down (join, then terminate, then kill).

    Args:
        img_bytes_from: Previous screenshot bytes
        img_bytes_to: Current screenshot bytes
        threshold: Pixel difference threshold
        blur_sigma: Gaussian blur sigma (the blur step itself is currently skipped)
        min_change_percentage: Minimum percentage to trigger change detection
        crop_region: Optional tuple (left, top, right, bottom) for cropping both images

    Returns:
        tuple: (changed_detected, change_percentage); (False, 0.0) on worker error,
        timeout, or the worker exiting without sending a result
    """
    import time

    print("[Parent] Starting compare_images_isolated subprocess", flush=True)
    ctx = multiprocessing.get_context('spawn')
    parent_conn, child_conn = ctx.Pipe()

    # The target must be a module-level function: 'spawn' pickles it, and a
    # closure defined inside this function would make start() raise.
    p = ctx.Process(
        target=_worker_compare_images,
        args=(child_conn, img_bytes_from, img_bytes_to, threshold, blur_sigma, min_change_percentage, crop_region)
    )
    print("[Parent] Starting subprocess (pid will be assigned)", flush=True)
    p.start()
    print(f"[Parent] Subprocess started (pid={p.pid}), waiting for result (30s timeout)", flush=True)

    # The child holds its own handle to this end now. Dropping the parent's copy
    # lets a crashed worker surface as EOF on recv() instead of a full 30s wait.
    child_conn.close()

    result = (False, 0.0)
    try:
        if parent_conn.poll(30):
            print("[Parent] Result available, receiving", flush=True)
            result = parent_conn.recv()
            print(f"[Parent] Result received: {result}", flush=True)
        else:
            print("[Parent] Timeout waiting for result after 30s", flush=True)
    except Exception as e:
        print(f"[Parent] Compare error receiving result: {e}", flush=True)
    finally:
        # Always close pipe first; best-effort, but do not swallow
        # KeyboardInterrupt/SystemExit the way a bare except would
        try:
            parent_conn.close()
        except Exception:
            pass

        # Try graceful shutdown
        print("[Parent] Waiting for subprocess to exit (5s timeout)", flush=True)
        join_start = time.time()
        p.join(timeout=5)
        join_elapsed = time.time() - join_start
        print(f"[Parent] First join took {join_elapsed:.2f}s", flush=True)

        if p.is_alive():
            print("[Parent] Compare process didn't exit gracefully, terminating", flush=True)
            term_start = time.time()
            p.terminate()
            p.join(timeout=3)
            term_elapsed = time.time() - term_start
            print(f"[Parent] Terminate+join took {term_elapsed:.2f}s", flush=True)

        # Force kill if still alive
        if p.is_alive():
            print("[Parent] Compare process didn't terminate, killing", flush=True)
            kill_start = time.time()
            p.kill()
            p.join(timeout=1)
            kill_elapsed = time.time() - kill_start
            print(f"[Parent] Kill+join took {kill_elapsed:.2f}s", flush=True)

        print("[Parent] Subprocess cleanup complete, returning result", flush=True)

    return result
|
||||
@@ -0,0 +1,627 @@
|
||||
"""
|
||||
OpenCV-based subprocess isolation for image comparison.
|
||||
|
||||
OpenCV is much more stable in multiprocessing contexts than LibVIPS.
|
||||
No threading issues, no fork problems, picklable functions.
|
||||
"""
|
||||
|
||||
import multiprocessing
|
||||
import numpy as np
|
||||
from .. import POLL_TIMEOUT_ABSOLUTE
|
||||
|
||||
# Public implementation name for logging
|
||||
IMPLEMENTATION_NAME = "OpenCV"
|
||||
|
||||
|
||||
def _worker_compare(conn, img_bytes_from, img_bytes_to, pixel_difference_threshold, blur_sigma, crop_region):
|
||||
"""
|
||||
Worker function for image comparison (must be top-level for pickling with spawn).
|
||||
|
||||
Args:
|
||||
conn: Pipe connection for sending results
|
||||
img_bytes_from: Previous screenshot bytes
|
||||
img_bytes_to: Current screenshot bytes
|
||||
pixel_difference_threshold: Pixel-level sensitivity (0-255) - how different must a pixel be to count as changed
|
||||
blur_sigma: Gaussian blur sigma
|
||||
crop_region: Optional (left, top, right, bottom) crop coordinates
|
||||
"""
|
||||
import time
|
||||
try:
|
||||
import cv2
|
||||
|
||||
# CRITICAL: Disable OpenCV threading to prevent thread explosion
|
||||
# With multiprocessing, each subprocess would otherwise spawn threads equal to CPU cores
|
||||
# This causes excessive thread counts and memory overhead
|
||||
# Research: https://medium.com/@rachittayal7/a-note-on-opencv-threads-performance-in-prod-d10180716fba
|
||||
cv2.setNumThreads(1)
|
||||
|
||||
print(f"[{time.time():.3f}] [Worker] Compare worker starting (threads=1 for memory optimization)", flush=True)
|
||||
|
||||
# Decode images from bytes
|
||||
print(f"[{time.time():.3f}] [Worker] Loading images (from={len(img_bytes_from)} bytes, to={len(img_bytes_to)} bytes)", flush=True)
|
||||
img_from = cv2.imdecode(np.frombuffer(img_bytes_from, np.uint8), cv2.IMREAD_COLOR)
|
||||
img_to = cv2.imdecode(np.frombuffer(img_bytes_to, np.uint8), cv2.IMREAD_COLOR)
|
||||
|
||||
# Check if decoding succeeded
|
||||
if img_from is None:
|
||||
raise ValueError("Failed to decode 'from' image - may be corrupt or unsupported format")
|
||||
if img_to is None:
|
||||
raise ValueError("Failed to decode 'to' image - may be corrupt or unsupported format")
|
||||
|
||||
print(f"[{time.time():.3f}] [Worker] Images loaded: from={img_from.shape}, to={img_to.shape}", flush=True)
|
||||
|
||||
# Crop if region specified
|
||||
if crop_region:
|
||||
print(f"[{time.time():.3f}] [Worker] Cropping to region {crop_region}", flush=True)
|
||||
left, top, right, bottom = crop_region
|
||||
img_from = img_from[top:bottom, left:right]
|
||||
img_to = img_to[top:bottom, left:right]
|
||||
print(f"[{time.time():.3f}] [Worker] Cropped: from={img_from.shape}, to={img_to.shape}", flush=True)
|
||||
|
||||
# Resize if dimensions don't match
|
||||
if img_from.shape != img_to.shape:
|
||||
print(f"[{time.time():.3f}] [Worker] Resizing to match dimensions", flush=True)
|
||||
img_from = cv2.resize(img_from, (img_to.shape[1], img_to.shape[0]))
|
||||
|
||||
# Convert to grayscale
|
||||
print(f"[{time.time():.3f}] [Worker] Converting to grayscale", flush=True)
|
||||
gray_from = cv2.cvtColor(img_from, cv2.COLOR_BGR2GRAY)
|
||||
gray_to = cv2.cvtColor(img_to, cv2.COLOR_BGR2GRAY)
|
||||
|
||||
# Optional Gaussian blur
|
||||
if blur_sigma > 0:
|
||||
print(f"[{time.time():.3f}] [Worker] Applying Gaussian blur (sigma={blur_sigma})", flush=True)
|
||||
# OpenCV uses kernel size, convert sigma to kernel size: size = 2 * round(3*sigma) + 1
|
||||
ksize = int(2 * round(3 * blur_sigma)) + 1
|
||||
if ksize % 2 == 0: # Must be odd
|
||||
ksize += 1
|
||||
gray_from = cv2.GaussianBlur(gray_from, (ksize, ksize), blur_sigma)
|
||||
gray_to = cv2.GaussianBlur(gray_to, (ksize, ksize), blur_sigma)
|
||||
print(f"[{time.time():.3f}] [Worker] Blur applied (kernel={ksize}x{ksize})", flush=True)
|
||||
|
||||
# Calculate absolute difference
|
||||
print(f"[{time.time():.3f}] [Worker] Calculating absolute difference", flush=True)
|
||||
diff = cv2.absdiff(gray_from, gray_to)
|
||||
|
||||
# Apply threshold
|
||||
print(f"[{time.time():.3f}] [Worker] Applying pixel difference threshold ({pixel_difference_threshold})", flush=True)
|
||||
_, thresholded = cv2.threshold(diff, int(pixel_difference_threshold), 255, cv2.THRESH_BINARY)
|
||||
|
||||
# Calculate change percentage
|
||||
total_pixels = thresholded.size
|
||||
changed_pixels = np.count_nonzero(thresholded)
|
||||
change_percentage = (changed_pixels / total_pixels) * 100.0
|
||||
|
||||
print(f"[{time.time():.3f}] [Worker] Comparison complete: percentage={change_percentage:.2f}%", flush=True)
|
||||
# Return only the score - let the caller decide if it's a "change"
|
||||
conn.send(float(change_percentage))
|
||||
|
||||
except Exception as e:
|
||||
print(f"[{time.time():.3f}] [Worker] Error: {e}", flush=True)
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
# Send error info as dict so parent can re-raise
|
||||
conn.send({'error': str(e), 'traceback': traceback.format_exc()})
|
||||
finally:
|
||||
conn.close()
|
||||
|
||||
|
||||
async def compare_images_isolated(img_bytes_from, img_bytes_to, pixel_difference_threshold, blur_sigma, crop_region=None):
    """
    Compare images in isolated subprocess using OpenCV (async-safe).

    Starts ``_worker_compare`` in a 'spawn' subprocess and polls the pipe from
    worker threads so the event loop is not blocked. The subprocess is always
    shut down afterwards (join, then terminate, then kill).

    Args:
        img_bytes_from: Previous screenshot bytes
        img_bytes_to: Current screenshot bytes
        pixel_difference_threshold: Pixel-level sensitivity (0-255) - how different must a pixel be to count as changed
        blur_sigma: Gaussian blur sigma
        crop_region: Optional (left, top, right, bottom) crop coordinates

    Returns:
        float: Change percentage (0-100)

    Raises:
        RuntimeError: If the worker reported an error
        TimeoutError: If no result arrived within POLL_TIMEOUT_ABSOLUTE seconds
    """
    import time
    import asyncio
    print(f"[{time.time():.3f}] [Parent] Starting OpenCV comparison subprocess", flush=True)

    # Use spawn method for clean process (no fork issues)
    ctx = multiprocessing.get_context('spawn')
    parent_conn, child_conn = ctx.Pipe()

    p = ctx.Process(
        target=_worker_compare,
        args=(child_conn, img_bytes_from, img_bytes_to, pixel_difference_threshold, blur_sigma, crop_region)
    )

    print(f"[{time.time():.3f}] [Parent] Starting subprocess", flush=True)
    p.start()
    print(f"[{time.time():.3f}] [Parent] Subprocess started (pid={p.pid}), waiting for result ({POLL_TIMEOUT_ABSOLUTE}s timeout)", flush=True)

    result = 0.0
    try:
        # Async-friendly polling: check in small intervals without blocking event loop.
        # The deadline uses the monotonic clock so wall-clock adjustments cannot
        # stretch or cut short the wait.
        deadline = time.monotonic() + POLL_TIMEOUT_ABSOLUTE
        while time.monotonic() < deadline:
            # Run poll() in thread to avoid blocking event loop
            has_data = await asyncio.to_thread(parent_conn.poll, 0.1)
            if has_data:
                print(f"[{time.time():.3f}] [Parent] Result available, receiving", flush=True)
                result = await asyncio.to_thread(parent_conn.recv)
                # Check if result is an error dict
                if isinstance(result, dict) and 'error' in result:
                    raise RuntimeError(f"Image comparison failed: {result['error']}")
                print(f"[{time.time():.3f}] [Parent] Result received: {result:.2f}%", flush=True)
                break
            await asyncio.sleep(0)  # Yield control to event loop
        else:
            # Loop ended without break: the deadline passed with no data
            from loguru import logger
            logger.critical(f"[OpenCV subprocess] Timeout waiting for compare_images result after {POLL_TIMEOUT_ABSOLUTE}s (subprocess may be hung)")
            print(f"[{time.time():.3f}] [Parent] Timeout waiting for result after {POLL_TIMEOUT_ABSOLUTE}s", flush=True)
            raise TimeoutError(f"Image comparison subprocess timeout after {POLL_TIMEOUT_ABSOLUTE}s")
    except Exception as e:
        print(f"[{time.time():.3f}] [Parent] Error receiving result: {e}", flush=True)
        raise
    finally:
        # Always close pipe first; best-effort, but do not swallow
        # KeyboardInterrupt/SystemExit the way a bare except would
        try:
            parent_conn.close()
        except Exception:
            pass

        # Try graceful shutdown (async-safe)
        print(f"[{time.time():.3f}] [Parent] Waiting for subprocess to exit (5s timeout)", flush=True)
        join_start = time.monotonic()
        await asyncio.to_thread(p.join, 5)
        join_elapsed = time.monotonic() - join_start
        print(f"[{time.time():.3f}] [Parent] First join took {join_elapsed:.2f}s", flush=True)

        if p.is_alive():
            print(f"[{time.time():.3f}] [Parent] Process didn't exit gracefully, terminating", flush=True)
            term_start = time.monotonic()
            p.terminate()
            await asyncio.to_thread(p.join, 3)
            term_elapsed = time.monotonic() - term_start
            print(f"[{time.time():.3f}] [Parent] Terminate+join took {term_elapsed:.2f}s", flush=True)

        # Force kill if still alive
        if p.is_alive():
            print(f"[{time.time():.3f}] [Parent] Process didn't terminate, killing", flush=True)
            kill_start = time.monotonic()
            p.kill()
            await asyncio.to_thread(p.join, 1)
            kill_elapsed = time.monotonic() - kill_start
            print(f"[{time.time():.3f}] [Parent] Kill+join took {kill_elapsed:.2f}s", flush=True)

        print(f"[{time.time():.3f}] [Parent] Subprocess cleanup complete, returning result", flush=True)

    return result
|
||||
|
||||
|
||||
def _worker_generate_diff(conn, img_bytes_from, img_bytes_to, pixel_difference_threshold, blur_sigma, max_width, max_height):
|
||||
"""
|
||||
Worker function for generating visual diff with red overlay.
|
||||
"""
|
||||
import time
|
||||
try:
|
||||
import cv2
|
||||
|
||||
cv2.setNumThreads(1)
|
||||
print(f"[{time.time():.3f}] [Worker] Generate diff worker starting", flush=True)
|
||||
|
||||
# Decode images
|
||||
img_from = cv2.imdecode(np.frombuffer(img_bytes_from, np.uint8), cv2.IMREAD_COLOR)
|
||||
img_to = cv2.imdecode(np.frombuffer(img_bytes_to, np.uint8), cv2.IMREAD_COLOR)
|
||||
|
||||
# Resize if needed to match dimensions
|
||||
if img_from.shape != img_to.shape:
|
||||
img_from = cv2.resize(img_from, (img_to.shape[1], img_to.shape[0]))
|
||||
|
||||
# Downscale to max dimensions for faster processing
|
||||
h, w = img_to.shape[:2]
|
||||
if w > max_width or h > max_height:
|
||||
scale = min(max_width / w, max_height / h)
|
||||
new_w = int(w * scale)
|
||||
new_h = int(h * scale)
|
||||
img_from = cv2.resize(img_from, (new_w, new_h))
|
||||
img_to = cv2.resize(img_to, (new_w, new_h))
|
||||
|
||||
# Convert to grayscale
|
||||
gray_from = cv2.cvtColor(img_from, cv2.COLOR_BGR2GRAY)
|
||||
gray_to = cv2.cvtColor(img_to, cv2.COLOR_BGR2GRAY)
|
||||
|
||||
# Optional blur
|
||||
if blur_sigma > 0:
|
||||
ksize = int(2 * round(3 * blur_sigma)) + 1
|
||||
if ksize % 2 == 0:
|
||||
ksize += 1
|
||||
gray_from = cv2.GaussianBlur(gray_from, (ksize, ksize), blur_sigma)
|
||||
gray_to = cv2.GaussianBlur(gray_to, (ksize, ksize), blur_sigma)
|
||||
|
||||
# Calculate difference
|
||||
diff = cv2.absdiff(gray_from, gray_to)
|
||||
|
||||
# Apply threshold to get mask
|
||||
_, mask = cv2.threshold(diff, int(pixel_difference_threshold), 255, cv2.THRESH_BINARY)
|
||||
|
||||
# Create red overlay on original 'to' image
|
||||
# Where mask is 255 (changed), blend 50% red
|
||||
overlay = img_to.copy()
|
||||
overlay[:, :, 2] = np.where(mask > 0,
|
||||
np.clip(overlay[:, :, 2] * 0.5 + 127, 0, 255).astype(np.uint8),
|
||||
overlay[:, :, 2])
|
||||
overlay[:, :, 0:2] = np.where(mask[:, :, np.newaxis] > 0,
|
||||
(overlay[:, :, 0:2] * 0.5).astype(np.uint8),
|
||||
overlay[:, :, 0:2])
|
||||
|
||||
# Encode as JPEG
|
||||
_, encoded = cv2.imencode('.jpg', overlay, [cv2.IMWRITE_JPEG_QUALITY, 85])
|
||||
diff_bytes = encoded.tobytes()
|
||||
|
||||
print(f"[{time.time():.3f}] [Worker] Generated diff ({len(diff_bytes)} bytes)", flush=True)
|
||||
conn.send(diff_bytes)
|
||||
|
||||
except Exception as e:
|
||||
print(f"[{time.time():.3f}] [Worker] Generate diff error: {e}", flush=True)
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
# Send error info as dict so parent can re-raise
|
||||
conn.send({'error': str(e), 'traceback': traceback.format_exc()})
|
||||
finally:
|
||||
conn.close()
|
||||
|
||||
|
||||
async def generate_diff_isolated(img_bytes_from, img_bytes_to, pixel_difference_threshold, blur_sigma, max_width, max_height):
    """
    Generate visual diff with red overlay in isolated subprocess (async-safe).

    Starts ``_worker_generate_diff`` in a 'spawn' subprocess and polls the pipe
    from worker threads so the event loop is not blocked. The subprocess is always
    shut down afterwards (join, then terminate, then kill).

    Args:
        img_bytes_from: Previous screenshot bytes
        img_bytes_to: Current screenshot bytes
        pixel_difference_threshold: Threshold applied to the pixel difference
        blur_sigma: Gaussian blur sigma
        max_width: Maximum width of the generated diff
        max_height: Maximum height of the generated diff

    Returns:
        bytes: JPEG diff image as sent by the worker

    Raises:
        RuntimeError: If the worker reported an error
        TimeoutError: If no result arrived within POLL_TIMEOUT_ABSOLUTE seconds
    """
    import time
    import asyncio
    print(f"[{time.time():.3f}] [Parent] Starting generate_diff subprocess", flush=True)

    ctx = multiprocessing.get_context('spawn')
    parent_conn, child_conn = ctx.Pipe()

    p = ctx.Process(
        target=_worker_generate_diff,
        args=(child_conn, img_bytes_from, img_bytes_to, pixel_difference_threshold, blur_sigma, max_width, max_height)
    )

    print(f"[{time.time():.3f}] [Parent] Starting subprocess", flush=True)
    p.start()
    print(f"[{time.time():.3f}] [Parent] Subprocess started (pid={p.pid}), waiting for result ({POLL_TIMEOUT_ABSOLUTE}s timeout)", flush=True)

    result = None
    try:
        # Async-friendly polling: check in small intervals without blocking event loop.
        # The deadline uses the monotonic clock so wall-clock adjustments cannot
        # stretch or cut short the wait.
        deadline = time.monotonic() + POLL_TIMEOUT_ABSOLUTE
        while time.monotonic() < deadline:
            # Run poll() in thread to avoid blocking event loop
            has_data = await asyncio.to_thread(parent_conn.poll, 0.1)
            if has_data:
                print(f"[{time.time():.3f}] [Parent] Result available, receiving", flush=True)
                result = await asyncio.to_thread(parent_conn.recv)
                # Check if result is an error dict
                if isinstance(result, dict) and 'error' in result:
                    raise RuntimeError(f"Generate diff failed: {result['error']}")
                print(f"[{time.time():.3f}] [Parent] Result received ({len(result) if result else 0} bytes)", flush=True)
                break
            await asyncio.sleep(0)  # Yield control to event loop
        else:
            # Loop ended without break: the deadline passed with no data
            from loguru import logger
            logger.critical(f"[OpenCV subprocess] Timeout waiting for generate_diff result after {POLL_TIMEOUT_ABSOLUTE}s (subprocess may be hung)")
            print(f"[{time.time():.3f}] [Parent] Timeout waiting for result after {POLL_TIMEOUT_ABSOLUTE}s", flush=True)
            raise TimeoutError(f"Generate diff subprocess timeout after {POLL_TIMEOUT_ABSOLUTE}s")
    except Exception as e:
        print(f"[{time.time():.3f}] [Parent] Error receiving diff: {e}", flush=True)
        raise
    finally:
        # Always close pipe first; best-effort, but do not swallow
        # KeyboardInterrupt/SystemExit the way a bare except would
        try:
            parent_conn.close()
        except Exception:
            pass

        # Try graceful shutdown (async-safe)
        print(f"[{time.time():.3f}] [Parent] Waiting for subprocess to exit (5s timeout)", flush=True)
        join_start = time.monotonic()
        await asyncio.to_thread(p.join, 5)
        join_elapsed = time.monotonic() - join_start
        print(f"[{time.time():.3f}] [Parent] First join took {join_elapsed:.2f}s", flush=True)

        if p.is_alive():
            print(f"[{time.time():.3f}] [Parent] Process didn't exit gracefully, terminating", flush=True)
            term_start = time.monotonic()
            p.terminate()
            await asyncio.to_thread(p.join, 3)
            term_elapsed = time.monotonic() - term_start
            print(f"[{time.time():.3f}] [Parent] Terminate+join took {term_elapsed:.2f}s", flush=True)

        # Force kill if still alive
        if p.is_alive():
            print(f"[{time.time():.3f}] [Parent] Process didn't terminate, killing", flush=True)
            kill_start = time.monotonic()
            p.kill()
            await asyncio.to_thread(p.join, 1)
            kill_elapsed = time.monotonic() - kill_start
            print(f"[{time.time():.3f}] [Parent] Kill+join took {kill_elapsed:.2f}s", flush=True)

        print(f"[{time.time():.3f}] [Parent] Subprocess cleanup complete, returning result", flush=True)

    return result
|
||||
|
||||
|
||||
def _worker_draw_bounding_box(conn, img_bytes, x, y, width, height, color, thickness):
|
||||
"""
|
||||
Worker function for drawing bounding box on image.
|
||||
"""
|
||||
import time
|
||||
try:
|
||||
import cv2
|
||||
|
||||
cv2.setNumThreads(1)
|
||||
print(f"[{time.time():.3f}] [Worker] Draw bounding box worker starting", flush=True)
|
||||
|
||||
# Decode image
|
||||
img = cv2.imdecode(np.frombuffer(img_bytes, np.uint8), cv2.IMREAD_COLOR)
|
||||
if img is None:
|
||||
print(f"[{time.time():.3f}] [Worker] Failed to decode image", flush=True)
|
||||
conn.send(None)
|
||||
return
|
||||
|
||||
# Draw rectangle (BGR format)
|
||||
cv2.rectangle(img, (x, y), (x + width, y + height), color, thickness)
|
||||
|
||||
# Encode back to PNG
|
||||
_, encoded = cv2.imencode('.png', img)
|
||||
result_bytes = encoded.tobytes()
|
||||
|
||||
print(f"[{time.time():.3f}] [Worker] Bounding box drawn ({len(result_bytes)} bytes)", flush=True)
|
||||
conn.send(result_bytes)
|
||||
|
||||
except Exception as e:
|
||||
print(f"[{time.time():.3f}] [Worker] Draw bounding box error: {e}", flush=True)
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
# Send error info as dict so parent can re-raise
|
||||
conn.send({'error': str(e), 'traceback': traceback.format_exc()})
|
||||
finally:
|
||||
conn.close()
|
||||
|
||||
|
||||
async def draw_bounding_box_isolated(img_bytes, x, y, width, height, color=(255, 0, 0), thickness=3):
    """
    Draw bounding box on image in isolated subprocess (async-safe).

    Starts ``_worker_draw_bounding_box`` in a 'spawn' subprocess and polls the pipe
    from worker threads so the event loop is not blocked. The subprocess is always
    shut down afterwards (join, then terminate, then kill).

    Args:
        img_bytes: Image data as bytes
        x: Left coordinate
        y: Top coordinate
        width: Box width
        height: Box height
        color: BGR color tuple (default: blue)
        thickness: Line thickness in pixels

    Returns:
        bytes: PNG image with bounding box, or None if the worker could not decode
        the image

    Raises:
        RuntimeError: If the worker reported an error
        TimeoutError: If no result arrived within POLL_TIMEOUT_ABSOLUTE seconds
    """
    import time
    import asyncio
    print(f"[{time.time():.3f}] [Parent] Starting draw_bounding_box subprocess", flush=True)

    ctx = multiprocessing.get_context('spawn')
    parent_conn, child_conn = ctx.Pipe()

    p = ctx.Process(
        target=_worker_draw_bounding_box,
        args=(child_conn, img_bytes, x, y, width, height, color, thickness)
    )

    print(f"[{time.time():.3f}] [Parent] Starting subprocess", flush=True)
    p.start()
    print(f"[{time.time():.3f}] [Parent] Subprocess started (pid={p.pid}), waiting for result ({POLL_TIMEOUT_ABSOLUTE}s timeout)", flush=True)

    result = None
    try:
        # Async-friendly polling: check in small intervals without blocking event loop.
        # The deadline uses the monotonic clock so wall-clock adjustments cannot
        # stretch or cut short the wait.
        deadline = time.monotonic() + POLL_TIMEOUT_ABSOLUTE
        while time.monotonic() < deadline:
            # Run poll() in thread to avoid blocking event loop
            has_data = await asyncio.to_thread(parent_conn.poll, 0.1)
            if has_data:
                print(f"[{time.time():.3f}] [Parent] Result available, receiving", flush=True)
                # Run recv() in thread too
                result = await asyncio.to_thread(parent_conn.recv)
                # Check if result is an error dict
                if isinstance(result, dict) and 'error' in result:
                    raise RuntimeError(f"Draw bounding box failed: {result['error']}")
                print(f"[{time.time():.3f}] [Parent] Result received ({len(result) if result else 0} bytes)", flush=True)
                break
            # Yield control to event loop
            await asyncio.sleep(0)
        else:
            # Loop ended without break: the deadline passed with no data
            from loguru import logger
            logger.critical(f"[OpenCV subprocess] Timeout waiting for draw_bounding_box result after {POLL_TIMEOUT_ABSOLUTE}s (subprocess may be hung)")
            print(f"[{time.time():.3f}] [Parent] Timeout waiting for result after {POLL_TIMEOUT_ABSOLUTE}s", flush=True)
            raise TimeoutError(f"Draw bounding box subprocess timeout after {POLL_TIMEOUT_ABSOLUTE}s")
    except Exception as e:
        print(f"[{time.time():.3f}] [Parent] Error receiving result: {e}", flush=True)
        raise
    finally:
        # Always close pipe first; best-effort, but do not swallow
        # KeyboardInterrupt/SystemExit the way a bare except would
        try:
            parent_conn.close()
        except Exception:
            pass

        # Try graceful shutdown (run join in thread to avoid blocking)
        print(f"[{time.time():.3f}] [Parent] Waiting for subprocess to exit (3s timeout)", flush=True)
        join_start = time.monotonic()
        await asyncio.to_thread(p.join, 3)
        join_elapsed = time.monotonic() - join_start
        print(f"[{time.time():.3f}] [Parent] First join took {join_elapsed:.2f}s", flush=True)

        if p.is_alive():
            print(f"[{time.time():.3f}] [Parent] Process didn't exit gracefully, terminating", flush=True)
            term_start = time.monotonic()
            p.terminate()
            await asyncio.to_thread(p.join, 2)
            term_elapsed = time.monotonic() - term_start
            print(f"[{time.time():.3f}] [Parent] Terminate+join took {term_elapsed:.2f}s", flush=True)

        # Force kill if still alive
        if p.is_alive():
            print(f"[{time.time():.3f}] [Parent] Process didn't terminate, killing", flush=True)
            kill_start = time.monotonic()
            p.kill()
            await asyncio.to_thread(p.join, 1)
            kill_elapsed = time.monotonic() - kill_start
            print(f"[{time.time():.3f}] [Parent] Kill+join took {kill_elapsed:.2f}s", flush=True)

        print(f"[{time.time():.3f}] [Parent] Subprocess cleanup complete, returning result", flush=True)

    return result
|
||||
|
||||
|
||||
def _worker_calculate_percentage(conn, img_bytes_from, img_bytes_to, pixel_difference_threshold, blur_sigma, max_width, max_height):
|
||||
"""
|
||||
Worker function for calculating change percentage.
|
||||
"""
|
||||
import time
|
||||
try:
|
||||
import cv2
|
||||
|
||||
cv2.setNumThreads(1)
|
||||
|
||||
# Decode images
|
||||
img_from = cv2.imdecode(np.frombuffer(img_bytes_from, np.uint8), cv2.IMREAD_COLOR)
|
||||
img_to = cv2.imdecode(np.frombuffer(img_bytes_to, np.uint8), cv2.IMREAD_COLOR)
|
||||
|
||||
# Resize if needed
|
||||
if img_from.shape != img_to.shape:
|
||||
img_from = cv2.resize(img_from, (img_to.shape[1], img_to.shape[0]))
|
||||
|
||||
# Downscale to max dimensions
|
||||
h, w = img_to.shape[:2]
|
||||
if w > max_width or h > max_height:
|
||||
scale = min(max_width / w, max_height / h)
|
||||
new_w = int(w * scale)
|
||||
new_h = int(h * scale)
|
||||
img_from = cv2.resize(img_from, (new_w, new_h))
|
||||
img_to = cv2.resize(img_to, (new_w, new_h))
|
||||
|
||||
# Convert to grayscale
|
||||
gray_from = cv2.cvtColor(img_from, cv2.COLOR_BGR2GRAY)
|
||||
gray_to = cv2.cvtColor(img_to, cv2.COLOR_BGR2GRAY)
|
||||
|
||||
# Optional blur
|
||||
if blur_sigma > 0:
|
||||
ksize = int(2 * round(3 * blur_sigma)) + 1
|
||||
if ksize % 2 == 0:
|
||||
ksize += 1
|
||||
gray_from = cv2.GaussianBlur(gray_from, (ksize, ksize), blur_sigma)
|
||||
gray_to = cv2.GaussianBlur(gray_to, (ksize, ksize), blur_sigma)
|
||||
|
||||
# Calculate difference
|
||||
diff = cv2.absdiff(gray_from, gray_to)
|
||||
|
||||
# Apply threshold
|
||||
_, thresholded = cv2.threshold(diff, int(pixel_difference_threshold), 255, cv2.THRESH_BINARY)
|
||||
|
||||
# Calculate percentage
|
||||
total_pixels = thresholded.size
|
||||
changed_pixels = np.count_nonzero(thresholded)
|
||||
change_percentage = (changed_pixels / total_pixels) * 100.0
|
||||
|
||||
conn.send(float(change_percentage))
|
||||
|
||||
except Exception as e:
|
||||
print(f"[{time.time():.3f}] [Worker] Calculate percentage error: {e}", flush=True)
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
# Send error info as dict so parent can re-raise
|
||||
conn.send({'error': str(e), 'traceback': traceback.format_exc()})
|
||||
finally:
|
||||
conn.close()
|
||||
|
||||
|
||||
async def calculate_change_percentage_isolated(img_bytes_from, img_bytes_to, pixel_difference_threshold, blur_sigma, max_width, max_height):
    """
    Calculate change percentage in isolated subprocess (async-safe).

    A 'spawn'-context process runs ``_worker_calculate_percentage`` and sends
    its result back over a pipe. The pipe is polled from worker threads so the
    event loop is never blocked, and the subprocess is always reaped
    (join -> terminate -> kill) before this coroutine finishes.

    Args:
        img_bytes_from: Encoded bytes of the earlier image.
        img_bytes_to: Encoded bytes of the later image.
        pixel_difference_threshold: Passed through to the worker.
        blur_sigma: Passed through to the worker.
        max_width: Passed through to the worker.
        max_height: Passed through to the worker.

    Returns:
        float: Change percentage

    Raises:
        RuntimeError: The worker sent back an ``{'error': ...}`` dict.
        TimeoutError: No result arrived within POLL_TIMEOUT_ABSOLUTE seconds.
    """
    import time
    import asyncio
    print(f"[{time.time():.3f}] [Parent] Starting calculate_percentage subprocess", flush=True)

    ctx = multiprocessing.get_context('spawn')
    parent_conn, child_conn = ctx.Pipe()

    p = ctx.Process(
        target=_worker_calculate_percentage,
        args=(child_conn, img_bytes_from, img_bytes_to, pixel_difference_threshold, blur_sigma, max_width, max_height)
    )

    print(f"[{time.time():.3f}] [Parent] Starting subprocess", flush=True)
    p.start()
    print(f"[{time.time():.3f}] [Parent] Subprocess started (pid={p.pid}), waiting for result ({POLL_TIMEOUT_ABSOLUTE}s timeout)", flush=True)

    result = 0.0
    try:
        # Async-friendly polling: check in small intervals without blocking event loop.
        # The deadline uses the monotonic clock so wall-clock adjustments (NTP, DST,
        # manual changes) can neither cut the wait short nor extend it.
        deadline = time.monotonic() + POLL_TIMEOUT_ABSOLUTE
        while time.monotonic() < deadline:
            # Run poll() in thread to avoid blocking event loop
            has_data = await asyncio.to_thread(parent_conn.poll, 0.1)
            if has_data:
                print(f"[{time.time():.3f}] [Parent] Result available, receiving", flush=True)
                result = await asyncio.to_thread(parent_conn.recv)
                # Check if result is an error dict
                if isinstance(result, dict) and 'error' in result:
                    raise RuntimeError(f"Calculate change percentage failed: {result['error']}")
                print(f"[{time.time():.3f}] [Parent] Result received: {result:.2f}%", flush=True)
                break
            await asyncio.sleep(0)  # Yield control to event loop
        else:
            # while/else: only reached when the deadline expired without a break
            from loguru import logger
            logger.critical(f"[OpenCV subprocess] Timeout waiting for calculate_change_percentage result after {POLL_TIMEOUT_ABSOLUTE}s (subprocess may be hung)")
            print(f"[{time.time():.3f}] [Parent] Timeout waiting for result after {POLL_TIMEOUT_ABSOLUTE}s", flush=True)
            raise TimeoutError(f"Calculate change percentage subprocess timeout after {POLL_TIMEOUT_ABSOLUTE}s")
    except Exception as e:
        print(f"[{time.time():.3f}] [Parent] Error receiving percentage: {e}", flush=True)
        raise
    finally:
        # Always close pipe first. Both ends are closed: the parent's handle on the
        # child end is otherwise leaked until garbage collection (the worker holds
        # its own duplicate, so this does not affect it).
        for conn in (parent_conn, child_conn):
            try:
                conn.close()
            except Exception:
                # Best-effort cleanup; a failed close must not mask the real outcome
                pass

        # Try graceful shutdown (async-safe)
        print(f"[{time.time():.3f}] [Parent] Waiting for subprocess to exit (5s timeout)", flush=True)
        join_start = time.time()
        await asyncio.to_thread(p.join, 5)
        join_elapsed = time.time() - join_start
        print(f"[{time.time():.3f}] [Parent] First join took {join_elapsed:.2f}s", flush=True)

        if p.is_alive():
            print(f"[{time.time():.3f}] [Parent] Process didn't exit gracefully, terminating", flush=True)
            term_start = time.time()
            p.terminate()
            await asyncio.to_thread(p.join, 3)
            term_elapsed = time.time() - term_start
            print(f"[{time.time():.3f}] [Parent] Terminate+join took {term_elapsed:.2f}s", flush=True)

        if p.is_alive():
            print(f"[{time.time():.3f}] [Parent] Process didn't terminate, killing", flush=True)
            kill_start = time.time()
            p.kill()
            await asyncio.to_thread(p.join, 1)
            kill_elapsed = time.time() - kill_start
            print(f"[{time.time():.3f}] [Parent] Kill+join took {kill_elapsed:.2f}s", flush=True)

        print(f"[{time.time():.3f}] [Parent] Subprocess cleanup complete, returning result", flush=True)

    return result
|
||||
@@ -0,0 +1,340 @@
|
||||
"""
|
||||
LibVIPS implementation of ImageDiffHandler.
|
||||
|
||||
Uses pyvips for high-performance image processing with streaming architecture
|
||||
and low memory footprint. Ideal for large screenshots (8000px+).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
import os
|
||||
from typing import Tuple, Any, TYPE_CHECKING
|
||||
from loguru import logger
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import pyvips
|
||||
|
||||
try:
|
||||
import pyvips
|
||||
PYVIPS_AVAILABLE = True
|
||||
except ImportError:
|
||||
PYVIPS_AVAILABLE = False
|
||||
logger.warning("pyvips not available - install with: pip install pyvips")
|
||||
|
||||
from . import ImageDiffHandler
|
||||
|
||||
|
||||
class LibvipsImageDiffHandler(ImageDiffHandler):
    """
    LibVIPS implementation using streaming architecture.

    Benefits:
    - 3x faster than ImageMagick
    - 5x less memory than PIL
    - Automatic multi-threading
    - Streaming - processes images in chunks
    """

    def __init__(self):
        """Fail early with ImportError when pyvips could not be imported."""
        if not PYVIPS_AVAILABLE:
            raise ImportError("pyvips is not installed. Install with: pip install pyvips")

    @staticmethod
    def _write_via_temp_file(img: pyvips.Image, suffix: str, **write_args) -> bytes:
        """
        Encode ``img`` by writing it to a temp file and reading the bytes back.

        The temp file is removed in all cases, including when writing or
        reading fails. Any exception from writing/reading propagates.
        """
        import tempfile

        temp_path = None
        try:
            # delete=False: only the unique path is needed, libvips writes the file itself
            with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
                temp_path = tmp.name

            # Write to file
            img.write_to_file(temp_path, **write_args)

            # Read bytes
            with open(temp_path, 'rb') as f:
                return f.read()
        finally:
            if temp_path is not None:
                try:
                    os.unlink(temp_path)
                except OSError as e:
                    # Cleanup is best-effort; never let it mask the real outcome
                    logger.debug(f"Could not remove temp file {temp_path}: {e}")

    @staticmethod
    def _as_gray_array(image: pyvips.Image):
        """
        Convert a pyvips image to a 2-D uint8 grayscale numpy array for OpenCV.

        Handles 1-band (already gray), 2-band (gray + alpha), 3-band (RGB) and
        4-band (RGBA) images. Assumes 8-bit samples, as the buffer is read as uint8.
        """
        import cv2
        import numpy as np

        pixels = np.ndarray(
            buffer=image.write_to_memory(),
            dtype=np.uint8,
            shape=[image.height, image.width, image.bands]
        )
        if image.bands in (1, 2):
            # First band is the luminance; copy so OpenCV gets a contiguous 2-D array
            return np.ascontiguousarray(pixels[:, :, 0])
        if image.bands == 4:
            return cv2.cvtColor(pixels, cv2.COLOR_RGBA2GRAY)
        return cv2.cvtColor(pixels, cv2.COLOR_RGB2GRAY)

    def load_from_bytes(self, img_bytes: bytes) -> pyvips.Image:
        """Load image from bytes using libvips streaming."""
        return pyvips.Image.new_from_buffer(img_bytes, '')

    def save_to_bytes(self, img: pyvips.Image, format: str = 'png', quality: int = 85) -> bytes:
        """
        Save image to bytes using temp file.

        Note: Uses temp file instead of write_to_buffer() to avoid C memory leak.
        See: https://github.com/libvips/pyvips/issues/234

        Args:
            img: Image to encode.
            format: 'png', 'jpg' or 'jpeg' (case-insensitive).
            quality: JPEG quality (ignored for PNG).

        Returns:
            Encoded image bytes.

        Raises:
            ValueError: If ``format`` is not supported.
        """
        format = format.lower()

        # Validate before the try-block so an unsupported format is reported to
        # the caller instead of being swallowed and silently encoded as JPEG
        if format == 'png':
            suffix = '.png'
            write_args = {'compression': 6}
        elif format in ('jpg', 'jpeg'):
            suffix = '.jpg'
            write_args = {'Q': quality}
        else:
            raise ValueError(f"Unsupported format: {format}")

        try:
            # Use temp file to avoid write_to_buffer() memory leak
            return self._write_via_temp_file(img, suffix, **write_args)
        except Exception as e:
            logger.error(f"Failed to save via temp file: {e}")
            # Fallback to write_to_buffer if temp file fails
            return img.write_to_buffer(suffix, **write_args)

    def crop(self, img: pyvips.Image, left: int, top: int, right: int, bottom: int) -> pyvips.Image:
        """Crop image using libvips; the box is given as (left, top, right, bottom)."""
        width = right - left
        height = bottom - top
        return img.crop(left, top, width, height)

    def resize(self, img: pyvips.Image, max_width: int, max_height: int) -> pyvips.Image:
        """
        Resize image maintaining aspect ratio.

        Uses thumbnail_image for efficient downscaling with streaming.
        Images already within the limits are returned unchanged.
        """
        width, height = img.width, img.height

        if width <= max_width and height <= max_height:
            return img

        # Calculate scaling to fit within max dimensions
        width_ratio = max_width / width if width > max_width else 1.0
        height_ratio = max_height / height if height > max_height else 1.0
        ratio = min(width_ratio, height_ratio)

        # Clamp to 1px so extreme aspect ratios never yield a zero-sized target
        new_width = max(1, int(width * ratio))
        new_height = max(1, int(height * ratio))

        logger.debug(f"Resizing image: {width}x{height} -> {new_width}x{new_height}")

        # thumbnail_image is faster than resize for downscaling
        return img.thumbnail_image(new_width, height=new_height)

    def get_dimensions(self, img: pyvips.Image) -> Tuple[int, int]:
        """Get image dimensions as (width, height)."""
        return (img.width, img.height)

    def to_grayscale(self, img: pyvips.Image) -> pyvips.Image:
        """Convert to grayscale using 'b-w' colorspace."""
        return img.colourspace('b-w')

    def gaussian_blur(self, img: pyvips.Image, sigma: float) -> pyvips.Image:
        """Apply Gaussian blur; the image is returned unchanged when sigma <= 0."""
        if sigma > 0:
            return img.gaussblur(sigma)
        return img

    def absolute_difference(self, img1: pyvips.Image, img2: pyvips.Image) -> pyvips.Image:
        """
        Calculate absolute difference using operator overloading.

        LibVIPS supports arithmetic operations between images.
        """
        return (img1 - img2).abs()

    def threshold(self, img: pyvips.Image, threshold_value: int) -> Tuple[float, pyvips.Image]:
        """
        Apply threshold and calculate change percentage.

        Uses ifthenelse for efficient thresholding.

        Returns:
            (change_percentage, mask) where mask is 255 for pixels above the
            threshold and 0 elsewhere.
        """
        # Create binary mask: pixels above threshold = 255, others = 0
        mask = (img > threshold_value).ifthenelse(255, 0)

        # Calculate percentage by averaging mask values
        # avg() returns mean pixel value (0-255)
        # Divide by 255 to get proportion, multiply by 100 for percentage
        mean_value = mask.avg()
        change_percentage = (mean_value / 255.0) * 100.0

        return float(change_percentage), mask

    def apply_red_overlay(self, img: pyvips.Image, mask: pyvips.Image) -> bytes:
        """
        Apply red overlay where mask is True (50% blend).

        Args:
            img: Color image (will be converted to RGB if needed)
            mask: Binary mask (255 where changed, 0 elsewhere)

        Returns:
            JPEG bytes with red overlay
        """
        # Ensure RGB colorspace
        if img.bands == 1:
            img = img.colourspace('srgb')

        # Normalize mask to 0-1 range for blending
        mask_normalized = mask / 255.0

        # Split into R, G, B channels
        channels = img.bandsplit()
        r, g, b = channels[0], channels[1], channels[2]

        # Apply red overlay (50% blend):
        # Where mask is 1: blend 50% original with 50% red (255)
        # Where mask is 0: keep original
        r = r * (1 - mask_normalized * 0.5) + 127.5 * mask_normalized
        g = g * (1 - mask_normalized * 0.5)
        b = b * (1 - mask_normalized * 0.5)

        # Recombine channels
        result = r.bandjoin([g, b])

        # CRITICAL: Use temp file instead of write_to_buffer()
        # write_to_buffer() leaks C memory that isn't returned to OS
        # See: https://github.com/libvips/pyvips/issues/234
        try:
            return self._write_via_temp_file(result, '.jpg', Q=85)
        except Exception as e:
            logger.error(f"Failed to write image via temp file: {e}")
            # Fallback to write_to_buffer if temp file fails
            return result.write_to_buffer('.jpg', Q=85)

    def close(self, img: pyvips.Image) -> None:
        """
        LibVIPS uses automatic reference counting.

        No explicit cleanup needed - memory freed when references drop to zero.
        """
        pass

    def find_template(
        self,
        img: pyvips.Image,
        template_img: pyvips.Image,
        original_bbox: Tuple[int, int, int, int],
        search_tolerance: float = 0.2
    ) -> Tuple[int, int, int, int] | None:
        """
        Find template in image using OpenCV template matching.

        Note: This temporarily converts to numpy for OpenCV operations since
        libvips doesn't have template matching built-in.

        Args:
            img: Image to search in.
            template_img: Template to look for.
            original_bbox: (left, top, right, bottom) where the template used to be.
            search_tolerance: Fraction of the box size added as a search margin
                on every side.

        Returns:
            New (left, top, right, bottom) box when the match confidence is at
            least 80%, otherwise None (also None on any error).
        """
        import cv2

        try:
            left, top, right, bottom = original_bbox
            width = right - left
            height = bottom - top

            # Calculate search region
            margin_x = int(width * search_tolerance)
            margin_y = int(height * search_tolerance)

            search_left = max(0, left - margin_x)
            search_top = max(0, top - margin_y)
            search_right = min(img.width, right + margin_x)
            search_bottom = min(img.height, bottom + margin_y)

            # Crop search region
            search_region = self.crop(img, search_left, search_top, search_right, search_bottom)

            # Convert to grayscale numpy arrays for OpenCV (band-count aware)
            search_gray = self._as_gray_array(search_region)
            template_gray = self._as_gray_array(template_img)

            logger.debug(f"Searching for template in region: ({search_left}, {search_top}) to ({search_right}, {search_bottom})")

            # Perform template matching
            result = cv2.matchTemplate(search_gray, template_gray, cv2.TM_CCOEFF_NORMED)
            min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(result)

            logger.debug(f"Template matching confidence: {max_val:.2%}")

            # Check if match is good enough (80% confidence threshold)
            if max_val >= 0.8:
                # Calculate new bounding box in original image coordinates
                match_x = search_left + max_loc[0]
                match_y = search_top + max_loc[1]

                new_bbox = (match_x, match_y, match_x + width, match_y + height)

                # Calculate movement distance
                move_x = abs(match_x - left)
                move_y = abs(match_y - top)

                logger.info(f"Template found at ({match_x}, {match_y}), "
                            f"moved {move_x}px horizontally, {move_y}px vertically, "
                            f"confidence: {max_val:.2%}")

                return new_bbox
            else:
                logger.warning(f"Template match confidence too low: {max_val:.2%} (need 80%)")
                return None

        except Exception as e:
            logger.error(f"Template matching error: {e}")
            return None

    def save_template(
        self,
        img: pyvips.Image,
        bbox: Tuple[int, int, int, int],
        output_path: str
    ) -> bool:
        """
        Save a cropped region as a template file.

        Args:
            img: Source image.
            bbox: (left, top, right, bottom) region to save.
            output_path: Destination file path; parent directories are created.

        Returns:
            True on success, False if anything failed (the error is logged).
        """
        try:
            left, top, right, bottom = bbox
            width = right - left
            height = bottom - top

            # Ensure output directory exists. A bare filename has an empty
            # dirname, and os.makedirs('') would raise.
            output_dir = os.path.dirname(output_path)
            if output_dir:
                os.makedirs(output_dir, exist_ok=True)

            # Crop template region
            template = self.crop(img, left, top, right, bottom)

            # Save as PNG
            template.write_to_file(output_path, compression=6)

            logger.info(f"Saved template: {output_path} ({width}x{height}px)")
            return True

        except Exception as e:
            logger.error(f"Failed to save template: {e}")
            return False
|
||||
110
changedetectionio/processors/image_ssim_diff/preview.py
Normal file
110
changedetectionio/processors/image_ssim_diff/preview.py
Normal file
@@ -0,0 +1,110 @@
|
||||
"""
|
||||
Preview rendering for SSIM screenshot processor.
|
||||
|
||||
Renders images properly in the browser instead of showing raw bytes.
|
||||
"""
|
||||
|
||||
from flask_babel import gettext
|
||||
from loguru import logger
|
||||
|
||||
|
||||
def get_asset(asset_name, watch, datastore, request):
    """
    Return a processor-specific binary asset for the preview page.

    Images are served as their own HTTP responses rather than being embedded
    as base64 in the HTML template, which avoids memory problems with large
    screenshots.

    Supported assets:
        - 'screenshot': The screenshot for the specified version

    Args:
        asset_name: Name of the asset to retrieve ('screenshot')
        watch: Watch object
        datastore: Datastore object
        request: Flask request (for version query param)

    Returns:
        tuple: (binary_data, content_type, cache_control_header) or None if not found
    """
    # Only one asset type is known to this processor
    if asset_name != 'screenshot':
        return None

    available = list(watch.history.keys())
    if len(available) == 0:
        return None

    # Use the requested version when it exists, otherwise the latest one
    requested = request.args.get('version')
    if requested and requested in available:
        timestamp = requested
    else:
        timestamp = available[-1]

    try:
        screenshot_bytes = watch.get_history_snapshot(timestamp=timestamp)

        # Snapshots of image watches should always come back as bytes
        if not isinstance(screenshot_bytes, bytes):
            logger.error(f"Expected bytes but got {type(screenshot_bytes)} for screenshot at {timestamp}")
            return None

        # Sniff the image format from the leading bytes using puremagic (same as Watch.py)
        mime_type = 'image/png'  # Default fallback
        try:
            import puremagic
            detections = puremagic.magic_string(screenshot_bytes[:2048])
            if detections:
                mime_type = detections[0].mime_type
                logger.trace(f"Detected MIME type: {mime_type}")
        except Exception as e:
            logger.warning(f"puremagic detection failed: {e}, using 'image/png' fallback")
            mime_type = 'image/png'

        return (screenshot_bytes, mime_type, 'public, max-age=10')

    except Exception as e:
        logger.error(f"Failed to load screenshot for preview asset: {e}")
        return None
|
||||
|
||||
|
||||
def render(watch, datastore, request, url_for, render_template, flash, redirect):
    """
    Build the preview page for a screenshot watch.

    Args:
        watch: Watch object
        datastore: Datastore object
        request: Flask request
        url_for: Flask url_for function
        render_template: Flask render_template function
        flash: Flask flash function
        redirect: Flask redirect function

    Returns:
        Rendered template or redirect
    """
    available = list(watch.history.keys())

    # Nothing captured yet: tell the user and go back to the watch list
    if len(available) == 0:
        flash(gettext("Preview unavailable - No snapshots captured yet"), "error")
        return redirect(url_for('watchlist.index'))

    # Show the requested version when it exists, otherwise the latest one
    requested = request.args.get('version')
    if requested and requested in available:
        timestamp = requested
    else:
        timestamp = available[-1]

    # The screenshot itself is fetched by the page from the separate
    # /processor-asset/ endpoint instead of being embedded as base64,
    # which keeps large images out of the HTML and reduces memory usage.
    template_context = dict(
        watch=watch,
        uuid=watch.get('uuid'),
        versions=available,
        timestamp=timestamp,
        current_diff_url=watch['url'],
    )
    return render_template('image_ssim_diff/preview.html', **template_context)
|
||||
244
changedetectionio/processors/image_ssim_diff/processor.py
Normal file
244
changedetectionio/processors/image_ssim_diff/processor.py
Normal file
@@ -0,0 +1,244 @@
|
||||
"""
|
||||
Core fast screenshot comparison processor.
|
||||
|
||||
Uses OpenCV with subprocess isolation for high-performance, low-memory
|
||||
image processing. All operations run in isolated subprocesses for complete
|
||||
memory cleanup and stability.
|
||||
"""
|
||||
|
||||
import hashlib
|
||||
import time
|
||||
from loguru import logger
|
||||
from changedetectionio.processors.exceptions import ProcessorException
|
||||
from . import SCREENSHOT_COMPARISON_THRESHOLD_OPTIONS_DEFAULT, PROCESSOR_CONFIG_NAME, OPENCV_BLUR_SIGMA
|
||||
from ..base import difference_detection_processor, SCREENSHOT_FORMAT_PNG
|
||||
|
||||
# All image operations now use OpenCV via isolated_opencv subprocess handler
|
||||
# Template matching temporarily disabled pending OpenCV implementation
|
||||
|
||||
# Identity function used purely as a marker: wrapping a literal in _() makes it
# visible to message extraction while leaving the string untouched at import time.
def _(x):
    return x


name = _('Visual / Image screenshot change detection')
description = _('Compares screenshots using fast OpenCV algorithm, 10-100x faster than SSIM')

# The marker is only needed for the two literals above; remove it from the module namespace
del _

processor_weight = 2
list_badge_text = "Visual"
|
||||
|
||||
class perform_site_check(difference_detection_processor):
|
||||
"""Fast screenshot comparison processor using OpenCV."""
|
||||
|
||||
# Override to use PNG format for better image comparison (JPEG compression creates noise)
|
||||
screenshot_format = SCREENSHOT_FORMAT_PNG
|
||||
|
||||
def run_changedetection(self, watch):
|
||||
"""
|
||||
Perform screenshot comparison using OpenCV subprocess handler.
|
||||
|
||||
Returns:
|
||||
tuple: (changed_detected, update_obj, screenshot_bytes)
|
||||
"""
|
||||
now = time.time()
|
||||
# Get the current screenshot
|
||||
if not self.fetcher.screenshot:
|
||||
raise ProcessorException(
|
||||
message="No screenshot available. Ensure the watch is configured to use a real browser.",
|
||||
url=watch.get('url')
|
||||
)
|
||||
self.screenshot = self.fetcher.screenshot
|
||||
self.xpath_data = self.fetcher.xpath_data
|
||||
|
||||
# Quick MD5 check - skip expensive comparison if images are identical
|
||||
from changedetectionio.content_fetchers.exceptions import checksumFromPreviousCheckWasTheSame
|
||||
current_md5 = hashlib.md5(self.screenshot).hexdigest()
|
||||
previous_md5 = watch.get('previous_md5')
|
||||
if previous_md5 and current_md5 == previous_md5:
|
||||
logger.debug(f"UUID: {watch.get('uuid')} - Screenshot MD5 unchanged ({current_md5}), skipping comparison")
|
||||
raise checksumFromPreviousCheckWasTheSame()
|
||||
else:
|
||||
logger.debug(f"UUID: {watch.get('uuid')} - Screenshot MD5 changed")
|
||||
|
||||
|
||||
|
||||
# Check if bounding box is set (for drawn area mode)
|
||||
# Read from processor-specific config JSON file (named after processor)
|
||||
crop_region = None
|
||||
|
||||
processor_config = self.get_extra_watch_config(PROCESSOR_CONFIG_NAME)
|
||||
bounding_box = processor_config.get('bounding_box') if processor_config else None
|
||||
|
||||
|
||||
# Get pixel difference threshold sensitivity (per-watch > global)
|
||||
# This controls how different a pixel must be (0-255 scale) to count as "changed"
|
||||
pixel_difference_threshold_sensitivity = processor_config.get('pixel_difference_threshold_sensitivity')
|
||||
if not pixel_difference_threshold_sensitivity:
|
||||
pixel_difference_threshold_sensitivity = self.datastore.data['settings']['application'].get('pixel_difference_threshold_sensitivity', SCREENSHOT_COMPARISON_THRESHOLD_OPTIONS_DEFAULT)
|
||||
try:
|
||||
pixel_difference_threshold_sensitivity = int(pixel_difference_threshold_sensitivity)
|
||||
except (ValueError, TypeError):
|
||||
logger.warning(f"Invalid pixel_difference_threshold_sensitivity value '{pixel_difference_threshold_sensitivity}', using default")
|
||||
pixel_difference_threshold_sensitivity = SCREENSHOT_COMPARISON_THRESHOLD_OPTIONS_DEFAULT
|
||||
|
||||
|
||||
# Get minimum change percentage (per-watch > global > env var default)
|
||||
# This controls what percentage of pixels must change to trigger a detection
|
||||
min_change_percentage = processor_config.get('min_change_percentage')
|
||||
if not min_change_percentage:
|
||||
min_change_percentage = self.datastore.data['settings']['application'].get('min_change_percentage', 1)
|
||||
try:
|
||||
min_change_percentage = int(min_change_percentage)
|
||||
except (ValueError, TypeError):
|
||||
logger.warning(f"Invalid min_change_percentage value '{min_change_percentage}', using default 0.1")
|
||||
min_change_percentage = 1
|
||||
|
||||
# Template matching for tracking content movement
|
||||
template_matching_enabled = processor_config.get('auto_track_region', False) #@@todo disabled for now
|
||||
|
||||
if bounding_box:
|
||||
try:
|
||||
# Parse bounding box: "x,y,width,height"
|
||||
parts = [int(p.strip()) for p in bounding_box.split(',')]
|
||||
if len(parts) == 4:
|
||||
x, y, width, height = parts
|
||||
# Crop uses (left, top, right, bottom)
|
||||
crop_region = (max(0, x), max(0, y), x + width, y + height)
|
||||
logger.info(f"UUID: {watch.get('uuid')} - Bounding box enabled: cropping to region {crop_region} (x={x}, y={y}, w={width}, h={height})")
|
||||
else:
|
||||
logger.warning(f"UUID: {watch.get('uuid')} - Invalid bounding box format: {bounding_box} (expected 4 values)")
|
||||
except Exception as e:
|
||||
logger.warning(f"UUID: {watch.get('uuid')} - Failed to parse bounding box '{bounding_box}': {e}")
|
||||
|
||||
# If no bounding box, check if visual selector (include_filters) is set for region-based comparison
|
||||
if not crop_region:
|
||||
include_filters = watch.get('include_filters', [])
|
||||
|
||||
if include_filters and len(include_filters) > 0:
|
||||
# Get the first filter to use for cropping
|
||||
first_filter = include_filters[0].strip()
|
||||
|
||||
if first_filter and self.xpath_data:
|
||||
try:
|
||||
import json
|
||||
# xpath_data is JSON string from browser
|
||||
xpath_data_obj = json.loads(self.xpath_data) if isinstance(self.xpath_data, str) else self.xpath_data
|
||||
|
||||
# Find the bounding box for the first filter
|
||||
for element in xpath_data_obj.get('size_pos', []):
|
||||
# Match the filter with the element's xpath
|
||||
if element.get('xpath') == first_filter and element.get('highlight_as_custom_filter'):
|
||||
# Found the element - extract crop coordinates
|
||||
left = element.get('left', 0)
|
||||
top = element.get('top', 0)
|
||||
width = element.get('width', 0)
|
||||
height = element.get('height', 0)
|
||||
|
||||
# Crop uses (left, top, right, bottom)
|
||||
crop_region = (max(0, left), max(0, top), left + width, top + height)
|
||||
|
||||
logger.info(f"UUID: {watch.get('uuid')} - Visual selector enabled: cropping to region {crop_region} for filter: {first_filter}")
|
||||
break
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"UUID: {watch.get('uuid')} - Failed to parse xpath_data for visual selector: {e}")
|
||||
|
||||
# Store original crop region for template matching
|
||||
original_crop_region = crop_region
|
||||
|
||||
# Check if this is the first check (no previous history)
|
||||
history_keys = list(watch.history.keys())
|
||||
if len(history_keys) == 0:
|
||||
# First check - save baseline, no comparison
|
||||
logger.info(f"UUID: {watch.get('uuid')} - First check for watch {watch.get('uuid')} - saving baseline screenshot")
|
||||
|
||||
# LibVIPS uses automatic reference counting - no explicit cleanup needed
|
||||
update_obj = {
|
||||
'previous_md5': hashlib.md5(self.screenshot).hexdigest(),
|
||||
'last_error': False
|
||||
}
|
||||
logger.trace(f"Processed in {time.time() - now:.3f}s")
|
||||
return False, update_obj, self.screenshot
|
||||
|
||||
# Get previous screenshot bytes from history
|
||||
previous_timestamp = history_keys[-1]
|
||||
previous_screenshot_bytes = watch.get_history_snapshot(timestamp=previous_timestamp)
|
||||
|
||||
# Screenshots are stored as PNG, so this should be bytes
|
||||
if isinstance(previous_screenshot_bytes, str):
|
||||
# If it's a string (shouldn't be for screenshots, but handle it)
|
||||
previous_screenshot_bytes = previous_screenshot_bytes.encode('utf-8')
|
||||
|
||||
# Template matching is temporarily disabled pending OpenCV implementation
|
||||
# crop_region calculated above will be used as-is
|
||||
|
||||
# Perform comparison in isolated subprocess to prevent memory leaks
|
||||
try:
|
||||
from .image_handler import isolated_opencv as process_screenshot_handler
|
||||
|
||||
# stuff in watch doesnt need to be there
|
||||
logger.debug(f"UUID: {watch.get('uuid')} - Starting isolated subprocess comparison (crop_region={crop_region})")
|
||||
|
||||
# Compare using isolated subprocess with OpenCV (async-safe to avoid blocking event loop)
|
||||
# Pass raw bytes and crop region - subprocess handles all image operations
|
||||
import asyncio
|
||||
import threading
|
||||
|
||||
# Async-safe wrapper: runs coroutine in new thread with its own event loop
|
||||
# This prevents blocking the async update worker's event loop
|
||||
def run_async_in_thread():
|
||||
return asyncio.run(
|
||||
process_screenshot_handler.compare_images_isolated(
|
||||
img_bytes_from=previous_screenshot_bytes,
|
||||
img_bytes_to=self.screenshot,
|
||||
pixel_difference_threshold=pixel_difference_threshold_sensitivity,
|
||||
blur_sigma=OPENCV_BLUR_SIGMA,
|
||||
crop_region=crop_region # Pass crop region for isolated cropping
|
||||
)
|
||||
)
|
||||
|
||||
# Run in thread to avoid blocking event loop when called from async update worker
|
||||
result_container = [None]
|
||||
exception_container = [None]
|
||||
|
||||
def thread_target():
|
||||
try:
|
||||
result_container[0] = run_async_in_thread()
|
||||
except Exception as e:
|
||||
exception_container[0] = e
|
||||
|
||||
thread = threading.Thread(target=thread_target, daemon=True, name="ImageDiff-Processor")
|
||||
thread.start()
|
||||
thread.join(timeout=60)
|
||||
|
||||
if exception_container[0]:
|
||||
raise exception_container[0]
|
||||
|
||||
# Subprocess returns only the change score - we decide if it's a "change"
|
||||
change_score = result_container[0]
|
||||
if change_score is None:
|
||||
raise RuntimeError("Image comparison subprocess returned no result")
|
||||
|
||||
changed_detected = change_score > min_change_percentage
|
||||
logger.info(f"UUID: {watch.get('uuid')} - {process_screenshot_handler.IMPLEMENTATION_NAME}: {change_score:.2f}% pixels changed, pixel_diff_threshold_sensitivity: {pixel_difference_threshold_sensitivity:.0f} score={change_score:.2f}%, min_change_threshold={min_change_percentage}%")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"UUID: {watch.get('uuid')} - Failed to compare screenshots: {e}")
|
||||
logger.trace(f"UUID: {watch.get('uuid')} - Processed in {time.time() - now:.3f}s")
|
||||
|
||||
raise ProcessorException(
|
||||
message=f"UUID: {watch.get('uuid')} - Screenshot comparison failed: {e}",
|
||||
url=watch.get('url')
|
||||
)
|
||||
|
||||
# Return results
|
||||
update_obj = {
|
||||
'previous_md5': hashlib.md5(self.screenshot).hexdigest(),
|
||||
'last_error': False
|
||||
}
|
||||
|
||||
if changed_detected:
|
||||
logger.info(f"UUID: {watch.get('uuid')} - Change detected using OpenCV! Score: {change_score:.2f}")
|
||||
else:
|
||||
logger.debug(f"UUID: {watch.get('uuid')} - No significant change using OpenCV. Score: {change_score:.2f}")
|
||||
logger.trace(f"UUID: {watch.get('uuid')} - Processed in {time.time() - now:.3f}s")
|
||||
|
||||
return changed_detected, update_obj, self.screenshot
|
||||
|
||||
@@ -0,0 +1,235 @@
|
||||
{% extends 'base.html' %}
|
||||
{% from '_helpers.html' import render_field, render_checkbox_field, render_button %}
|
||||
|
||||
{% block content %}
|
||||
<link rel="stylesheet" href="{{url_for('static_content', group='styles', filename='diff-image.css')}}?v={{ get_css_version() }}">
|
||||
<script src="{{url_for('static_content', group='js', filename='diff-overview.js')}}" defer></script>
|
||||
|
||||
<div id="settings">
|
||||
<form class="pure-form " action="{{ url_for('ui.ui_diff.diff_history_page', uuid=uuid) }}" method="GET" id="diff-form">
|
||||
<fieldset class="diff-fieldset">
|
||||
{% if versions|length >= 1 %}
|
||||
<span style="white-space: nowrap;">
|
||||
<label id="change-from" for="diff-from-version" class="from-to-label">From</label>
|
||||
<select id="diff-from-version" name="from_version" class="needs-localtime">
|
||||
{%- for version in versions|reverse -%}
|
||||
<option value="{{ version }}" {% if version== from_version %} selected="" {% endif %}>
|
||||
{{ version }}
|
||||
</option>
|
||||
{%- endfor -%}
|
||||
</select>
|
||||
</span>
|
||||
<span style="white-space: nowrap;">
|
||||
<label id="change-to" for="diff-to-version" class="from-to-label">To</label>
|
||||
<select id="diff-to-version" name="to_version" class="needs-localtime">
|
||||
{%- for version in versions|reverse -%}
|
||||
<option value="{{ version }}" {% if version== to_version %} selected="" {% endif %}>
|
||||
{{ version }}
|
||||
</option>
|
||||
{%- endfor -%}
|
||||
</select>
|
||||
</span>
|
||||
{% endif %}
|
||||
</fieldset>
|
||||
<fieldset id="diff-style">
|
||||
<span>
|
||||
<strong>Change Detection:</strong> {{ "%.2f"|format(change_percentage) }}% of pixels changed
|
||||
{% if change_percentage > 0.1 %}
|
||||
<span class="change-detected">⚠ Change Detected</span>
|
||||
{% else %}
|
||||
<span class="no-change">✓ No Significant Change</span>
|
||||
{% endif %}
|
||||
</span>
|
||||
</fieldset>
|
||||
{%- if versions|length >= 2 -%}
|
||||
<div id="keyboard-nav">
|
||||
<strong>Keyboard: </strong>
|
||||
<a href="" class="pure-button pure-button-primary" id="btn-previous"> ← Previous</a>
|
||||
<a class="pure-button pure-button-primary" id="btn-next" href=""> → Next</a>
|
||||
</div>
|
||||
{%- endif -%}
|
||||
</form>
|
||||
</div>
|
||||
|
||||
<div id="screenshot-comparison">
|
||||
<!-- Two-panel layout: Interactive slider + Static diff -->
|
||||
<div class="comparison-grid">
|
||||
<!-- Panel 1: Interactive Comparison Slider (Previous ↔ Current) -->
|
||||
<div class="screenshot-panel">
|
||||
<h3>Interactive Comparison</h3>
|
||||
<div class="comparison-description">
|
||||
Drag slider to compare Previous ({{ from_version|format_timestamp_timeago }})
|
||||
vs Current ({{ to_version|format_timestamp_timeago }})
|
||||
</div>
|
||||
<div style="text-align: center; margin-bottom: 0.5em; display: flex; justify-content: center; gap: 1em;">
|
||||
<a href="#" onclick="downloadImage('img-before', '{{ from_version }}'); return false;" class="download-link" title="Download previous snapshot">
|
||||
<svg width="14" height="14" viewBox="0 0 16 16" fill="currentColor" style="display: inline-block;">
|
||||
<path d="M8 12L3 7h3V1h4v6h3z"/>
|
||||
<path d="M1 14h14v2H1z"/>
|
||||
</svg>
|
||||
Previous
|
||||
</a>
|
||||
<a href="#" onclick="downloadImage('img-after', '{{ to_version }}'); return false;" class="download-link" title="Download current snapshot">
|
||||
<svg width="14" height="14" viewBox="0 0 16 16" fill="currentColor" style="display: inline-block;">
|
||||
<path d="M8 12L3 7h3V1h4v6h3z"/>
|
||||
<path d="M1 14h14v2H1z"/>
|
||||
</svg>
|
||||
Current
|
||||
</a>
|
||||
</div>
|
||||
|
||||
<div class="image-comparison" id="comparison-container">
|
||||
<!-- Before image wrapper (Previous snapshot) -->
|
||||
<div class="comparison-image-wrapper">
|
||||
<img id="img-before" src="{{ url_for('ui.ui_diff.processor_asset', uuid=uuid, asset_name='before', from_version=from_version, to_version=to_version) }}" alt="Previous screenshot">
|
||||
</div>
|
||||
|
||||
<!-- After image wrapper (Current snapshot) -->
|
||||
<div class="comparison-image-wrapper comparison-after">
|
||||
<img id="img-after" src="{{ url_for('ui.ui_diff.processor_asset', uuid=uuid, asset_name='after', from_version=from_version, to_version=to_version) }}" alt="Current screenshot">
|
||||
</div>
|
||||
|
||||
<!-- Labels -->
|
||||
<div class="comparison-labels">
|
||||
<span class="comparison-label">Previous</span>
|
||||
<span class="comparison-label">Current</span>
|
||||
</div>
|
||||
|
||||
<!-- Draggable slider -->
|
||||
<div class="comparison-slider" id="comparison-slider">
|
||||
<div class="comparison-handle"></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Panel 2: Difference Visualization (Static) -->
|
||||
<div class="screenshot-panel diff">
|
||||
<h3>Difference Visualization</h3>
|
||||
<div class="diff-section-header">
|
||||
<span>Red = Changed Pixels</span>
|
||||
</div>
|
||||
<div style="text-align: center; margin-bottom: 0.5em;">
|
||||
<a href="#" onclick="downloadImage('diff-image', '{{ to_version }}_diff'); return false;" class="download-link" title="Download difference image">
|
||||
<svg width="14" height="14" viewBox="0 0 16 16" fill="currentColor" style="display: inline-block;">
|
||||
<path d="M8 12L3 7h3V1h4v6h3z"/>
|
||||
<path d="M1 14h14v2H1z"/>
|
||||
</svg>
|
||||
Download
|
||||
</a>
|
||||
</div>
|
||||
<img id="diff-image" src="{{ url_for('ui.ui_diff.processor_asset', uuid=uuid, asset_name='rendered_diff', from_version=from_version, to_version=to_version) }}" alt="Difference visualization with red highlights">
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{% if comparison_data and comparison_data.get('history') and comparison_data.history|length > 1 %}
|
||||
<div class="comparison-history-section">
|
||||
<h3>Comparison History</h3>
|
||||
<p>Recent comparison results (last {{ comparison_data.history|length }} checks)</p>
|
||||
<div style="overflow-x: auto;">
|
||||
<table class="pure-table pure-table-striped" style="width: 100%;">
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Timestamp</th>
|
||||
<th>Change %</th>
|
||||
<th>Method</th>
|
||||
<th>Changed?</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{% for entry in comparison_data.history|reverse %}
|
||||
<tr>
|
||||
<td>{{ entry.timestamp|format_timestamp_timeago }}</td>
|
||||
<td>{{ "%.2f"|format(entry.change_percentage) }}%</td>
|
||||
<td>{{ entry.method }}</td>
|
||||
<td>
|
||||
{% if entry.changed %}
|
||||
<span class="history-changed-yes">Yes</span>
|
||||
{% else %}
|
||||
<span class="history-changed-no">No</span>
|
||||
{% endif %}
|
||||
</td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
</div>
|
||||
{% endif %}
|
||||
</div>
|
||||
|
||||
<script>
|
||||
/**
 * Trigger a browser download of the image shown in an <img> element.
 *
 * The image source may be either an inline base64 `data:` URI or a regular
 * URL (the <img> tags on this page point at a server-side asset route), so
 * both forms are handled: data URIs are decoded locally, anything else is
 * fetched and saved as a Blob.
 *
 * @param {string} imageId  - id attribute of the <img> element to download.
 * @param {string} filename - base file name; the extension (.jpeg or .png)
 *                            is appended according to the image MIME type.
 */
function downloadImage(imageId, filename) {
    const img = document.getElementById(imageId);
    if (!img || !img.src) {
        // Nothing to download (element missing or no source set)
        return;
    }
    const src = img.src;

    // Hand a Blob to the browser as a file download via a temporary <a> element
    function saveBlob(blob) {
        // Determine file extension from MIME type (anything non-JPEG is saved as .png)
        const mimeString = blob.type || '';
        const extension = mimeString.includes('jpeg') ? '.jpeg' : '.png';

        const url = URL.createObjectURL(blob);
        const a = document.createElement('a');
        a.href = url;
        a.download = filename + extension;
        document.body.appendChild(a);
        a.click();

        // Cleanup after the click has been dispatched
        setTimeout(() => {
            document.body.removeChild(a);
            URL.revokeObjectURL(url);
        }, 100);
    }

    if (src.startsWith('data:')) {
        // Inline image: "data:<mime>;base64,<payload>" - decode the payload locally
        const commaAt = src.indexOf(',');
        const mimeString = src.slice(0, commaAt).split(':')[1].split(';')[0];
        const byteString = atob(src.slice(commaAt + 1));

        const bytes = new Uint8Array(byteString.length);
        for (let i = 0; i < byteString.length; i++) {
            bytes[i] = byteString.charCodeAt(i);
        }

        saveBlob(new Blob([bytes], { type: mimeString }));
        return;
    }

    // URL-backed image: splitting on ',' would yield no payload here, so fetch the bytes instead
    fetch(src, { credentials: 'same-origin' })
        .then((response) => {
            if (!response.ok) {
                throw new Error('HTTP ' + response.status);
            }
            return response.blob();
        })
        .then(saveBlob)
        .catch((err) => {
            console.error('Could not download image "' + imageId + '":', err);
        });
}
|
||||
|
||||
/**
 * Synchronize comparison slider width with diff image width.
 *
 * Caps the '#comparison-container' max-width at the rendered width of
 * '#diff-image' and centres it, so both panels display images at the same
 * max-width. Does nothing if either element is missing. If the diff image
 * has not finished loading yet, the width is applied once its 'load' event
 * fires.
 */
function syncComparisonWidth() {
    const diffImage = document.getElementById('diff-image');
    const comparisonContainer = document.getElementById('comparison-container');

    if (!diffImage || !comparisonContainer) return;

    // Copy the diff image's rendered width onto the comparison container
    const applyWidth = () => {
        const diffImageWidth = diffImage.offsetWidth;
        if (diffImageWidth > 0) {
            comparisonContainer.style.maxWidth = diffImageWidth + 'px';
            comparisonContainer.style.margin = '0 auto';
        }
    };

    if (diffImage.complete) {
        applyWidth();
    } else {
        // This function also runs on every window resize; `once` keeps repeated
        // calls made before the image loads from leaving listeners attached.
        diffImage.addEventListener('load', applyWidth, { once: true });
    }
}
|
||||
|
||||
// Run on page load
|
||||
if (document.readyState === 'loading') {
|
||||
document.addEventListener('DOMContentLoaded', syncComparisonWidth);
|
||||
} else {
|
||||
syncComparisonWidth();
|
||||
}
|
||||
|
||||
// Re-sync on window resize
|
||||
window.addEventListener('resize', syncComparisonWidth);
|
||||
</script>
|
||||
|
||||
<script src="{{ url_for('static_content', group='js', filename='comparison-slider.js') }}" defer></script>
|
||||
|
||||
{% endblock %}
|
||||
@@ -0,0 +1,35 @@
|
||||
{% extends 'base.html' %}
|
||||
|
||||
{% block content %}
|
||||
<script src="{{ url_for('static_content', group='js', filename='preview.js') }}" defer></script>
|
||||
{% if versions|length >= 2 %}
|
||||
<div id="diff-form" style="text-align: center;">
|
||||
<form class="pure-form " action="" method="GET">
|
||||
<fieldset>
|
||||
<label for="preview-version">Select timestamp</label> <select id="preview-version"
|
||||
name="version"
|
||||
class="needs-localtime">
|
||||
{% for version in versions|reverse %}
|
||||
<option value="{{ version }}" {% if version == timestamp %} selected="" {% endif %}>
|
||||
{{ version }}
|
||||
</option>
|
||||
{% endfor %}
|
||||
</select>
|
||||
<button type="submit" class="pure-button pure-button-primary">Go</button>
|
||||
|
||||
</fieldset>
|
||||
</form>
|
||||
<br>
|
||||
<strong>Keyboard: </strong><a href="" class="pure-button pure-button-primary" id="btn-previous">
|
||||
← Previous</a> <a class="pure-button pure-button-primary" id="btn-next" href="">
|
||||
→ Next</a>
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
<div id="screenshot-container" style="text-align: center; border: 1px solid #ddd; padding: 2em; background: #fafafa; border-radius: 4px;">
|
||||
<h3 style="margin-top: 0;">Screenshot from {{ timestamp|format_timestamp_timeago }}</h3>
|
||||
<img src="{{ url_for('ui.ui_preview.processor_asset', uuid=uuid, asset_name='screenshot', version=timestamp) }}"
|
||||
alt="Screenshot preview"
|
||||
style="max-width: 100%; height: auto; border: 1px solid #ccc; box-shadow: 0 2px 8px rgba(0,0,0,0.1); border-radius: 2px;">
|
||||
</div>
|
||||
{% endblock %}
|
||||
22
changedetectionio/processors/image_ssim_diff/util.py
Normal file
22
changedetectionio/processors/image_ssim_diff/util.py
Normal file
@@ -0,0 +1,22 @@
|
||||
"""
|
||||
DEPRECATED: All multiprocessing functions have been removed.
|
||||
|
||||
The image_ssim_diff processor now uses LibVIPS via ImageDiffHandler abstraction,
|
||||
which provides superior performance and memory efficiency through streaming
|
||||
architecture and automatic threading.
|
||||
|
||||
All image operations are now handled by:
|
||||
- imagehandler.py: Abstract base class defining the interface
|
||||
- libvips_handler.py: LibVIPS implementation with streaming and threading
|
||||
|
||||
Historical note: This file previously contained multiprocessing workers for:
|
||||
- Template matching (find_region_with_template_matching_isolated)
|
||||
- Template regeneration (regenerate_template_isolated)
|
||||
- Image cropping (crop_image_isolated, crop_pil_image_isolated)
|
||||
|
||||
These have been replaced by handler methods which are:
|
||||
- Faster (no subprocess overhead)
|
||||
- More memory efficient (LibVIPS streaming)
|
||||
- Cleaner (no multiprocessing deadlocks)
|
||||
- Better tested (no logger/forking issues)
|
||||
"""
|
||||
@@ -103,15 +103,15 @@ class guess_stream_type():
|
||||
self.is_json = True
|
||||
elif 'pdf' in magic_content_header:
|
||||
self.is_pdf = True
|
||||
elif has_html_patterns or http_content_header == 'text/html':
|
||||
self.is_html = True
|
||||
elif any(s in magic_content_header for s in JSON_CONTENT_TYPES):
|
||||
self.is_json = True
|
||||
# magic will call a rss document 'xml'
|
||||
# Rarely do endpoints give the right header, usually just text/xml, so we check also for <rss
|
||||
# This also triggers the automatic CDATA text parser so the RSS goes back a nice content list
|
||||
elif '<rss' in test_content_normalized or '<feed' in test_content_normalized or any(s in magic_content_header for s in RSS_XML_CONTENT_TYPES) or '<rdf:' in test_content_normalized:
|
||||
self.is_rss = True
|
||||
elif has_html_patterns or http_content_header == 'text/html':
|
||||
self.is_html = True
|
||||
elif any(s in magic_content_header for s in JSON_CONTENT_TYPES):
|
||||
self.is_json = True
|
||||
elif any(s in http_content_header for s in XML_CONTENT_TYPES):
|
||||
# Only mark as generic XML if not already detected as RSS
|
||||
if not self.is_rss:
|
||||
|
||||
@@ -6,34 +6,35 @@ from wtforms import (
|
||||
from wtforms.fields.choices import RadioField
|
||||
from wtforms.fields.form import FormField
|
||||
from wtforms.form import Form
|
||||
from flask_babel import lazy_gettext as _l
|
||||
|
||||
from changedetectionio.forms import processor_text_json_diff_form
|
||||
|
||||
|
||||
class RestockSettingsForm(Form):
|
||||
in_stock_processing = RadioField(label='Re-stock detection', choices=[
|
||||
('in_stock_only', "In Stock only (Out Of Stock -> In Stock only)"),
|
||||
('all_changes', "Any availability changes"),
|
||||
('off', "Off, don't follow availability/restock"),
|
||||
in_stock_processing = RadioField(label=_l('Re-stock detection'), choices=[
|
||||
('in_stock_only', _l("In Stock only (Out Of Stock -> In Stock only)")),
|
||||
('all_changes', _l("Any availability changes")),
|
||||
('off', _l("Off, don't follow availability/restock")),
|
||||
], default="in_stock_only")
|
||||
|
||||
price_change_min = FloatField('Below price to trigger notification', [validators.Optional()],
|
||||
render_kw={"placeholder": "No limit", "size": "10"})
|
||||
price_change_max = FloatField('Above price to trigger notification', [validators.Optional()],
|
||||
render_kw={"placeholder": "No limit", "size": "10"})
|
||||
price_change_threshold_percent = FloatField('Threshold in % for price changes since the original price', validators=[
|
||||
price_change_min = FloatField(_l('Below price to trigger notification'), [validators.Optional()],
|
||||
render_kw={"placeholder": _l("No limit"), "size": "10"})
|
||||
price_change_max = FloatField(_l('Above price to trigger notification'), [validators.Optional()],
|
||||
render_kw={"placeholder": _l("No limit"), "size": "10"})
|
||||
price_change_threshold_percent = FloatField(_l('Threshold in %% for price changes since the original price'), validators=[
|
||||
|
||||
validators.Optional(),
|
||||
validators.NumberRange(min=0, max=100, message="Should be between 0 and 100"),
|
||||
validators.NumberRange(min=0, max=100, message=_l("Should be between 0 and 100")),
|
||||
], render_kw={"placeholder": "0%", "size": "5"})
|
||||
|
||||
follow_price_changes = BooleanField('Follow price changes', default=True)
|
||||
follow_price_changes = BooleanField(_l('Follow price changes'), default=True)
|
||||
|
||||
class processor_settings_form(processor_text_json_diff_form):
|
||||
restock_settings = FormField(RestockSettingsForm)
|
||||
|
||||
def extra_tab_content(self):
|
||||
return 'Restock & Price Detection'
|
||||
return _l('Restock & Price Detection')
|
||||
|
||||
def extra_form_content(self):
|
||||
output = ""
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
from .. import difference_detection_processor
|
||||
from ..base import difference_detection_processor
|
||||
from ..exceptions import ProcessorException
|
||||
from . import Restock
|
||||
from loguru import logger
|
||||
@@ -7,8 +7,14 @@ import urllib3
|
||||
import time
|
||||
|
||||
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
||||
name = 'Re-stock & Price detection for pages with a SINGLE product'
|
||||
description = 'Detects if the product goes back to in-stock'
|
||||
# Translatable strings - extracted by pybabel, translated at runtime in __init__.py
|
||||
# Use a marker function so pybabel can extract these strings
|
||||
def _(x): return x # Translation marker for extraction only
|
||||
name = _('Re-stock & Price detection for pages with a SINGLE product')
|
||||
description = _('Detects if the product goes back to in-stock')
|
||||
del _ # Remove marker function
|
||||
processor_weight = 1
|
||||
list_badge_text = "Restock" # _()
|
||||
|
||||
class UnableToExtractRestockData(Exception):
|
||||
def __init__(self, status_code):
|
||||
@@ -187,6 +193,8 @@ class perform_site_check(difference_detection_processor):
|
||||
|
||||
|
||||
itemprop_availability = {}
|
||||
|
||||
# Try built-in extraction first, this will scan metadata in the HTML
|
||||
try:
|
||||
itemprop_availability = get_itemprop_availability(self.fetcher.content)
|
||||
except MoreThanOnePriceFound as e:
|
||||
@@ -198,6 +206,33 @@ class perform_site_check(difference_detection_processor):
|
||||
xpath_data=self.fetcher.xpath_data
|
||||
)
|
||||
|
||||
# If built-in extraction didn't get both price AND availability, try plugin override
|
||||
# Only check plugin if this watch is using a fetcher that might provide better data
|
||||
has_price = itemprop_availability.get('price') is not None
|
||||
has_availability = itemprop_availability.get('availability') is not None
|
||||
|
||||
# @TODO !!! some setting like "Use as fallback" or "always use", "t
|
||||
if not (has_price and has_availability) or True:
|
||||
from changedetectionio.pluggy_interface import get_itemprop_availability_from_plugin
|
||||
fetcher_name = watch.get('fetch_backend', 'html_requests')
|
||||
|
||||
# Only try plugin override if not using system default (which might be anything)
|
||||
if fetcher_name and fetcher_name != 'system':
|
||||
logger.debug("Calling extra plugins for getting item price/availability")
|
||||
plugin_availability = get_itemprop_availability_from_plugin(self.fetcher.content, fetcher_name, self.fetcher, watch.link)
|
||||
|
||||
if plugin_availability:
|
||||
# Plugin provided better data, use it
|
||||
plugin_has_price = plugin_availability.get('price') is not None
|
||||
plugin_has_availability = plugin_availability.get('availability') is not None
|
||||
|
||||
# Only use plugin data if it's actually better than what we have
|
||||
if plugin_has_price or plugin_has_availability:
|
||||
itemprop_availability = plugin_availability
|
||||
logger.info(f"Using plugin-provided availability data for fetcher '{fetcher_name}' (built-in had price={has_price}, availability={has_availability}; plugin has price={plugin_has_price}, availability={plugin_has_availability})")
|
||||
if not plugin_availability:
|
||||
logger.debug("No item price/availability from plugins")
|
||||
|
||||
# Something valid in get_itemprop_availability() by scraping metadata ?
|
||||
if itemprop_availability.get('price') or itemprop_availability.get('availability'):
|
||||
# Store for other usage
|
||||
|
||||
47
changedetectionio/processors/templates/extract.html
Normal file
47
changedetectionio/processors/templates/extract.html
Normal file
@@ -0,0 +1,47 @@
|
||||
{% extends 'base.html' %}
|
||||
{% from '_helpers.html' import render_field, render_checkbox_field, render_button %}
|
||||
{% block content %}
|
||||
<div class="tabs">
|
||||
<ul>
|
||||
{% if last_error_text %}<li class="tab" id="error-text-tab"><a href="{{ url_for('ui.ui_diff.diff_history_page', uuid=uuid)}}#error-text">Error Text</a></li> {% endif %}
|
||||
{% if last_error_screenshot %}<li class="tab" id="error-screenshot-tab"><a href="{{ url_for('ui.ui_diff.diff_history_page', uuid=uuid)}}#error-screenshot">Error Screenshot</a></li> {% endif %}
|
||||
<li class="tab" id=""><a href="{{ url_for('ui.ui_diff.diff_history_page', uuid=uuid)}}#text">Text</a></li>
|
||||
<li class="tab" id="screenshot-tab"><a href="{{ url_for('ui.ui_diff.diff_history_page', uuid=uuid)}}#screenshot">Screenshot</a></li>
|
||||
<li class="tab active" id="extract-tab"><a href="{{ url_for('ui.ui_diff.diff_history_page_extract_GET', uuid=uuid)}}">Extract Data</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
<div id="diff-ui">
|
||||
|
||||
<div class="xxxxtab-pane-inner" id="extract">
|
||||
<form id="extract-data-form" class="pure-form pure-form-stacked edit-form" action="{{ url_for('ui.ui_diff.diff_history_page_extract_POST', uuid=uuid) }}" method="POST">
|
||||
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}">
|
||||
|
||||
<p>This tool will extract text data from all of the watch history.</p>
|
||||
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(extract_form.extract_regex) }}
|
||||
<span class="pure-form-message-inline">
|
||||
A <strong>RegEx</strong> is a pattern that identifies exactly which part inside of the text that you want to extract.<br>
|
||||
|
||||
<p>
|
||||
For example, to extract only the numbers from text ‐<br>
|
||||
<strong>Raw text</strong>: <code>Temperature <span style="color: red">5.5</span>°C in Sydney</code><br>
|
||||
<strong>RegEx to extract:</strong> <code>Temperature <span style="color: red">([0-9\.]+)</span></code><br>
|
||||
</p>
|
||||
<p>
|
||||
<a href="https://RegExr.com/">Be sure to test your RegEx here.</a>
|
||||
</p>
|
||||
<p>
|
||||
Each RegEx group bracket <code>()</code> will be in its own column, the first column value is always the date.
|
||||
</p>
|
||||
</span>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_button(extract_form.extract_submit_button) }}
|
||||
</div>
|
||||
</form>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{% endblock %}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user