mirror of
https://github.com/dgtlmoon/changedetection.io.git
synced 2025-10-30 06:07:50 +00:00
Compare commits
302 Commits
lev-test
...
bf070e617f
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
bf070e617f | ||
|
|
1628586553 | ||
|
|
0d0368846a | ||
|
|
08169c23f3 | ||
|
|
a8192f608f | ||
|
|
a23c07ba94 | ||
|
|
431fd168a1 | ||
|
|
7dbd0b75b2 | ||
|
|
ae532c82e8 | ||
|
|
ab0b85d088 | ||
|
|
66aec365c2 | ||
|
|
e09cea60ef | ||
|
|
f304ae19db | ||
|
|
2116b2cb93 | ||
|
|
21bf3827e7 | ||
|
|
ea6623115a | ||
|
|
6f7d3b689d | ||
|
|
8f580ac96b | ||
|
|
a8cadc3d16 | ||
|
|
c9290d73e0 | ||
|
|
2db5e906e9 | ||
|
|
0751bd371a | ||
|
|
95380cbd20 | ||
|
|
c227c4308c | ||
|
|
ea778450b2 | ||
|
|
8be6b91990 | ||
|
|
3ffa0805e9 | ||
|
|
3335270692 | ||
|
|
a7573b10ec | ||
|
|
df945ad743 | ||
|
|
4536e95205 | ||
|
|
a8d06e9d69 | ||
|
|
6c166ba2c4 | ||
|
|
a3b3497f7c | ||
|
|
339106c5a9 | ||
|
|
63dfb395ef | ||
|
|
c4072269dd | ||
|
|
2ad7d4633c | ||
|
|
bce3b00728 | ||
|
|
5b5449e034 | ||
|
|
a3a93d2081 | ||
|
|
650b1799c8 | ||
|
|
ea7f2b1752 | ||
|
|
892ea6b198 | ||
|
|
10e3db50f6 | ||
|
|
e66229d26b | ||
|
|
1479d7bd46 | ||
|
|
9ba2094f75 | ||
|
|
8aa012ba8e | ||
|
|
8bc6b10db1 | ||
|
|
76d799c95b | ||
|
|
7c8bdfcc9f | ||
|
|
01a938d7ce | ||
|
|
e44853c439 | ||
|
|
3830bec891 | ||
|
|
88ab663330 | ||
|
|
68335b95c3 | ||
|
|
7bbfa0ef32 | ||
|
|
e233d52931 | ||
|
|
181d32e82a | ||
|
|
a51614f83d | ||
|
|
07f98d6bd3 | ||
|
|
f71550da4d | ||
|
|
060fdcf3f5 | ||
|
|
8c3d0d7e31 | ||
|
|
46658a85d6 | ||
|
|
d699652955 | ||
|
|
9e88db5d9b | ||
|
|
5d9c102aff | ||
|
|
cb1c36d97d | ||
|
|
cc29ba5ea9 | ||
|
|
6f371b1bc6 | ||
|
|
f750fa1765 | ||
|
|
785dabd071 | ||
|
|
09914d54a0 | ||
|
|
58b5586674 | ||
|
|
cb02ccc8b4 | ||
|
|
ec692ed727 | ||
|
|
2fb2ea573e | ||
|
|
ada2dc6112 | ||
|
|
ad9024a4f0 | ||
|
|
047c10e23c | ||
|
|
4f83164544 | ||
|
|
6a28a6a42f | ||
|
|
6f926ed595 | ||
|
|
249dc55212 | ||
|
|
6aba43419e | ||
|
|
cb31e6eac6 | ||
|
|
a172d00b9e | ||
|
|
97b0e12fd3 | ||
|
|
a389084407 | ||
|
|
961994abcf | ||
|
|
2709ba6772 | ||
|
|
46252bc6f3 | ||
|
|
64350a2e78 | ||
|
|
2902c63a3b | ||
|
|
55b8588f1f | ||
|
|
02ecc4ae9a | ||
|
|
3ee50b7832 | ||
|
|
66ddd87ee4 | ||
|
|
233189e4f7 | ||
|
|
b237fd7201 | ||
|
|
3c81efe2f4 | ||
|
|
0fcfb94690 | ||
|
|
bb6d4c2756 | ||
|
|
b59ce190ac | ||
|
|
80be1a30f2 | ||
|
|
93b4f79006 | ||
|
|
3009e46617 | ||
|
|
8f040a1a84 | ||
|
|
4dbab8d77a | ||
|
|
cde42c8a49 | ||
|
|
3b9d19df43 | ||
|
|
82b2bf5cb0 | ||
|
|
6ad4acc9fc | ||
|
|
3e59521f48 | ||
|
|
0970c087c8 | ||
|
|
676c550e6e | ||
|
|
78fa47f6f8 | ||
|
|
5bbc33fd36 | ||
|
|
4aa5bb6da3 | ||
|
|
f7dfc9bbb8 | ||
|
|
584b6e378d | ||
|
|
ab1b8e90cd | ||
|
|
0f6f2a9b9c | ||
|
|
ea45c706be | ||
|
|
1d3cadc773 | ||
|
|
bd3e2dc9c9 | ||
|
|
2e1e301915 | ||
|
|
824a1ceb96 | ||
|
|
d7aac2f86c | ||
|
|
8a254edcf3 | ||
|
|
ddeb90752a | ||
|
|
10ff8516e2 | ||
|
|
0fbd9b22bc | ||
|
|
ef437e1af4 | ||
|
|
754febfd33 | ||
|
|
0c9c475f32 | ||
|
|
76951efa1b | ||
|
|
40418b29fb | ||
|
|
e4baca1127 | ||
|
|
12e5f369aa | ||
|
|
bb61a35a54 | ||
|
|
be1b9ed4db | ||
|
|
c55b8f2e36 | ||
|
|
4c764bdfed | ||
|
|
50958ee1f1 | ||
|
|
a57d046b0c | ||
|
|
98745bbe00 | ||
|
|
363e8225a0 | ||
|
|
ea5ae13e83 | ||
|
|
2598eb7e3e | ||
|
|
4b9ae5a97c | ||
|
|
2a69365337 | ||
|
|
ff9f09ba80 | ||
|
|
c8caa0662d | ||
|
|
5cfe758cce | ||
|
|
25cb637533 | ||
|
|
f4e8d1963f | ||
|
|
7c8bbe6ece | ||
|
|
6b031502a3 | ||
|
|
35c22c5cc7 | ||
|
|
d87e17023a | ||
|
|
f36a9799c1 | ||
|
|
45d5e961dc | ||
|
|
9eb4af12b5 | ||
|
|
45f2863966 | ||
|
|
12a1c200a3 | ||
|
|
a5faab6a5c | ||
|
|
7ca3373d1f | ||
|
|
01c1ac4c0c | ||
|
|
b2f9aec383 | ||
|
|
a95aa67aef | ||
|
|
cbeefeccbb | ||
|
|
2b72d38235 | ||
|
|
8fe7aec3c6 | ||
|
|
6e1f5a8503 | ||
|
|
b74b76c9f9 | ||
|
|
a27265450c | ||
|
|
cc5455c3dc | ||
|
|
9db7fb83eb | ||
|
|
f0061110c9 | ||
|
|
a13fedc0d6 | ||
|
|
7576bec66a | ||
|
|
7672190923 | ||
|
|
0ade4307b0 | ||
|
|
8c03b65dc6 | ||
|
|
8a07459e43 | ||
|
|
cd8e115118 | ||
|
|
4ff7b20fcf | ||
|
|
8120f00148 | ||
|
|
127abf49f1 | ||
|
|
db81c3c5e2 | ||
|
|
9952af7a52 | ||
|
|
790577c1b6 | ||
|
|
bab362fb7d | ||
|
|
a177d02406 | ||
|
|
8b8f280565 | ||
|
|
e752875504 | ||
|
|
0a4562fc09 | ||
|
|
c84ac2eab1 | ||
|
|
3ae07ac633 | ||
|
|
8379fdb1f8 | ||
|
|
3f77e075b9 | ||
|
|
685bd01156 | ||
|
|
20bcca578a | ||
|
|
f05f143b46 | ||
|
|
d7f00679a0 | ||
|
|
b7da6f0ca7 | ||
|
|
e4a81ebe08 | ||
|
|
a4edc46af0 | ||
|
|
767db3b79b | ||
|
|
4f6e9dcc56 | ||
|
|
aa4e182549 | ||
|
|
fe1f7c30e1 | ||
|
|
e5ed1ae349 | ||
|
|
d1b1dd70f4 | ||
|
|
93b14c9fc8 | ||
|
|
c9c5de20d8 | ||
|
|
011fa3540e | ||
|
|
c3c3671f8b | ||
|
|
5980bd9bcd | ||
|
|
438871429c | ||
|
|
173ce5bfa2 | ||
|
|
106b1f85fa | ||
|
|
a5c7f343d0 | ||
|
|
401886bcda | ||
|
|
c66fca9de9 | ||
|
|
daee4c5c17 | ||
|
|
af5d0b6963 | ||
|
|
f92dd81c8f | ||
|
|
55cdcfe3ea | ||
|
|
2f7520a6c5 | ||
|
|
4fdc5d7da2 | ||
|
|
308f30b2e8 | ||
|
|
4fa2042d12 | ||
|
|
2a4e1bad4e | ||
|
|
8a317eead5 | ||
|
|
b58094877f | ||
|
|
afe252126c | ||
|
|
342e6119f1 | ||
|
|
e4ff87e970 | ||
|
|
e45a544f15 | ||
|
|
9a5abaa17a | ||
|
|
b8ecfff861 | ||
|
|
58e2a41c95 | ||
|
|
a7214db9c3 | ||
|
|
b9da4af64f | ||
|
|
b77105be7b | ||
|
|
3d5a544ea6 | ||
|
|
4f362385e1 | ||
|
|
a01d6169d2 | ||
|
|
9beda3911d | ||
|
|
5ed596bfa9 | ||
|
|
99ca8787ab | ||
|
|
8f1a6feb90 | ||
|
|
c0e229201b | ||
|
|
66bc7fbc04 | ||
|
|
530bd40ca5 | ||
|
|
36004cf74b | ||
|
|
c7374245e1 | ||
|
|
59df59e9cd | ||
|
|
c0c2898b91 | ||
|
|
abac660bac | ||
|
|
26de64d873 | ||
|
|
79d9a8ca28 | ||
|
|
5c391fbcad | ||
|
|
d7e24f64a5 | ||
|
|
d6427d823f | ||
|
|
47eb874f47 | ||
|
|
37019355fd | ||
|
|
a8e7f8236e | ||
|
|
2414b61fcb | ||
|
|
a63ffa89b1 | ||
|
|
59e93c29d0 | ||
|
|
d7173bb96e | ||
|
|
d544e11a20 | ||
|
|
7f0c19c61c | ||
|
|
30e84f1030 | ||
|
|
d5af91d8f7 | ||
|
|
4b18c633ba | ||
|
|
08728d7d03 | ||
|
|
73f3beda00 | ||
|
|
7b8d335c43 | ||
|
|
ba0b6071e6 | ||
|
|
a6603d5ad6 | ||
|
|
26833781a7 | ||
|
|
f3ed9bdbb5 | ||
|
|
0f65178190 | ||
|
|
a58fc82575 | ||
|
|
2575c03ae0 | ||
|
|
9b7372fff0 | ||
|
|
fcd6ebe0ee | ||
|
|
c162ec9d52 | ||
|
|
bb7f7f473b | ||
|
|
a9ca511004 | ||
|
|
8df61f5eaa | ||
|
|
162f573967 | ||
|
|
eada0ef08d | ||
|
|
f57bc10973 | ||
|
|
d2e8f822d6 | ||
|
|
5fd8200fd9 |
@@ -29,3 +29,34 @@ venv/
|
||||
|
||||
# Visual Studio
|
||||
.vscode/
|
||||
|
||||
# Test and development files
|
||||
test-datastore/
|
||||
tests/
|
||||
*.md
|
||||
!README.md
|
||||
|
||||
# Temporary and log files
|
||||
*.log
|
||||
*.tmp
|
||||
tmp/
|
||||
temp/
|
||||
|
||||
# Training data and large files
|
||||
train-data/
|
||||
works-data/
|
||||
|
||||
# Container files
|
||||
Dockerfile*
|
||||
docker-compose*.yml
|
||||
.dockerignore
|
||||
|
||||
# Development certificates and keys
|
||||
*.pem
|
||||
*.key
|
||||
*.crt
|
||||
profile_output.prof
|
||||
|
||||
# Large binary files that shouldn't be in container
|
||||
*.pdf
|
||||
chrome.json
|
||||
51
.github/actions/extract-memory-report/action.yml
vendored
Normal file
51
.github/actions/extract-memory-report/action.yml
vendored
Normal file
@@ -0,0 +1,51 @@
|
||||
name: 'Extract Memory Test Report'
|
||||
description: 'Extracts and displays memory test report from a container'
|
||||
inputs:
|
||||
container-name:
|
||||
description: 'Name of the container to extract logs from'
|
||||
required: true
|
||||
python-version:
|
||||
description: 'Python version for artifact naming'
|
||||
required: true
|
||||
output-dir:
|
||||
description: 'Directory to store output logs'
|
||||
required: false
|
||||
default: 'output-logs'
|
||||
|
||||
runs:
|
||||
using: "composite"
|
||||
steps:
|
||||
- name: Create output directory
|
||||
shell: bash
|
||||
run: |
|
||||
mkdir -p ${{ inputs.output-dir }}
|
||||
|
||||
- name: Dump container log
|
||||
shell: bash
|
||||
run: |
|
||||
echo "Disabled for now"
|
||||
# return
|
||||
# docker logs ${{ inputs.container-name }} > ${{ inputs.output-dir }}/${{ inputs.container-name }}-stdout-${{ inputs.python-version }}.txt 2>&1 || echo "Could not get stdout"
|
||||
# docker logs ${{ inputs.container-name }} 2> ${{ inputs.output-dir }}/${{ inputs.container-name }}-stderr-${{ inputs.python-version }}.txt || echo "Could not get stderr"
|
||||
|
||||
- name: Extract and display memory test report
|
||||
shell: bash
|
||||
run: |
|
||||
echo "Disabled for now"
|
||||
# echo "Extracting test-memory.log from container..."
|
||||
# docker cp ${{ inputs.container-name }}:/app/changedetectionio/test-memory.log ${{ inputs.output-dir }}/test-memory-${{ inputs.python-version }}.log || echo "test-memory.log not found in container"
|
||||
#
|
||||
# echo "=== Top 10 Highest Peak Memory Tests ==="
|
||||
# if [ -f ${{ inputs.output-dir }}/test-memory-${{ inputs.python-version }}.log ]; then
|
||||
# grep "Peak memory:" ${{ inputs.output-dir }}/test-memory-${{ inputs.python-version }}.log | \
|
||||
# sed 's/.*Peak memory: //' | \
|
||||
# paste -d'|' - <(grep "Peak memory:" ${{ inputs.output-dir }}/test-memory-${{ inputs.python-version }}.log) | \
|
||||
# sort -t'|' -k1 -nr | \
|
||||
# cut -d'|' -f2 | \
|
||||
# head -10
|
||||
# echo ""
|
||||
# echo "=== Full Memory Test Report ==="
|
||||
# cat ${{ inputs.output-dir }}/test-memory-${{ inputs.python-version }}.log
|
||||
# else
|
||||
# echo "No memory log available"
|
||||
# fi
|
||||
8
.github/dependabot.yml
vendored
8
.github/dependabot.yml
vendored
@@ -4,11 +4,11 @@ updates:
|
||||
directory: /
|
||||
schedule:
|
||||
interval: "weekly"
|
||||
"caronc/apprise":
|
||||
versioning-strategy: "increase"
|
||||
schedule:
|
||||
interval: "daily"
|
||||
groups:
|
||||
all:
|
||||
patterns:
|
||||
- "*"
|
||||
- package-ecosystem: pip
|
||||
directory: /
|
||||
schedule:
|
||||
interval: "weekly"
|
||||
|
||||
6
.github/test/Dockerfile-alpine
vendored
6
.github/test/Dockerfile-alpine
vendored
@@ -2,7 +2,7 @@
|
||||
# Test that we can still build on Alpine (musl modified libc https://musl.libc.org/)
|
||||
# Some packages wont install via pypi because they dont have a wheel available under this architecture.
|
||||
|
||||
FROM ghcr.io/linuxserver/baseimage-alpine:3.21
|
||||
FROM ghcr.io/linuxserver/baseimage-alpine:3.22
|
||||
ENV PYTHONUNBUFFERED=1
|
||||
|
||||
COPY requirements.txt /requirements.txt
|
||||
@@ -18,17 +18,19 @@ RUN \
|
||||
libxslt-dev \
|
||||
openssl-dev \
|
||||
python3-dev \
|
||||
file \
|
||||
zip \
|
||||
zlib-dev && \
|
||||
apk add --update --no-cache \
|
||||
libjpeg \
|
||||
libxslt \
|
||||
file \
|
||||
nodejs \
|
||||
poppler-utils \
|
||||
python3 && \
|
||||
echo "**** pip3 install test of changedetection.io ****" && \
|
||||
python3 -m venv /lsiopy && \
|
||||
pip install -U pip wheel setuptools && \
|
||||
pip install -U --no-cache-dir --find-links https://wheel-index.linuxserver.io/alpine-3.21/ -r /requirements.txt && \
|
||||
pip install -U --no-cache-dir --find-links https://wheel-index.linuxserver.io/alpine-3.22/ -r /requirements.txt && \
|
||||
apk del --purge \
|
||||
build-dependencies
|
||||
|
||||
8
.github/workflows/codeql-analysis.yml
vendored
8
.github/workflows/codeql-analysis.yml
vendored
@@ -30,11 +30,11 @@ jobs:
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
uses: actions/checkout@v5
|
||||
|
||||
# Initializes the CodeQL tools for scanning.
|
||||
- name: Initialize CodeQL
|
||||
uses: github/codeql-action/init@v3
|
||||
uses: github/codeql-action/init@v4
|
||||
with:
|
||||
languages: ${{ matrix.language }}
|
||||
# If you wish to specify custom queries, you can do so here or in a config file.
|
||||
@@ -45,7 +45,7 @@ jobs:
|
||||
# Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
|
||||
# If this step fails, then you should remove it and run the build manually (see below)
|
||||
- name: Autobuild
|
||||
uses: github/codeql-action/autobuild@v3
|
||||
uses: github/codeql-action/autobuild@v4
|
||||
|
||||
# ℹ️ Command-line programs to run using the OS shell.
|
||||
# 📚 https://git.io/JvXDl
|
||||
@@ -59,4 +59,4 @@ jobs:
|
||||
# make release
|
||||
|
||||
- name: Perform CodeQL Analysis
|
||||
uses: github/codeql-action/analyze@v3
|
||||
uses: github/codeql-action/analyze@v4
|
||||
|
||||
30
.github/workflows/containers.yml
vendored
30
.github/workflows/containers.yml
vendored
@@ -39,12 +39,20 @@ jobs:
|
||||
# Or if we are in a tagged release scenario.
|
||||
if: ${{ github.event.workflow_run.conclusion == 'success' }} || ${{ github.event.release.tag_name }} != ''
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/checkout@v5
|
||||
- name: Set up Python 3.11
|
||||
uses: actions/setup-python@v5
|
||||
uses: actions/setup-python@v6
|
||||
with:
|
||||
python-version: 3.11
|
||||
|
||||
- name: Cache pip packages
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: ~/.cache/pip
|
||||
key: ${{ runner.os }}-pip-${{ hashFiles('requirements.txt') }}
|
||||
restore-keys: |
|
||||
${{ runner.os }}-pip-
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
@@ -95,7 +103,7 @@ jobs:
|
||||
push: true
|
||||
tags: |
|
||||
${{ secrets.DOCKER_HUB_USERNAME }}/changedetection.io:dev,ghcr.io/${{ github.repository }}:dev
|
||||
platforms: linux/amd64,linux/arm64,linux/arm/v7,linux/arm/v8,linux/arm64/v8
|
||||
platforms: linux/amd64,linux/arm64,linux/arm/v7,linux/arm/v8
|
||||
cache-from: type=gha
|
||||
cache-to: type=gha,mode=max
|
||||
|
||||
@@ -103,6 +111,13 @@ jobs:
|
||||
# provenance: false
|
||||
|
||||
# A new tagged release is required, which builds :tag and :latest
|
||||
- name: Debug release info
|
||||
if: github.event_name == 'release' && startsWith(github.event.release.tag_name, '0.')
|
||||
run: |
|
||||
echo "Release tag: ${{ github.event.release.tag_name }}"
|
||||
echo "Github ref: ${{ github.ref }}"
|
||||
echo "Github ref name: ${{ github.ref_name }}"
|
||||
|
||||
- name: Docker meta :tag
|
||||
if: github.event_name == 'release' && startsWith(github.event.release.tag_name, '0.')
|
||||
uses: docker/metadata-action@v5
|
||||
@@ -112,9 +127,10 @@ jobs:
|
||||
${{ secrets.DOCKER_HUB_USERNAME }}/changedetection.io
|
||||
ghcr.io/dgtlmoon/changedetection.io
|
||||
tags: |
|
||||
type=semver,pattern={{version}}
|
||||
type=semver,pattern={{major}}.{{minor}}
|
||||
type=semver,pattern={{major}}
|
||||
type=semver,pattern={{version}},value=${{ github.event.release.tag_name }}
|
||||
type=semver,pattern={{major}}.{{minor}},value=${{ github.event.release.tag_name }}
|
||||
type=semver,pattern={{major}},value=${{ github.event.release.tag_name }}
|
||||
type=raw,value=latest
|
||||
|
||||
- name: Build and push :tag
|
||||
id: docker_build_tag_release
|
||||
@@ -125,7 +141,7 @@ jobs:
|
||||
file: ./Dockerfile
|
||||
push: true
|
||||
tags: ${{ steps.meta.outputs.tags }}
|
||||
platforms: linux/amd64,linux/arm64,linux/arm/v7,linux/arm/v8,linux/arm64/v8
|
||||
platforms: linux/amd64,linux/arm64,linux/arm/v7,linux/arm/v8
|
||||
cache-from: type=gha
|
||||
cache-to: type=gha,mode=max
|
||||
# Looks like this was disabled
|
||||
|
||||
39
.github/workflows/pypi-release.yml
vendored
39
.github/workflows/pypi-release.yml
vendored
@@ -7,9 +7,9 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/checkout@v5
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
uses: actions/setup-python@v6
|
||||
with:
|
||||
python-version: "3.11"
|
||||
- name: Install pypa/build
|
||||
@@ -21,39 +21,60 @@ jobs:
|
||||
- name: Build a binary wheel and a source tarball
|
||||
run: python3 -m build
|
||||
- name: Store the distribution packages
|
||||
uses: actions/upload-artifact@v4
|
||||
uses: actions/upload-artifact@v5
|
||||
with:
|
||||
name: python-package-distributions
|
||||
path: dist/
|
||||
|
||||
|
||||
test-pypi-package:
|
||||
name: Test the built 📦 package works basically.
|
||||
name: Test the built package works basically.
|
||||
runs-on: ubuntu-latest
|
||||
needs:
|
||||
- build
|
||||
steps:
|
||||
- name: Download all the dists
|
||||
uses: actions/download-artifact@v4
|
||||
uses: actions/download-artifact@v6
|
||||
with:
|
||||
name: python-package-distributions
|
||||
path: dist/
|
||||
- name: Set up Python 3.11
|
||||
uses: actions/setup-python@v5
|
||||
uses: actions/setup-python@v6
|
||||
with:
|
||||
python-version: '3.11'
|
||||
|
||||
- name: Test that the basic pip built package runs without error
|
||||
run: |
|
||||
set -ex
|
||||
ls -alR
|
||||
|
||||
# Find and install the first .whl file
|
||||
find dist -type f -name "*.whl" -exec pip3 install {} \; -quit
|
||||
# Install the first wheel found in dist/
|
||||
WHEEL=$(find dist -type f -name "*.whl" -print -quit)
|
||||
echo Installing $WHEEL
|
||||
python3 -m pip install --upgrade pip
|
||||
python3 -m pip install "$WHEEL"
|
||||
changedetection.io -d /tmp -p 10000 &
|
||||
|
||||
sleep 3
|
||||
curl --retry-connrefused --retry 6 http://127.0.0.1:10000/static/styles/pure-min.css >/dev/null
|
||||
curl --retry-connrefused --retry 6 http://127.0.0.1:10000/ >/dev/null
|
||||
|
||||
# --- API test ---
|
||||
# This also means that the docs/api-spec.yml was shipped and could be read
|
||||
test -f /tmp/url-watches.json
|
||||
API_KEY=$(jq -r '.. | .api_access_token? // empty' /tmp/url-watches.json)
|
||||
echo Test API KEY is $API_KEY
|
||||
curl -X POST "http://127.0.0.1:10000/api/v1/watch" \
|
||||
-H "x-api-key: ${API_KEY}" \
|
||||
-H "Content-Type: application/json" \
|
||||
--show-error --fail \
|
||||
--retry 6 --retry-delay 1 --retry-connrefused \
|
||||
-d '{
|
||||
"url": "https://example.com",
|
||||
"title": "Example Site Monitor",
|
||||
"time_between_check": { "hours": 1 }
|
||||
}'
|
||||
|
||||
killall changedetection.io
|
||||
|
||||
|
||||
@@ -72,7 +93,7 @@ jobs:
|
||||
|
||||
steps:
|
||||
- name: Download all the dists
|
||||
uses: actions/download-artifact@v4
|
||||
uses: actions/download-artifact@v6
|
||||
with:
|
||||
name: python-package-distributions
|
||||
path: dist/
|
||||
|
||||
52
.github/workflows/test-container-build.yml
vendored
52
.github/workflows/test-container-build.yml
vendored
@@ -23,15 +23,41 @@ on:
|
||||
# Changes to requirements.txt packages and Dockerfile may or may not always be compatible with arm etc, so worth testing
|
||||
# @todo: some kind of path filter for requirements.txt and Dockerfile
|
||||
jobs:
|
||||
test-container-build:
|
||||
builder:
|
||||
name: Build ${{ matrix.platform }} (${{ matrix.dockerfile == './Dockerfile' && 'main' || 'alpine' }})
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix:
|
||||
include:
|
||||
# Main Dockerfile platforms
|
||||
- platform: linux/amd64
|
||||
dockerfile: ./Dockerfile
|
||||
- platform: linux/arm64
|
||||
dockerfile: ./Dockerfile
|
||||
- platform: linux/arm/v7
|
||||
dockerfile: ./Dockerfile
|
||||
- platform: linux/arm/v8
|
||||
dockerfile: ./Dockerfile
|
||||
# Alpine Dockerfile platforms (musl via alpine check)
|
||||
- platform: linux/amd64
|
||||
dockerfile: ./.github/test/Dockerfile-alpine
|
||||
- platform: linux/arm64
|
||||
dockerfile: ./.github/test/Dockerfile-alpine
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/checkout@v5
|
||||
- name: Set up Python 3.11
|
||||
uses: actions/setup-python@v5
|
||||
uses: actions/setup-python@v6
|
||||
with:
|
||||
python-version: 3.11
|
||||
|
||||
- name: Cache pip packages
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: ~/.cache/pip
|
||||
key: ${{ runner.os }}-pip-${{ hashFiles('requirements.txt') }}
|
||||
restore-keys: |
|
||||
${{ runner.os }}-pip-
|
||||
|
||||
# Just test that the build works, some libraries won't compile on ARM/rPi etc
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v3
|
||||
@@ -47,24 +73,14 @@ jobs:
|
||||
version: latest
|
||||
driver-opts: image=moby/buildkit:master
|
||||
|
||||
# https://github.com/dgtlmoon/changedetection.io/pull/1067
|
||||
# Check we can still build under alpine/musl
|
||||
- name: Test that the docker containers can build (musl via alpine check)
|
||||
id: docker_build_musl
|
||||
uses: docker/build-push-action@v6
|
||||
with:
|
||||
context: ./
|
||||
file: ./.github/test/Dockerfile-alpine
|
||||
platforms: linux/amd64,linux/arm64
|
||||
|
||||
- name: Test that the docker containers can build
|
||||
- name: Test that the docker containers can build (${{ matrix.platform }} - ${{ matrix.dockerfile }})
|
||||
id: docker_build
|
||||
uses: docker/build-push-action@v6
|
||||
# https://github.com/docker/build-push-action#customizing
|
||||
with:
|
||||
context: ./
|
||||
file: ./Dockerfile
|
||||
platforms: linux/amd64,linux/arm64,linux/arm/v7,linux/arm/v8,linux/arm64/v8
|
||||
cache-from: type=local,src=/tmp/.buildx-cache
|
||||
cache-to: type=local,dest=/tmp/.buildx-cache
|
||||
file: ${{ matrix.dockerfile }}
|
||||
platforms: ${{ matrix.platform }}
|
||||
cache-from: type=gha
|
||||
cache-to: type=gha,mode=min
|
||||
|
||||
|
||||
13
.github/workflows/test-only.yml
vendored
13
.github/workflows/test-only.yml
vendored
@@ -7,7 +7,7 @@ jobs:
|
||||
lint-code:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/checkout@v5
|
||||
- name: Lint with Ruff
|
||||
run: |
|
||||
pip install ruff
|
||||
@@ -15,8 +15,14 @@ jobs:
|
||||
ruff check . --select E9,F63,F7,F82
|
||||
# Complete check with errors treated as warnings
|
||||
ruff check . --exit-zero
|
||||
- name: Validate OpenAPI spec
|
||||
run: |
|
||||
pip install openapi-spec-validator
|
||||
python3 -c "from openapi_spec_validator import validate_spec; import yaml; validate_spec(yaml.safe_load(open('docs/api-spec.yaml')))"
|
||||
|
||||
test-application-3-10:
|
||||
# Only run on push to master (including PR merges)
|
||||
if: github.event_name == 'push' && github.ref == 'refs/heads/master'
|
||||
needs: lint-code
|
||||
uses: ./.github/workflows/test-stack-reusable-workflow.yml
|
||||
with:
|
||||
@@ -24,12 +30,15 @@ jobs:
|
||||
|
||||
|
||||
test-application-3-11:
|
||||
# Always run
|
||||
needs: lint-code
|
||||
uses: ./.github/workflows/test-stack-reusable-workflow.yml
|
||||
with:
|
||||
python-version: '3.11'
|
||||
|
||||
test-application-3-12:
|
||||
# Only run on push to master (including PR merges)
|
||||
if: github.event_name == 'push' && github.ref == 'refs/heads/master'
|
||||
needs: lint-code
|
||||
uses: ./.github/workflows/test-stack-reusable-workflow.yml
|
||||
with:
|
||||
@@ -37,6 +46,8 @@ jobs:
|
||||
skip-pypuppeteer: true
|
||||
|
||||
test-application-3-13:
|
||||
# Only run on push to master (including PR merges)
|
||||
if: github.event_name == 'push' && github.ref == 'refs/heads/master'
|
||||
needs: lint-code
|
||||
uses: ./.github/workflows/test-stack-reusable-workflow.yml
|
||||
with:
|
||||
|
||||
457
.github/workflows/test-stack-reusable-workflow.yml
vendored
457
.github/workflows/test-stack-reusable-workflow.yml
vendored
@@ -15,138 +15,294 @@ on:
|
||||
default: false
|
||||
|
||||
jobs:
|
||||
test-application:
|
||||
# Build the Docker image once and share it with all test jobs
|
||||
build:
|
||||
runs-on: ubuntu-latest
|
||||
env:
|
||||
PYTHON_VERSION: ${{ inputs.python-version }}
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/checkout@v5
|
||||
|
||||
# Mainly just for link/flake8
|
||||
- name: Set up Python ${{ env.PYTHON_VERSION }}
|
||||
uses: actions/setup-python@v5
|
||||
uses: actions/setup-python@v6
|
||||
with:
|
||||
python-version: ${{ env.PYTHON_VERSION }}
|
||||
|
||||
- name: Cache pip packages
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: ~/.cache/pip
|
||||
key: ${{ runner.os }}-pip-py${{ env.PYTHON_VERSION }}-${{ hashFiles('requirements.txt') }}
|
||||
restore-keys: |
|
||||
${{ runner.os }}-pip-py${{ env.PYTHON_VERSION }}-
|
||||
${{ runner.os }}-pip-
|
||||
|
||||
- name: Build changedetection.io container for testing under Python ${{ env.PYTHON_VERSION }}
|
||||
run: |
|
||||
echo "---- Building for Python ${{ env.PYTHON_VERSION }} -----"
|
||||
# Build a changedetection.io container and start testing inside
|
||||
docker build --build-arg PYTHON_VERSION=${{ env.PYTHON_VERSION }} --build-arg LOGGER_LEVEL=TRACE -t test-changedetectionio .
|
||||
# Debug info
|
||||
docker run test-changedetectionio bash -c 'pip list'
|
||||
docker run test-changedetectionio bash -c 'pip list'
|
||||
|
||||
- name: We should be Python ${{ env.PYTHON_VERSION }} ...
|
||||
run: |
|
||||
docker run test-changedetectionio bash -c 'python3 --version'
|
||||
|
||||
- name: Spin up ancillary testable services
|
||||
run: |
|
||||
|
||||
docker network create changedet-network
|
||||
|
||||
# Selenium
|
||||
docker run --network changedet-network -d --hostname selenium -p 4444:4444 --rm --shm-size="2g" selenium/standalone-chrome:4
|
||||
|
||||
# SocketPuppetBrowser + Extra for custom browser test
|
||||
docker run --network changedet-network -d -e "LOG_LEVEL=TRACE" --cap-add=SYS_ADMIN --name sockpuppetbrowser --hostname sockpuppetbrowser --rm -p 3000:3000 dgtlmoon/sockpuppetbrowser:latest
|
||||
docker run --network changedet-network -d -e "LOG_LEVEL=TRACE" --cap-add=SYS_ADMIN --name sockpuppetbrowser-custom-url --hostname sockpuppetbrowser-custom-url -p 3001:3000 --rm dgtlmoon/sockpuppetbrowser:latest
|
||||
docker run test-changedetectionio bash -c 'python3 --version'
|
||||
|
||||
- name: Spin up ancillary SMTP+Echo message test server
|
||||
- name: Save Docker image
|
||||
run: |
|
||||
# Debug SMTP server/echo message back server
|
||||
docker run --network changedet-network -d -p 11025:11025 -p 11080:11080 --hostname mailserver test-changedetectionio bash -c 'pip3 install aiosmtpd && python changedetectionio/tests/smtp/smtp-test-server.py'
|
||||
docker ps
|
||||
docker save test-changedetectionio -o /tmp/test-changedetectionio.tar
|
||||
|
||||
- name: Show docker container state and other debug info
|
||||
- name: Upload Docker image artifact
|
||||
uses: actions/upload-artifact@v5
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp/test-changedetectionio.tar
|
||||
retention-days: 1
|
||||
|
||||
# Unit tests (lightweight, no ancillary services needed)
|
||||
unit-tests:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
timeout-minutes: 10
|
||||
env:
|
||||
PYTHON_VERSION: ${{ inputs.python-version }}
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v5
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
|
||||
- name: Load Docker image
|
||||
run: |
|
||||
set -x
|
||||
echo "Running processes in docker..."
|
||||
docker ps
|
||||
docker load -i /tmp/test-changedetectionio.tar
|
||||
|
||||
- name: Run Unit Tests
|
||||
run: |
|
||||
# Unit tests
|
||||
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_notification_diff'
|
||||
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_watch_model'
|
||||
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_jinja2_security'
|
||||
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_semver'
|
||||
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_notification_diff'
|
||||
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_watch_model'
|
||||
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_jinja2_security'
|
||||
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_semver'
|
||||
|
||||
- name: Test built container with Pytest (generally as requests/plaintext fetching)
|
||||
# Basic pytest tests with ancillary services
|
||||
basic-tests:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
timeout-minutes: 25
|
||||
env:
|
||||
PYTHON_VERSION: ${{ inputs.python-version }}
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v5
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
|
||||
- name: Load Docker image
|
||||
run: |
|
||||
# All tests
|
||||
echo "run test with pytest"
|
||||
# The default pytest logger_level is TRACE
|
||||
# To change logger_level for pytest(test/conftest.py),
|
||||
# append the docker option. e.g. '-e LOGGER_LEVEL=DEBUG'
|
||||
docker run --name test-cdio-basic-tests --network changedet-network test-changedetectionio bash -c 'cd changedetectionio && ./run_basic_tests.sh'
|
||||
docker load -i /tmp/test-changedetectionio.tar
|
||||
|
||||
# PLAYWRIGHT/NODE-> CDP
|
||||
- name: Playwright and SocketPuppetBrowser - Specific tests in built container
|
||||
- name: Test built container with Pytest
|
||||
run: |
|
||||
# Playwright via Sockpuppetbrowser fetch
|
||||
# tests/visualselector/test_fetch_data.py will do browser steps
|
||||
docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_content.py'
|
||||
docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_errorhandling.py'
|
||||
docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/visualselector/test_fetch_data.py'
|
||||
docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_custom_js_before_content.py'
|
||||
docker network inspect changedet-network >/dev/null 2>&1 || docker network create changedet-network
|
||||
docker run --name test-cdio-basic-tests --network changedet-network test-changedetectionio bash -c 'cd changedetectionio && ./run_basic_tests.sh'
|
||||
|
||||
- name: Extract memory report and logs
|
||||
if: always()
|
||||
uses: ./.github/actions/extract-memory-report
|
||||
with:
|
||||
container-name: test-cdio-basic-tests
|
||||
python-version: ${{ env.PYTHON_VERSION }}
|
||||
|
||||
- name: Playwright and SocketPuppetBrowser - Headers and requests
|
||||
run: |
|
||||
# Settings headers playwright tests - Call back in from Sockpuppetbrowser, check headers
|
||||
docker run --name "changedet" --hostname changedet --rm -e "FLASK_SERVER_NAME=changedet" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000?dumpio=true" --network changedet-network test-changedetectionio bash -c 'find .; cd changedetectionio; pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_request.py; pwd;find .'
|
||||
- name: Store test artifacts
|
||||
if: always()
|
||||
uses: actions/upload-artifact@v5
|
||||
with:
|
||||
name: test-cdio-basic-tests-output-py${{ env.PYTHON_VERSION }}
|
||||
path: output-logs
|
||||
|
||||
- name: Playwright and SocketPuppetBrowser - Restock detection
|
||||
run: |
|
||||
# restock detection via playwright - added name=changedet here so that playwright and sockpuppetbrowser can connect to it
|
||||
docker run --rm --name "changedet" -e "FLASK_SERVER_NAME=changedet" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-port=5004 --live-server-host=0.0.0.0 tests/restock/test_restock.py'
|
||||
# Playwright tests
|
||||
playwright-tests:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
timeout-minutes: 10
|
||||
env:
|
||||
PYTHON_VERSION: ${{ inputs.python-version }}
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
|
||||
# STRAIGHT TO CDP
|
||||
- name: Pyppeteer and SocketPuppetBrowser - Specific tests in built container
|
||||
if: ${{ inputs.skip-pypuppeteer == false }}
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v5
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
|
||||
- name: Load Docker image
|
||||
run: |
|
||||
# Playwright via Sockpuppetbrowser fetch
|
||||
docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_content.py'
|
||||
docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_errorhandling.py'
|
||||
docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/visualselector/test_fetch_data.py'
|
||||
docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_custom_js_before_content.py'
|
||||
docker load -i /tmp/test-changedetectionio.tar
|
||||
|
||||
- name: Pyppeteer and SocketPuppetBrowser - Headers and requests checks
|
||||
if: ${{ inputs.skip-pypuppeteer == false }}
|
||||
- name: Spin up ancillary services
|
||||
run: |
|
||||
# Settings headers playwright tests - Call back in from Sockpuppetbrowser, check headers
|
||||
docker run --name "changedet" --hostname changedet --rm -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "FLASK_SERVER_NAME=changedet" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000?dumpio=true" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio; pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_request.py'
|
||||
docker network create changedet-network
|
||||
docker run --network changedet-network -d -e "LOG_LEVEL=TRACE" --cap-add=SYS_ADMIN --name sockpuppetbrowser --hostname sockpuppetbrowser --rm -p 3000:3000 dgtlmoon/sockpuppetbrowser:latest
|
||||
docker run --network changedet-network -d -e "LOG_LEVEL=TRACE" --cap-add=SYS_ADMIN --name sockpuppetbrowser-custom-url --hostname sockpuppetbrowser-custom-url -p 3001:3000 --rm dgtlmoon/sockpuppetbrowser:latest
|
||||
|
||||
- name: Pyppeteer and SocketPuppetBrowser - Restock detection
|
||||
if: ${{ inputs.skip-pypuppeteer == false }}
|
||||
run: |
|
||||
# restock detection via playwright - added name=changedet here so that playwright and sockpuppetbrowser can connect to it
|
||||
docker run --rm --name "changedet" -e "FLASK_SERVER_NAME=changedet" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-port=5004 --live-server-host=0.0.0.0 tests/restock/test_restock.py'
|
||||
- name: Playwright - Specific tests in built container
|
||||
run: |
|
||||
docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest -vv --capture=tee-sys --showlocals --tb=long --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_content.py'
|
||||
docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest -vv --capture=tee-sys --showlocals --tb=long --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_errorhandling.py'
|
||||
docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest -vv --capture=tee-sys --showlocals --tb=long --live-server-host=0.0.0.0 --live-server-port=5004 tests/visualselector/test_fetch_data.py'
|
||||
docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest -vv --capture=tee-sys --showlocals --tb=long --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_custom_js_before_content.py'
|
||||
|
||||
- name: Playwright - Headers and requests
|
||||
run: |
|
||||
docker run --name "changedet" --hostname changedet --rm -e "FLASK_SERVER_NAME=changedet" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000?dumpio=true" --network changedet-network test-changedetectionio bash -c 'find .; cd changedetectionio; pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_request.py; pwd;find .'
|
||||
|
||||
- name: Playwright - Restock detection
|
||||
run: |
|
||||
docker run --rm --name "changedet" -e "FLASK_SERVER_NAME=changedet" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-port=5004 --live-server-host=0.0.0.0 tests/restock/test_restock.py'
|
||||
|
||||
# Pyppeteer tests
|
||||
pyppeteer-tests:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
if: ${{ inputs.skip-pypuppeteer == false }}
|
||||
timeout-minutes: 10
|
||||
env:
|
||||
PYTHON_VERSION: ${{ inputs.python-version }}
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v5
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
|
||||
- name: Load Docker image
|
||||
run: |
|
||||
docker load -i /tmp/test-changedetectionio.tar
|
||||
|
||||
- name: Spin up ancillary services
|
||||
run: |
|
||||
docker network create changedet-network
|
||||
docker run --network changedet-network -d -e "LOG_LEVEL=TRACE" --cap-add=SYS_ADMIN --name sockpuppetbrowser --hostname sockpuppetbrowser --rm -p 3000:3000 dgtlmoon/sockpuppetbrowser:latest
|
||||
|
||||
- name: Pyppeteer - Specific tests in built container
|
||||
run: |
|
||||
docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_content.py'
|
||||
docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_errorhandling.py'
|
||||
docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/visualselector/test_fetch_data.py'
|
||||
docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_custom_js_before_content.py'
|
||||
|
||||
- name: Pyppeteer - Headers and requests checks
|
||||
run: |
|
||||
docker run --name "changedet" --hostname changedet --rm -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "FLASK_SERVER_NAME=changedet" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000?dumpio=true" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio; pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_request.py'
|
||||
|
||||
- name: Pyppeteer - Restock detection
|
||||
run: |
|
||||
docker run --rm --name "changedet" -e "FLASK_SERVER_NAME=changedet" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-port=5004 --live-server-host=0.0.0.0 tests/restock/test_restock.py'
|
||||
|
||||
# Selenium tests
|
||||
selenium-tests:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
timeout-minutes: 10
|
||||
env:
|
||||
PYTHON_VERSION: ${{ inputs.python-version }}
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v5
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
|
||||
- name: Load Docker image
|
||||
run: |
|
||||
docker load -i /tmp/test-changedetectionio.tar
|
||||
|
||||
- name: Spin up ancillary services
|
||||
run: |
|
||||
docker network create changedet-network
|
||||
docker run --network changedet-network -d --hostname selenium -p 4444:4444 --rm --shm-size="2g" selenium/standalone-chrome:4
|
||||
sleep 3
|
||||
|
||||
- name: Specific tests for headers and requests checks with Selenium
|
||||
run: |
|
||||
|
||||
docker run --name "changedet" --hostname changedet --rm -e "FLASK_SERVER_NAME=changedet" -e "WEBDRIVER_URL=http://selenium:4444/wd/hub" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio; pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_request.py'
|
||||
|
||||
# SELENIUM
|
||||
- name: Specific tests in built container for Selenium
|
||||
run: |
|
||||
# Selenium fetch
|
||||
docker run --rm -e "WEBDRIVER_URL=http://selenium:4444/wd/hub" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest tests/fetchers/test_content.py && pytest tests/test_errorhandling.py'
|
||||
docker run --rm -e "WEBDRIVER_URL=http://selenium:4444/wd/hub" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest tests/fetchers/test_content.py && pytest tests/test_errorhandling.py'
|
||||
|
||||
- name: Specific tests in built container for headers and requests checks with Selenium
|
||||
|
||||
# SMTP tests
|
||||
smtp-tests:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
timeout-minutes: 10
|
||||
env:
|
||||
PYTHON_VERSION: ${{ inputs.python-version }}
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v5
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
|
||||
- name: Load Docker image
|
||||
run: |
|
||||
docker run --name "changedet" --hostname changedet --rm -e "FLASK_SERVER_NAME=changedet" -e "WEBDRIVER_URL=http://selenium:4444/wd/hub" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio; pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_request.py'
|
||||
docker load -i /tmp/test-changedetectionio.tar
|
||||
|
||||
- name: Spin up SMTP test server
|
||||
run: |
|
||||
docker network create changedet-network
|
||||
docker run --network changedet-network -d -p 11025:11025 -p 11080:11080 --hostname mailserver test-changedetectionio bash -c 'pip3 install aiosmtpd && python changedetectionio/tests/smtp/smtp-test-server.py'
|
||||
|
||||
# OTHER STUFF
|
||||
- name: Test SMTP notification mime types
|
||||
run: |
|
||||
# SMTP content types - needs the 'Debug SMTP server/echo message back server' container from above
|
||||
# "mailserver" hostname defined above
|
||||
docker run --rm --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest tests/smtp/test_notification_smtp.py'
|
||||
|
||||
# @todo Add a test via playwright/puppeteer
|
||||
# squid with auth is tested in run_proxy_tests.sh -> tests/proxy_list/test_select_custom_proxy.py
|
||||
- name: Test proxy squid style interaction
|
||||
# Proxy tests
|
||||
proxy-tests:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
timeout-minutes: 10
|
||||
env:
|
||||
PYTHON_VERSION: ${{ inputs.python-version }}
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v5
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
|
||||
- name: Load Docker image
|
||||
run: |
|
||||
docker load -i /tmp/test-changedetectionio.tar
|
||||
|
||||
- name: Spin up services
|
||||
run: |
|
||||
docker network create changedet-network
|
||||
docker run --network changedet-network -d --hostname selenium -p 4444:4444 --rm --shm-size="2g" selenium/standalone-chrome:4
|
||||
docker run --network changedet-network -d -e "LOG_LEVEL=TRACE" --cap-add=SYS_ADMIN --name sockpuppetbrowser --hostname sockpuppetbrowser --rm -p 3000:3000 dgtlmoon/sockpuppetbrowser:latest
|
||||
docker run --network changedet-network -d -e "LOG_LEVEL=TRACE" --cap-add=SYS_ADMIN --name sockpuppetbrowser-custom-url --hostname sockpuppetbrowser-custom-url -p 3001:3000 --rm dgtlmoon/sockpuppetbrowser:latest
|
||||
|
||||
- name: Test proxy Squid style interaction
|
||||
run: |
|
||||
cd changedetectionio
|
||||
./run_proxy_tests.sh
|
||||
docker ps
|
||||
cd ..
|
||||
|
||||
- name: Test proxy SOCKS5 style interaction
|
||||
@@ -155,87 +311,132 @@ jobs:
|
||||
./run_socks_proxy_tests.sh
|
||||
cd ..
|
||||
|
||||
# Custom browser URL tests
|
||||
custom-browser-tests:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
timeout-minutes: 10
|
||||
env:
|
||||
PYTHON_VERSION: ${{ inputs.python-version }}
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v5
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
|
||||
- name: Load Docker image
|
||||
run: |
|
||||
docker load -i /tmp/test-changedetectionio.tar
|
||||
|
||||
- name: Spin up ancillary services
|
||||
run: |
|
||||
docker network create changedet-network
|
||||
docker run --network changedet-network -d -e "LOG_LEVEL=TRACE" --cap-add=SYS_ADMIN --name sockpuppetbrowser --hostname sockpuppetbrowser --rm -p 3000:3000 dgtlmoon/sockpuppetbrowser:latest
|
||||
docker run --network changedet-network -d -e "LOG_LEVEL=TRACE" --cap-add=SYS_ADMIN --name sockpuppetbrowser-custom-url --hostname sockpuppetbrowser-custom-url -p 3001:3000 --rm dgtlmoon/sockpuppetbrowser:latest
|
||||
|
||||
- name: Test custom browser URL
|
||||
run: |
|
||||
cd changedetectionio
|
||||
./run_custom_browser_url_tests.sh
|
||||
cd ..
|
||||
|
||||
- name: Test changedetection.io container starts+runs basically without error
|
||||
# Container startup tests
|
||||
container-tests:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
timeout-minutes: 10
|
||||
env:
|
||||
PYTHON_VERSION: ${{ inputs.python-version }}
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v5
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
|
||||
- name: Load Docker image
|
||||
run: |
|
||||
docker run --name test-changedetectionio -p 5556:5000 -d test-changedetectionio
|
||||
docker load -i /tmp/test-changedetectionio.tar
|
||||
|
||||
- name: Test container starts+runs basically without error
|
||||
run: |
|
||||
docker run --name test-changedetectionio -p 5556:5000 -d test-changedetectionio
|
||||
sleep 3
|
||||
# Should return 0 (no error) when grep finds it
|
||||
curl --retry-connrefused --retry 6 -s http://localhost:5556 |grep -q checkbox-uuid
|
||||
|
||||
# and IPv6
|
||||
curl --retry-connrefused --retry 6 -s -g -6 "http://[::1]:5556"|grep -q checkbox-uuid
|
||||
|
||||
# Check whether TRACE log is enabled.
|
||||
# Also, check whether TRACE came from STDOUT
|
||||
curl --retry-connrefused --retry 6 -s http://localhost:5556 |grep -q checkbox-uuid
|
||||
curl --retry-connrefused --retry 6 -s -g -6 "http://[::1]:5556"|grep -q checkbox-uuid
|
||||
docker logs test-changedetectionio 2>/dev/null | grep 'TRACE log is enabled' || exit 1
|
||||
# Check whether DEBUG is came from STDOUT
|
||||
docker logs test-changedetectionio 2>/dev/null | grep 'DEBUG' || exit 1
|
||||
|
||||
docker kill test-changedetectionio
|
||||
|
||||
- name: Test changedetection.io SIGTERM and SIGINT signal shutdown
|
||||
- name: Test HTTPS SSL mode
|
||||
run: |
|
||||
openssl req -x509 -newkey rsa:4096 -keyout privkey.pem -out cert.pem -days 365 -nodes -subj "/CN=localhost"
|
||||
docker run --name test-changedetectionio-ssl --rm -e SSL_CERT_FILE=cert.pem -e SSL_PRIVKEY_FILE=privkey.pem -p 5000:5000 -v ./cert.pem:/app/cert.pem -v ./privkey.pem:/app/privkey.pem -d test-changedetectionio
|
||||
sleep 3
|
||||
curl --retry-connrefused --retry 6 -k https://localhost:5000 -v|grep -q checkbox-uuid
|
||||
docker kill test-changedetectionio-ssl
|
||||
|
||||
- name: Test IPv6 Mode
|
||||
run: |
|
||||
docker run --name test-changedetectionio-ipv6 --rm -p 5000:5000 -e LISTEN_HOST=:: -d test-changedetectionio
|
||||
sleep 3
|
||||
curl --retry-connrefused --retry 6 http://[::1]:5000 -v|grep -q checkbox-uuid
|
||||
docker kill test-changedetectionio-ipv6
|
||||
|
||||
# Signal tests
|
||||
signal-tests:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
timeout-minutes: 10
|
||||
env:
|
||||
PYTHON_VERSION: ${{ inputs.python-version }}
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v5
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
|
||||
- name: Load Docker image
|
||||
run: |
|
||||
docker load -i /tmp/test-changedetectionio.tar
|
||||
|
||||
- name: Test SIGTERM and SIGINT signal shutdown
|
||||
run: |
|
||||
|
||||
echo SIGINT Shutdown request test
|
||||
docker run --name sig-test -d test-changedetectionio
|
||||
sleep 3
|
||||
echo ">>> Sending SIGINT to sig-test container"
|
||||
docker kill --signal=SIGINT sig-test
|
||||
sleep 3
|
||||
# invert the check (it should be not 0/not running)
|
||||
docker ps
|
||||
# check signal catch(STDERR) log. Because of
|
||||
# changedetectionio/__init__.py: logger.add(sys.stderr, level=logger_level)
|
||||
docker logs sig-test 2>&1 | grep 'Shutdown: Got Signal - SIGINT' || exit 1
|
||||
test -z "`docker ps|grep sig-test`"
|
||||
if [ $? -ne 0 ]
|
||||
then
|
||||
if [ $? -ne 0 ]; then
|
||||
echo "Looks like container was running when it shouldnt be"
|
||||
docker ps
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# @todo - scan the container log to see the right "graceful shutdown" text exists
|
||||
docker rm sig-test
|
||||
|
||||
|
||||
echo SIGTERM Shutdown request test
|
||||
docker run --name sig-test -d test-changedetectionio
|
||||
sleep 3
|
||||
echo ">>> Sending SIGTERM to sig-test container"
|
||||
docker kill --signal=SIGTERM sig-test
|
||||
sleep 3
|
||||
# invert the check (it should be not 0/not running)
|
||||
docker ps
|
||||
# check signal catch(STDERR) log. Because of
|
||||
# changedetectionio/__init__.py: logger.add(sys.stderr, level=logger_level)
|
||||
docker logs sig-test 2>&1 | grep 'Shutdown: Got Signal - SIGTERM' || exit 1
|
||||
test -z "`docker ps|grep sig-test`"
|
||||
if [ $? -ne 0 ]
|
||||
then
|
||||
if [ $? -ne 0 ]; then
|
||||
echo "Looks like container was running when it shouldnt be"
|
||||
docker ps
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# @todo - scan the container log to see the right "graceful shutdown" text exists
|
||||
docker rm sig-test
|
||||
|
||||
- name: Dump container log
|
||||
if: always()
|
||||
run: |
|
||||
mkdir output-logs
|
||||
docker logs test-cdio-basic-tests > output-logs/test-cdio-basic-tests-stdout-${{ env.PYTHON_VERSION }}.txt
|
||||
docker logs test-cdio-basic-tests 2> output-logs/test-cdio-basic-tests-stderr-${{ env.PYTHON_VERSION }}.txt
|
||||
|
||||
- name: Store everything including test-datastore
|
||||
if: always()
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: test-cdio-basic-tests-output-py${{ env.PYTHON_VERSION }}
|
||||
path: .
|
||||
|
||||
40
Dockerfile
40
Dockerfile
@@ -5,7 +5,6 @@ ARG PYTHON_VERSION=3.11
|
||||
FROM python:${PYTHON_VERSION}-slim-bookworm AS builder
|
||||
|
||||
# See `cryptography` pin comment in requirements.txt
|
||||
ARG CRYPTOGRAPHY_DONT_BUILD_RUST=1
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
g++ \
|
||||
@@ -16,6 +15,8 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
libssl-dev \
|
||||
libxslt-dev \
|
||||
make \
|
||||
patch \
|
||||
pkg-config \
|
||||
zlib1g-dev
|
||||
|
||||
RUN mkdir /install
|
||||
@@ -23,13 +24,34 @@ WORKDIR /install
|
||||
|
||||
COPY requirements.txt /requirements.txt
|
||||
|
||||
# --extra-index-url https://www.piwheels.org/simple is for cryptography module to be prebuilt (or rustc etc needs to be installed)
|
||||
RUN pip install --extra-index-url https://www.piwheels.org/simple --target=/dependencies -r /requirements.txt
|
||||
# Use cache mounts and multiple wheel sources for faster ARM builds
|
||||
ENV PIP_CACHE_DIR=/tmp/pip-cache
|
||||
# Help Rust find OpenSSL for cryptography package compilation on ARM
|
||||
ENV PKG_CONFIG_PATH="/usr/lib/pkgconfig:/usr/lib/arm-linux-gnueabihf/pkgconfig:/usr/lib/aarch64-linux-gnu/pkgconfig"
|
||||
ENV PKG_CONFIG_ALLOW_SYSTEM_CFLAGS=1
|
||||
ENV OPENSSL_DIR="/usr"
|
||||
ENV OPENSSL_LIB_DIR="/usr/lib/arm-linux-gnueabihf"
|
||||
ENV OPENSSL_INCLUDE_DIR="/usr/include/openssl"
|
||||
# Additional environment variables for cryptography Rust build
|
||||
ENV CRYPTOGRAPHY_DONT_BUILD_RUST=1
|
||||
RUN --mount=type=cache,target=/tmp/pip-cache \
|
||||
pip install \
|
||||
--prefer-binary \
|
||||
--extra-index-url https://www.piwheels.org/simple \
|
||||
--extra-index-url https://pypi.anaconda.org/ARM-software/simple \
|
||||
--cache-dir=/tmp/pip-cache \
|
||||
--target=/dependencies \
|
||||
-r /requirements.txt
|
||||
|
||||
# Playwright is an alternative to Selenium
|
||||
# Excluded this package from requirements.txt to prevent arm/v6 and arm/v7 builds from failing
|
||||
# https://github.com/dgtlmoon/changedetection.io/pull/1067 also musl/alpine (not supported)
|
||||
RUN pip install --target=/dependencies playwright~=1.48.0 \
|
||||
RUN --mount=type=cache,target=/tmp/pip-cache \
|
||||
pip install \
|
||||
--prefer-binary \
|
||||
--cache-dir=/tmp/pip-cache \
|
||||
--target=/dependencies \
|
||||
playwright~=1.48.0 \
|
||||
|| echo "WARN: Failed to install Playwright. The application can still run, but the Playwright option will be disabled."
|
||||
|
||||
# Final image stage
|
||||
@@ -42,6 +64,8 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
locales \
|
||||
# For pdftohtml
|
||||
poppler-utils \
|
||||
# favicon type detection and other uses
|
||||
file \
|
||||
zlib1g \
|
||||
&& apt-get clean && rm -rf /var/lib/apt/lists/*
|
||||
|
||||
@@ -62,6 +86,11 @@ EXPOSE 5000
|
||||
|
||||
# The actual flask app module
|
||||
COPY changedetectionio /app/changedetectionio
|
||||
|
||||
# Also for OpenAPI validation wrapper - needs the YML
|
||||
RUN [ ! -d "/app/docs" ] && mkdir /app/docs
|
||||
COPY docs/api-spec.yaml /app/docs/api-spec.yaml
|
||||
|
||||
# Starting wrapper
|
||||
COPY changedetection.py /app/changedetection.py
|
||||
|
||||
@@ -70,6 +99,9 @@ COPY changedetection.py /app/changedetection.py
|
||||
ARG LOGGER_LEVEL=''
|
||||
ENV LOGGER_LEVEL="$LOGGER_LEVEL"
|
||||
|
||||
# Default
|
||||
ENV LC_ALL=en_US.UTF-8
|
||||
|
||||
WORKDIR /app
|
||||
CMD ["python", "./changedetection.py", "-d", "/datastore"]
|
||||
|
||||
|
||||
2
LICENSE
2
LICENSE
@@ -186,7 +186,7 @@
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
Copyright 2025 Web Technologies s.r.o.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
|
||||
@@ -1,13 +1,17 @@
|
||||
recursive-include changedetectionio/api *
|
||||
include docs/api-spec.yaml
|
||||
recursive-include changedetectionio/blueprint *
|
||||
recursive-include changedetectionio/content_fetchers *
|
||||
recursive-include changedetectionio/conditions *
|
||||
recursive-include changedetectionio/content_fetchers *
|
||||
recursive-include changedetectionio/jinja2_custom *
|
||||
recursive-include changedetectionio/model *
|
||||
recursive-include changedetectionio/notification *
|
||||
recursive-include changedetectionio/processors *
|
||||
recursive-include changedetectionio/realtime *
|
||||
recursive-include changedetectionio/static *
|
||||
recursive-include changedetectionio/templates *
|
||||
recursive-include changedetectionio/tests *
|
||||
recursive-include changedetectionio/widgets *
|
||||
prune changedetectionio/static/package-lock.json
|
||||
prune changedetectionio/static/styles/node_modules
|
||||
prune changedetectionio/static/styles/package-lock.json
|
||||
|
||||
@@ -1,11 +1,21 @@
|
||||
## Web Site Change Detection, Monitoring and Notification.
|
||||
# Monitor website changes
|
||||
|
||||
Live your data-life pro-actively, track website content changes and receive notifications via Discord, Email, Slack, Telegram and 70+ more
|
||||
Detect WebPage Changes Automatically — Monitor Web Page Changes in Real Time
|
||||
|
||||
Monitor websites for updates — get notified via Discord, Email, Slack, Telegram, Webhook and many more.
|
||||
|
||||
Detect web page content changes and get instant alerts.
|
||||
|
||||
|
||||
[Changedetection.io is the best tool to monitor web-pages for changes](https://changedetection.io) Track website content changes and receive notifications via Discord, Email, Slack, Telegram and 90+ more
|
||||
|
||||
Ideal for monitoring price changes, content edits, conditional changes and more.
|
||||
|
||||
[<img src="https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/docs/screenshot.png" style="max-width:100%;" alt="Self-hosted web page change monitoring, list of websites with changes" title="Self-hosted web page change monitoring, list of websites with changes" />](https://changedetection.io)
|
||||
|
||||
|
||||
[**Don't have time? Let us host it for you! try our extremely affordable subscription use our proxies and support!**](https://changedetection.io)
|
||||
[**Don't have time? Try our extremely affordable subscription use our proxies and support!**](https://changedetection.io)
|
||||
|
||||
|
||||
|
||||
### Target specific parts of the webpage using the Visual Selector tool.
|
||||
|
||||
20
README.md
20
README.md
@@ -1,11 +1,13 @@
|
||||
## Web Site Change Detection, Restock monitoring and notifications.
|
||||
# Detect Website Changes Automatically — Monitor Web Page Changes in Real Time
|
||||
|
||||
**_Detect website content changes and perform meaningful actions - trigger notifications via Discord, Email, Slack, Telegram, API calls and many more._**
|
||||
Monitor websites for updates — get notified via Discord, Email, Slack, Telegram, Webhook and many more.
|
||||
|
||||
_Live your data-life pro-actively._
|
||||
**Detect web page content changes and get instant alerts.**
|
||||
|
||||
Ideal for monitoring price changes, content edits, conditional changes and more.
|
||||
|
||||
|
||||
[<img src="https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/docs/screenshot.png" style="max-width:100%;" alt="Self-hosted web site page change monitoring" title="Self-hosted web site page change monitoring" />](https://changedetection.io?src=github)
|
||||
[<img src="https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/docs/screenshot.png" style="max-width:100%;" alt="Web site page change monitoring" title="Web site page change monitoring" />](https://changedetection.io?src=github)
|
||||
|
||||
[![Release Version][release-shield]][release-link] [![Docker Pulls][docker-pulls]][docker-link] [![License][license-shield]](LICENSE.md)
|
||||
|
||||
@@ -13,6 +15,7 @@ _Live your data-life pro-actively._
|
||||
|
||||
[**Get started with website page change monitoring straight away. Don't have time? Try our $8.99/month subscription, use our proxies and support!**](https://changedetection.io) , _half the price of other website change monitoring services!_
|
||||
|
||||
|
||||
- Chrome browser included.
|
||||
- Nothing to install, access via browser login after signup.
|
||||
- Super fast, no registration needed setup.
|
||||
@@ -99,9 +102,7 @@ _Need an actual Chrome runner with Javascript support? We support fetching via W
|
||||
- Configurable [proxy per watch](https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration)
|
||||
- Send a screenshot with the notification when a change is detected in the web page
|
||||
|
||||
We [recommend and use Bright Data](https://brightdata.grsm.io/n0r16zf7eivq) global proxy services, Bright Data will match any first deposit up to $100 using our signup link.
|
||||
|
||||
[Oxylabs](https://oxylabs.go2cloud.org/SH2d) is also an excellent proxy provider and well worth using, they offer Residental, ISP, Rotating and many other proxy types to suit your project.
|
||||
We [recommend and use Bright Data](https://brightdata.grsm.io/n0r16zf7eivq) global proxy services, Bright Data will match any first deposit up to $150 using our signup link.
|
||||
|
||||
Please :star: star :star: this project and help it grow! https://github.com/dgtlmoon/changedetection.io/
|
||||
|
||||
@@ -279,7 +280,10 @@ Excel import is recommended - that way you can better organise tags/groups of we
|
||||
|
||||
## API Support
|
||||
|
||||
Supports managing the website watch list [via our API](https://changedetection.io/docs/api_v1/index.html)
|
||||
Full REST API for programmatic management of watches, tags, notifications and more.
|
||||
|
||||
- **[Interactive API Documentation](https://changedetection.io/docs/api_v1/index.html)** - Complete API reference with live testing
|
||||
- **[OpenAPI Specification](docs/api-spec.yaml)** - Generate SDKs for any programming language
|
||||
|
||||
## Support us
|
||||
|
||||
|
||||
@@ -2,20 +2,19 @@
|
||||
|
||||
# Read more https://github.com/dgtlmoon/changedetection.io/wiki
|
||||
|
||||
__version__ = '0.49.15'
|
||||
__version__ = '0.50.34'
|
||||
|
||||
from changedetectionio.strtobool import strtobool
|
||||
from json.decoder import JSONDecodeError
|
||||
import os
|
||||
os.environ['EVENTLET_NO_GREENDNS'] = 'yes'
|
||||
import eventlet
|
||||
import eventlet.wsgi
|
||||
import getopt
|
||||
import platform
|
||||
import signal
|
||||
import socket
|
||||
|
||||
import sys
|
||||
|
||||
# Eventlet completely removed - using threading mode for SocketIO
|
||||
# This provides better Python 3.12+ compatibility and eliminates eventlet/asyncio conflicts
|
||||
from changedetectionio import store
|
||||
from changedetectionio.flask_app import changedetection_app
|
||||
from loguru import logger
|
||||
@@ -30,13 +29,43 @@ def get_version():
|
||||
# Parent wrapper or OS sends us a SIGTERM/SIGINT, do everything required for a clean shutdown
|
||||
def sigshutdown_handler(_signo, _stack_frame):
|
||||
name = signal.Signals(_signo).name
|
||||
logger.critical(f'Shutdown: Got Signal - {name} ({_signo}), Saving DB to disk and calling shutdown')
|
||||
datastore.sync_to_json()
|
||||
logger.success('Sync JSON to disk complete.')
|
||||
# This will throw a SystemExit exception, because eventlet.wsgi.server doesn't know how to deal with it.
|
||||
# Solution: move to gevent or other server in the future (#2014)
|
||||
datastore.stop_thread = True
|
||||
logger.critical(f'Shutdown: Got Signal - {name} ({_signo}), Fast shutdown initiated')
|
||||
|
||||
# Set exit flag immediately to stop all loops
|
||||
app.config.exit.set()
|
||||
datastore.stop_thread = True
|
||||
|
||||
# Shutdown workers and queues immediately
|
||||
try:
|
||||
from changedetectionio import worker_handler
|
||||
worker_handler.shutdown_workers()
|
||||
except Exception as e:
|
||||
logger.error(f"Error shutting down workers: {str(e)}")
|
||||
|
||||
# Close janus queues properly
|
||||
try:
|
||||
from changedetectionio.flask_app import update_q, notification_q
|
||||
update_q.close()
|
||||
notification_q.close()
|
||||
logger.debug("Janus queues closed successfully")
|
||||
except Exception as e:
|
||||
logger.critical(f"CRITICAL: Failed to close janus queues: {e}")
|
||||
|
||||
# Shutdown socketio server fast
|
||||
from changedetectionio.flask_app import socketio_server
|
||||
if socketio_server and hasattr(socketio_server, 'shutdown'):
|
||||
try:
|
||||
socketio_server.shutdown()
|
||||
except Exception as e:
|
||||
logger.error(f"Error shutting down Socket.IO server: {str(e)}")
|
||||
|
||||
# Save data quickly
|
||||
try:
|
||||
datastore.sync_to_json()
|
||||
logger.success('Fast sync to disk complete.')
|
||||
except Exception as e:
|
||||
logger.error(f"Error syncing to disk: {str(e)}")
|
||||
|
||||
sys.exit()
|
||||
|
||||
def main():
|
||||
@@ -45,9 +74,8 @@ def main():
|
||||
|
||||
datastore_path = None
|
||||
do_cleanup = False
|
||||
host = ''
|
||||
ipv6_enabled = False
|
||||
port = os.environ.get('PORT') or 5000
|
||||
host = os.environ.get("LISTEN_HOST", "0.0.0.0").strip()
|
||||
port = int(os.environ.get('PORT', 5000))
|
||||
ssl_mode = False
|
||||
|
||||
# On Windows, create and use a default path.
|
||||
@@ -88,10 +116,6 @@ def main():
|
||||
if opt == '-d':
|
||||
datastore_path = arg
|
||||
|
||||
if opt == '-6':
|
||||
logger.success("Enabling IPv6 listen support")
|
||||
ipv6_enabled = True
|
||||
|
||||
# Cleanup (remove text files that arent in the index)
|
||||
if opt == '-c':
|
||||
do_cleanup = True
|
||||
@@ -103,6 +127,20 @@ def main():
|
||||
if opt == '-l':
|
||||
logger_level = int(arg) if arg.isdigit() else arg.upper()
|
||||
|
||||
|
||||
logger.success(f"changedetection.io version {get_version()} starting.")
|
||||
# Launch using SocketIO run method for proper integration (if enabled)
|
||||
ssl_cert_file = os.getenv("SSL_CERT_FILE", 'cert.pem')
|
||||
ssl_privkey_file = os.getenv("SSL_PRIVKEY_FILE", 'privkey.pem')
|
||||
if os.getenv("SSL_CERT_FILE") and os.getenv("SSL_PRIVKEY_FILE"):
|
||||
ssl_mode = True
|
||||
|
||||
# SSL mode could have been set by -s too, therefor fallback to default values
|
||||
if ssl_mode:
|
||||
if not os.path.isfile(ssl_cert_file) or not os.path.isfile(ssl_privkey_file):
|
||||
logger.critical(f"Cannot start SSL/HTTPS mode, Please be sure that {ssl_cert_file}' and '{ssl_privkey_file}' exist in in {os.getcwd()}")
|
||||
os._exit(2)
|
||||
|
||||
# Without this, a logger will be duplicated
|
||||
logger.remove()
|
||||
try:
|
||||
@@ -143,6 +181,11 @@ def main():
|
||||
|
||||
app = changedetection_app(app_config, datastore)
|
||||
|
||||
# Get the SocketIO instance from the Flask app (created in flask_app.py)
|
||||
from changedetectionio.flask_app import socketio_server
|
||||
global socketio
|
||||
socketio = socketio_server
|
||||
|
||||
signal.signal(signal.SIGTERM, sigshutdown_handler)
|
||||
signal.signal(signal.SIGINT, sigshutdown_handler)
|
||||
|
||||
@@ -167,10 +210,11 @@ def main():
|
||||
|
||||
|
||||
@app.context_processor
|
||||
def inject_version():
|
||||
def inject_template_globals():
|
||||
return dict(right_sticky="v{}".format(datastore.data['version_tag']),
|
||||
new_version_available=app.config['NEW_VERSION_AVAILABLE'],
|
||||
has_password=datastore.data['settings']['application']['password'] != False
|
||||
has_password=datastore.data['settings']['application']['password'] != False,
|
||||
socket_io_enabled=datastore.data['settings']['application']['ui'].get('socket_io_enabled', True)
|
||||
)
|
||||
|
||||
# Monitored websites will not receive a Referer header when a user clicks on an outgoing link.
|
||||
@@ -194,15 +238,21 @@ def main():
|
||||
from werkzeug.middleware.proxy_fix import ProxyFix
|
||||
app.wsgi_app = ProxyFix(app.wsgi_app, x_prefix=1, x_host=1)
|
||||
|
||||
s_type = socket.AF_INET6 if ipv6_enabled else socket.AF_INET
|
||||
|
||||
if ssl_mode:
|
||||
# @todo finalise SSL config, but this should get you in the right direction if you need it.
|
||||
eventlet.wsgi.server(eventlet.wrap_ssl(eventlet.listen((host, port), s_type),
|
||||
certfile='cert.pem',
|
||||
keyfile='privkey.pem',
|
||||
server_side=True), app)
|
||||
|
||||
# SocketIO instance is already initialized in flask_app.py
|
||||
if socketio_server:
|
||||
if ssl_mode:
|
||||
logger.success(f"SSL mode enabled, attempting to start with '{ssl_cert_file}' and '{ssl_privkey_file}' in {os.getcwd()}")
|
||||
socketio.run(app, host=host, port=int(port), debug=False,
|
||||
ssl_context=(ssl_cert_file, ssl_privkey_file), allow_unsafe_werkzeug=True)
|
||||
else:
|
||||
socketio.run(app, host=host, port=int(port), debug=False, allow_unsafe_werkzeug=True)
|
||||
else:
|
||||
eventlet.wsgi.server(eventlet.listen((host, int(port)), s_type), app)
|
||||
|
||||
# Run Flask app without Socket.IO if disabled
|
||||
logger.info("Starting Flask app without Socket.IO server")
|
||||
if ssl_mode:
|
||||
logger.success(f"SSL mode enabled, attempting to start with '{ssl_cert_file}' and '{ssl_privkey_file}' in {os.getcwd()}")
|
||||
app.run(host=host, port=int(port), debug=False,
|
||||
ssl_context=(ssl_cert_file, ssl_privkey_file))
|
||||
else:
|
||||
app.run(host=host, port=int(port), debug=False)
|
||||
|
||||
@@ -1,9 +1,22 @@
|
||||
import os
|
||||
from changedetectionio.strtobool import strtobool
|
||||
from flask_restful import abort, Resource
|
||||
from flask import request
|
||||
import validators
|
||||
from . import auth
|
||||
from functools import wraps
|
||||
from . import auth, validate_openapi_request
|
||||
from ..validate_url import is_safe_valid_url
|
||||
|
||||
|
||||
def default_content_type(content_type='text/plain'):
|
||||
"""Decorator to set a default Content-Type header if none is provided."""
|
||||
def decorator(f):
|
||||
@wraps(f)
|
||||
def wrapper(*args, **kwargs):
|
||||
if not request.content_type:
|
||||
# Set default content type in the request environment
|
||||
request.environ['CONTENT_TYPE'] = content_type
|
||||
return f(*args, **kwargs)
|
||||
return wrapper
|
||||
return decorator
|
||||
|
||||
|
||||
class Import(Resource):
|
||||
@@ -12,17 +25,10 @@ class Import(Resource):
|
||||
self.datastore = kwargs['datastore']
|
||||
|
||||
@auth.check_token
|
||||
@default_content_type('text/plain') #3547 #3542
|
||||
@validate_openapi_request('importWatches')
|
||||
def post(self):
|
||||
"""
|
||||
@api {post} /api/v1/import Import a list of watched URLs
|
||||
@apiDescription Accepts a line-feed separated list of URLs to import, additionally with ?tag_uuids=(tag id), ?tag=(name), ?proxy={key}, ?dedupe=true (default true) one URL per line.
|
||||
@apiExample {curl} Example usage:
|
||||
curl http://localhost:5000/api/v1/import --data-binary @list-of-sites.txt -H"x-api-key:8a111a21bc2f8f1dd9b9353bbd46049a"
|
||||
@apiName Import
|
||||
@apiGroup Watch
|
||||
@apiSuccess (200) {List} OK List of watch UUIDs added
|
||||
@apiSuccess (500) {String} ERR Some other error
|
||||
"""
|
||||
"""Import a list of watched URLs."""
|
||||
|
||||
extras = {}
|
||||
|
||||
@@ -43,14 +49,13 @@ class Import(Resource):
|
||||
|
||||
urls = request.get_data().decode('utf8').splitlines()
|
||||
added = []
|
||||
allow_simplehost = not strtobool(os.getenv('BLOCK_SIMPLEHOSTS', 'False'))
|
||||
for url in urls:
|
||||
url = url.strip()
|
||||
if not len(url):
|
||||
continue
|
||||
|
||||
# If hosts that only contain alphanumerics are allowed ("localhost" for example)
|
||||
if not validators.url(url, simple_host=allow_simplehost):
|
||||
if not is_safe_valid_url(url):
|
||||
return f"Invalid or unsupported URL - {url}", 400
|
||||
|
||||
if dedupe and self.datastore.url_exists(url):
|
||||
|
||||
@@ -1,9 +1,7 @@
|
||||
from flask_expects_json import expects_json
|
||||
from flask_restful import Resource
|
||||
from . import auth
|
||||
from flask_restful import abort, Resource
|
||||
from flask_restful import Resource, abort
|
||||
from flask import request
|
||||
from . import auth
|
||||
from . import auth, validate_openapi_request
|
||||
from . import schema_create_notification_urls, schema_delete_notification_urls
|
||||
|
||||
class Notifications(Resource):
|
||||
@@ -12,19 +10,9 @@ class Notifications(Resource):
|
||||
self.datastore = kwargs['datastore']
|
||||
|
||||
@auth.check_token
|
||||
@validate_openapi_request('getNotifications')
|
||||
def get(self):
|
||||
"""
|
||||
@api {get} /api/v1/notifications Return Notification URL List
|
||||
@apiDescription Return the Notification URL List from the configuration
|
||||
@apiExample {curl} Example usage:
|
||||
curl http://localhost:5000/api/v1/notifications -H"x-api-key:813031b16330fe25e3780cf0325daa45"
|
||||
HTTP/1.0 200
|
||||
{
|
||||
'notification_urls': ["notification-urls-list"]
|
||||
}
|
||||
@apiName Get
|
||||
@apiGroup Notifications
|
||||
"""
|
||||
"""Return Notification URL List."""
|
||||
|
||||
notification_urls = self.datastore.data.get('settings', {}).get('application', {}).get('notification_urls', [])
|
||||
|
||||
@@ -33,18 +21,10 @@ class Notifications(Resource):
|
||||
}, 200
|
||||
|
||||
@auth.check_token
|
||||
@validate_openapi_request('addNotifications')
|
||||
@expects_json(schema_create_notification_urls)
|
||||
def post(self):
|
||||
"""
|
||||
@api {post} /api/v1/notifications Create Notification URLs
|
||||
@apiDescription Add one or more notification URLs from the configuration
|
||||
@apiExample {curl} Example usage:
|
||||
curl http://localhost:5000/api/v1/notifications/batch -H"x-api-key:813031b16330fe25e3780cf0325daa45" -H "Content-Type: application/json" -d '{"notification_urls": ["url1", "url2"]}'
|
||||
@apiName CreateBatch
|
||||
@apiGroup Notifications
|
||||
@apiSuccess (201) {Object[]} notification_urls List of added notification URLs
|
||||
@apiError (400) {String} Invalid input
|
||||
"""
|
||||
"""Create Notification URLs."""
|
||||
|
||||
json_data = request.get_json()
|
||||
notification_urls = json_data.get("notification_urls", [])
|
||||
@@ -69,18 +49,10 @@ class Notifications(Resource):
|
||||
return {'notification_urls': added_urls}, 201
|
||||
|
||||
@auth.check_token
|
||||
@validate_openapi_request('replaceNotifications')
|
||||
@expects_json(schema_create_notification_urls)
|
||||
def put(self):
|
||||
"""
|
||||
@api {put} /api/v1/notifications Replace Notification URLs
|
||||
@apiDescription Replace all notification URLs with the provided list (can be empty)
|
||||
@apiExample {curl} Example usage:
|
||||
curl -X PUT http://localhost:5000/api/v1/notifications -H"x-api-key:813031b16330fe25e3780cf0325daa45" -H "Content-Type: application/json" -d '{"notification_urls": ["url1", "url2"]}'
|
||||
@apiName Replace
|
||||
@apiGroup Notifications
|
||||
@apiSuccess (200) {Object[]} notification_urls List of current notification URLs
|
||||
@apiError (400) {String} Invalid input
|
||||
"""
|
||||
"""Replace Notification URLs."""
|
||||
json_data = request.get_json()
|
||||
notification_urls = json_data.get("notification_urls", [])
|
||||
|
||||
@@ -100,19 +72,10 @@ class Notifications(Resource):
|
||||
return {'notification_urls': clean_urls}, 200
|
||||
|
||||
@auth.check_token
|
||||
@validate_openapi_request('deleteNotifications')
|
||||
@expects_json(schema_delete_notification_urls)
|
||||
def delete(self):
|
||||
"""
|
||||
@api {delete} /api/v1/notifications Delete Notification URLs
|
||||
@apiDescription Deletes one or more notification URLs from the configuration
|
||||
@apiExample {curl} Example usage:
|
||||
curl http://localhost:5000/api/v1/notifications -X DELETE -H"x-api-key:813031b16330fe25e3780cf0325daa45" -H "Content-Type: application/json" -d '{"notification_urls": ["url1", "url2"]}'
|
||||
@apiParam {String[]} notification_urls The notification URLs to delete.
|
||||
@apiName Delete
|
||||
@apiGroup Notifications
|
||||
@apiSuccess (204) {String} OK Deleted
|
||||
@apiError (400) {String} No matching notification URLs found.
|
||||
"""
|
||||
"""Delete Notification URLs."""
|
||||
|
||||
json_data = request.get_json()
|
||||
urls_to_delete = json_data.get("notification_urls", [])
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
from flask_restful import Resource, abort
|
||||
from flask import request
|
||||
from . import auth
|
||||
from . import auth, validate_openapi_request
|
||||
|
||||
class Search(Resource):
|
||||
def __init__(self, **kwargs):
|
||||
@@ -8,21 +8,9 @@ class Search(Resource):
|
||||
self.datastore = kwargs['datastore']
|
||||
|
||||
@auth.check_token
|
||||
@validate_openapi_request('searchWatches')
|
||||
def get(self):
|
||||
"""
|
||||
@api {get} /api/v1/search Search for watches
|
||||
@apiDescription Search watches by URL or title text
|
||||
@apiExample {curl} Example usage:
|
||||
curl "http://localhost:5000/api/v1/search?q=https://example.com/page1" -H"x-api-key:813031b16330fe25e3780cf0325daa45"
|
||||
curl "http://localhost:5000/api/v1/search?q=https://example.com/page1?tag=Favourites" -H"x-api-key:813031b16330fe25e3780cf0325daa45"
|
||||
curl "http://localhost:5000/api/v1/search?q=https://example.com?partial=true" -H"x-api-key:813031b16330fe25e3780cf0325daa45"
|
||||
@apiName Search
|
||||
@apiGroup Watch Management
|
||||
@apiQuery {String} q Search query to match against watch URLs and titles
|
||||
@apiQuery {String} [tag] Optional name of tag to limit results (name not UUID)
|
||||
@apiQuery {String} [partial] Allow partial matching of URL query
|
||||
@apiSuccess (200) {Object} JSON Object containing matched watches
|
||||
"""
|
||||
"""Search for watches by URL or title text."""
|
||||
query = request.args.get('q', '').strip()
|
||||
tag_limit = request.args.get('tag', '').strip()
|
||||
from changedetectionio.strtobool import strtobool
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
from flask_restful import Resource
|
||||
from . import auth
|
||||
from . import auth, validate_openapi_request
|
||||
|
||||
|
||||
class SystemInfo(Resource):
|
||||
@@ -9,23 +9,9 @@ class SystemInfo(Resource):
|
||||
self.update_q = kwargs['update_q']
|
||||
|
||||
@auth.check_token
|
||||
@validate_openapi_request('getSystemInfo')
|
||||
def get(self):
|
||||
"""
|
||||
@api {get} /api/v1/systeminfo Return system info
|
||||
@apiDescription Return some info about the current system state
|
||||
@apiExample {curl} Example usage:
|
||||
curl http://localhost:5000/api/v1/systeminfo -H"x-api-key:813031b16330fe25e3780cf0325daa45"
|
||||
HTTP/1.0 200
|
||||
{
|
||||
'queue_size': 10 ,
|
||||
'overdue_watches': ["watch-uuid-list"],
|
||||
'uptime': 38344.55,
|
||||
'watch_count': 800,
|
||||
'version': "0.40.1"
|
||||
}
|
||||
@apiName Get Info
|
||||
@apiGroup System Information
|
||||
"""
|
||||
"""Return system info."""
|
||||
import time
|
||||
overdue_watches = []
|
||||
|
||||
|
||||
@@ -1,39 +1,46 @@
|
||||
from changedetectionio import queuedWatchMetaData
|
||||
from changedetectionio import worker_handler
|
||||
from flask_expects_json import expects_json
|
||||
from flask_restful import abort, Resource
|
||||
|
||||
from flask import request
|
||||
from . import auth
|
||||
|
||||
# Import schemas from __init__.py
|
||||
from . import schema_tag, schema_create_tag, schema_update_tag
|
||||
from . import schema_tag, schema_create_tag, schema_update_tag, validate_openapi_request
|
||||
|
||||
|
||||
class Tag(Resource):
|
||||
def __init__(self, **kwargs):
|
||||
# datastore is a black box dependency
|
||||
self.datastore = kwargs['datastore']
|
||||
self.update_q = kwargs['update_q']
|
||||
|
||||
# Get information about a single tag
|
||||
# curl http://localhost:5000/api/v1/tag/<string:uuid>
|
||||
@auth.check_token
|
||||
@validate_openapi_request('getTag')
|
||||
def get(self, uuid):
|
||||
"""
|
||||
@api {get} /api/v1/tag/:uuid Single tag - get data or toggle notification muting.
|
||||
@apiDescription Retrieve tag information and set notification_muted status
|
||||
@apiExample {curl} Example usage:
|
||||
curl http://localhost:5000/api/v1/tag/cc0cfffa-f449-477b-83ea-0caafd1dc091 -H"x-api-key:813031b16330fe25e3780cf0325daa45"
|
||||
curl "http://localhost:5000/api/v1/tag/cc0cfffa-f449-477b-83ea-0caafd1dc091?muted=muted" -H"x-api-key:813031b16330fe25e3780cf0325daa45"
|
||||
@apiName Tag
|
||||
@apiGroup Tag
|
||||
@apiParam {uuid} uuid Tag unique ID.
|
||||
@apiQuery {String} [muted] =`muted` or =`unmuted` , Sets the MUTE NOTIFICATIONS state
|
||||
@apiSuccess (200) {String} OK When muted operation OR full JSON object of the tag
|
||||
@apiSuccess (200) {JSON} TagJSON JSON Full JSON object of the tag
|
||||
"""
|
||||
"""Get data for a single tag/group, toggle notification muting, or recheck all."""
|
||||
from copy import deepcopy
|
||||
tag = deepcopy(self.datastore.data['settings']['application']['tags'].get(uuid))
|
||||
if not tag:
|
||||
abort(404, message=f'No tag exists with the UUID of {uuid}')
|
||||
|
||||
if request.args.get('recheck'):
|
||||
# Recheck all, including muted
|
||||
# Get most overdue first
|
||||
i=0
|
||||
for k in sorted(self.datastore.data['watching'].items(), key=lambda item: item[1].get('last_checked', 0)):
|
||||
watch_uuid = k[0]
|
||||
watch = k[1]
|
||||
if not watch['paused'] and tag['uuid'] not in watch['tags']:
|
||||
continue
|
||||
worker_handler.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid}))
|
||||
i+=1
|
||||
|
||||
return f"OK, {i} watches queued", 200
|
||||
|
||||
if request.args.get('muted', '') == 'muted':
|
||||
self.datastore.data['settings']['application']['tags'][uuid]['notification_muted'] = True
|
||||
return "OK", 200
|
||||
@@ -44,16 +51,9 @@ class Tag(Resource):
|
||||
return tag
|
||||
|
||||
@auth.check_token
|
||||
@validate_openapi_request('deleteTag')
|
||||
def delete(self, uuid):
|
||||
"""
|
||||
@api {delete} /api/v1/tag/:uuid Delete a tag and remove it from all watches
|
||||
@apiExample {curl} Example usage:
|
||||
curl http://localhost:5000/api/v1/tag/cc0cfffa-f449-477b-83ea-0caafd1dc091 -X DELETE -H"x-api-key:813031b16330fe25e3780cf0325daa45"
|
||||
@apiParam {uuid} uuid Tag unique ID.
|
||||
@apiName DeleteTag
|
||||
@apiGroup Tag
|
||||
@apiSuccess (200) {String} OK Was deleted
|
||||
"""
|
||||
"""Delete a tag/group and remove it from all watches."""
|
||||
if not self.datastore.data['settings']['application']['tags'].get(uuid):
|
||||
abort(400, message='No tag exists with the UUID of {}'.format(uuid))
|
||||
|
||||
@@ -68,21 +68,10 @@ class Tag(Resource):
|
||||
return 'OK', 204
|
||||
|
||||
@auth.check_token
|
||||
@validate_openapi_request('updateTag')
|
||||
@expects_json(schema_update_tag)
|
||||
def put(self, uuid):
|
||||
"""
|
||||
@api {put} /api/v1/tag/:uuid Update tag information
|
||||
@apiExample {curl} Example usage:
|
||||
Update (PUT)
|
||||
curl http://localhost:5000/api/v1/tag/cc0cfffa-f449-477b-83ea-0caafd1dc091 -X PUT -H"x-api-key:813031b16330fe25e3780cf0325daa45" -H "Content-Type: application/json" -d '{"title": "New Tag Title"}'
|
||||
|
||||
@apiDescription Updates an existing tag using JSON
|
||||
@apiParam {uuid} uuid Tag unique ID.
|
||||
@apiName UpdateTag
|
||||
@apiGroup Tag
|
||||
@apiSuccess (200) {String} OK Was updated
|
||||
@apiSuccess (500) {String} ERR Some other error
|
||||
"""
|
||||
"""Update tag information."""
|
||||
tag = self.datastore.data['settings']['application']['tags'].get(uuid)
|
||||
if not tag:
|
||||
abort(404, message='No tag exists with the UUID of {}'.format(uuid))
|
||||
@@ -94,17 +83,10 @@ class Tag(Resource):
|
||||
|
||||
|
||||
@auth.check_token
|
||||
@validate_openapi_request('createTag')
|
||||
# Only cares for {'title': 'xxxx'}
|
||||
def post(self):
|
||||
"""
|
||||
@api {post} /api/v1/watch Create a single tag
|
||||
@apiExample {curl} Example usage:
|
||||
curl http://localhost:5000/api/v1/watch -H"x-api-key:813031b16330fe25e3780cf0325daa45" -H "Content-Type: application/json" -d '{"name": "Work related"}'
|
||||
@apiName Create
|
||||
@apiGroup Tag
|
||||
@apiSuccess (200) {String} OK Was created
|
||||
@apiSuccess (500) {String} ERR Some other error
|
||||
"""
|
||||
"""Create a single tag/group."""
|
||||
|
||||
json_data = request.get_json()
|
||||
title = json_data.get("title",'').strip()
|
||||
@@ -122,28 +104,9 @@ class Tags(Resource):
|
||||
self.datastore = kwargs['datastore']
|
||||
|
||||
@auth.check_token
|
||||
@validate_openapi_request('listTags')
|
||||
def get(self):
|
||||
"""
|
||||
@api {get} /api/v1/tags List tags
|
||||
@apiDescription Return list of available tags
|
||||
@apiExample {curl} Example usage:
|
||||
curl http://localhost:5000/api/v1/tags -H"x-api-key:813031b16330fe25e3780cf0325daa45"
|
||||
{
|
||||
"cc0cfffa-f449-477b-83ea-0caafd1dc091": {
|
||||
"title": "Tech News",
|
||||
"notification_muted": false,
|
||||
"date_created": 1677103794
|
||||
},
|
||||
"e6f5fd5c-dbfe-468b-b8f3-f9d6ff5ad69b": {
|
||||
"title": "Shopping",
|
||||
"notification_muted": true,
|
||||
"date_created": 1676662819
|
||||
}
|
||||
}
|
||||
@apiName ListTags
|
||||
@apiGroup Tag Management
|
||||
@apiSuccess (200) {String} OK JSON dict
|
||||
"""
|
||||
"""List tags/groups."""
|
||||
result = {}
|
||||
for uuid, tag in self.datastore.data['settings']['application']['tags'].items():
|
||||
result[uuid] = {
|
||||
|
||||
@@ -1,16 +1,51 @@
|
||||
import os
|
||||
from changedetectionio.strtobool import strtobool
|
||||
|
||||
from changedetectionio.validate_url import is_safe_valid_url
|
||||
|
||||
from flask_expects_json import expects_json
|
||||
from changedetectionio import queuedWatchMetaData
|
||||
from changedetectionio import queuedWatchMetaData, strtobool
|
||||
from changedetectionio import worker_handler
|
||||
from flask_restful import abort, Resource
|
||||
from flask import request, make_response
|
||||
import validators
|
||||
from flask import request, make_response, send_from_directory
|
||||
from . import auth
|
||||
import copy
|
||||
|
||||
# Import schemas from __init__.py
|
||||
from . import schema, schema_create_watch, schema_update_watch
|
||||
from . import schema, schema_create_watch, schema_update_watch, validate_openapi_request
|
||||
from ..notification_service import CUSTOM_LINEBREAK_PLACEHOLDER
|
||||
|
||||
|
||||
def validate_time_between_check_required(json_data):
|
||||
"""
|
||||
Validate that at least one time interval is specified when not using default settings.
|
||||
Returns None if valid, or error message string if invalid.
|
||||
Defaults to using global settings if time_between_check_use_default is not provided.
|
||||
"""
|
||||
# Default to using global settings if not specified
|
||||
use_default = json_data.get('time_between_check_use_default', True)
|
||||
|
||||
# If using default settings, no validation needed
|
||||
if use_default:
|
||||
return None
|
||||
|
||||
# If not using defaults, check if time_between_check exists and has at least one non-zero value
|
||||
time_check = json_data.get('time_between_check')
|
||||
if not time_check:
|
||||
# No time_between_check provided and not using defaults - this is an error
|
||||
return "At least one time interval (weeks, days, hours, minutes, or seconds) must be specified when not using global settings."
|
||||
|
||||
# time_between_check exists, check if it has at least one non-zero value
|
||||
if any([
|
||||
(time_check.get('weeks') or 0) > 0,
|
||||
(time_check.get('days') or 0) > 0,
|
||||
(time_check.get('hours') or 0) > 0,
|
||||
(time_check.get('minutes') or 0) > 0,
|
||||
(time_check.get('seconds') or 0) > 0
|
||||
]):
|
||||
return None
|
||||
|
||||
# time_between_check exists but all values are 0 or empty - this is an error
|
||||
return "At least one time interval (weeks, days, hours, minutes, or seconds) must be specified when not using global settings."
|
||||
|
||||
|
||||
class Watch(Resource):
|
||||
@@ -24,30 +59,16 @@ class Watch(Resource):
|
||||
# @todo - version2 - ?muted and ?paused should be able to be called together, return the watch struct not "OK"
|
||||
# ?recheck=true
|
||||
@auth.check_token
|
||||
@validate_openapi_request('getWatch')
|
||||
def get(self, uuid):
|
||||
"""
|
||||
@api {get} /api/v1/watch/:uuid Single watch - get data, recheck, pause, mute.
|
||||
@apiDescription Retrieve watch information and set muted/paused status
|
||||
@apiExample {curl} Example usage:
|
||||
curl http://localhost:5000/api/v1/watch/cc0cfffa-f449-477b-83ea-0caafd1dc091 -H"x-api-key:813031b16330fe25e3780cf0325daa45"
|
||||
curl "http://localhost:5000/api/v1/watch/cc0cfffa-f449-477b-83ea-0caafd1dc091?muted=unmuted" -H"x-api-key:813031b16330fe25e3780cf0325daa45"
|
||||
curl "http://localhost:5000/api/v1/watch/cc0cfffa-f449-477b-83ea-0caafd1dc091?paused=unpaused" -H"x-api-key:813031b16330fe25e3780cf0325daa45"
|
||||
@apiName Watch
|
||||
@apiGroup Watch
|
||||
@apiParam {uuid} uuid Watch unique ID.
|
||||
@apiQuery {Boolean} [recheck] Recheck this watch `recheck=1`
|
||||
@apiQuery {String} [paused] =`paused` or =`unpaused` , Sets the PAUSED state
|
||||
@apiQuery {String} [muted] =`muted` or =`unmuted` , Sets the MUTE NOTIFICATIONS state
|
||||
@apiSuccess (200) {String} OK When paused/muted/recheck operation OR full JSON object of the watch
|
||||
@apiSuccess (200) {JSON} WatchJSON JSON Full JSON object of the watch
|
||||
"""
|
||||
"""Get information about a single watch, recheck, pause, or mute."""
|
||||
from copy import deepcopy
|
||||
watch = deepcopy(self.datastore.data['watching'].get(uuid))
|
||||
if not watch:
|
||||
abort(404, message='No watch exists with the UUID of {}'.format(uuid))
|
||||
|
||||
if request.args.get('recheck'):
|
||||
self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
worker_handler.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
return "OK", 200
|
||||
if request.args.get('paused', '') == 'paused':
|
||||
self.datastore.data['watching'].get(uuid).pause()
|
||||
@@ -68,19 +89,14 @@ class Watch(Resource):
|
||||
# attr .last_changed will check for the last written text snapshot on change
|
||||
watch['last_changed'] = watch.last_changed
|
||||
watch['viewed'] = watch.viewed
|
||||
watch['link'] = watch.link,
|
||||
|
||||
return watch
|
||||
|
||||
@auth.check_token
|
||||
@validate_openapi_request('deleteWatch')
|
||||
def delete(self, uuid):
|
||||
"""
|
||||
@api {delete} /api/v1/watch/:uuid Delete a watch and related history
|
||||
@apiExample {curl} Example usage:
|
||||
curl http://localhost:5000/api/v1/watch/cc0cfffa-f449-477b-83ea-0caafd1dc091 -X DELETE -H"x-api-key:813031b16330fe25e3780cf0325daa45"
|
||||
@apiParam {uuid} uuid Watch unique ID.
|
||||
@apiName Delete
|
||||
@apiGroup Watch
|
||||
@apiSuccess (200) {String} OK Was deleted
|
||||
"""
|
||||
"""Delete a watch and related history."""
|
||||
if not self.datastore.data['watching'].get(uuid):
|
||||
abort(400, message='No watch exists with the UUID of {}'.format(uuid))
|
||||
|
||||
@@ -88,21 +104,10 @@ class Watch(Resource):
|
||||
return 'OK', 204
|
||||
|
||||
@auth.check_token
|
||||
@validate_openapi_request('updateWatch')
|
||||
@expects_json(schema_update_watch)
|
||||
def put(self, uuid):
|
||||
"""
|
||||
@api {put} /api/v1/watch/:uuid Update watch information
|
||||
@apiExample {curl} Example usage:
|
||||
Update (PUT)
|
||||
curl http://localhost:5000/api/v1/watch/cc0cfffa-f449-477b-83ea-0caafd1dc091 -X PUT -H"x-api-key:813031b16330fe25e3780cf0325daa45" -H "Content-Type: application/json" -d '{"url": "https://my-nice.com" , "tag": "new list"}'
|
||||
|
||||
@apiDescription Updates an existing watch using JSON, accepts the same structure as returned in <a href="#api-Watch-Watch">get single watch information</a>
|
||||
@apiParam {uuid} uuid Watch unique ID.
|
||||
@apiName Update a watch
|
||||
@apiGroup Watch
|
||||
@apiSuccess (200) {String} OK Was updated
|
||||
@apiSuccess (500) {String} ERR Some other error
|
||||
"""
|
||||
"""Update watch information."""
|
||||
watch = self.datastore.data['watching'].get(uuid)
|
||||
if not watch:
|
||||
abort(404, message='No watch exists with the UUID of {}'.format(uuid))
|
||||
@@ -112,6 +117,15 @@ class Watch(Resource):
|
||||
if not request.json.get('proxy') in plist:
|
||||
return "Invalid proxy choice, currently supported proxies are '{}'".format(', '.join(plist)), 400
|
||||
|
||||
# Validate time_between_check when not using defaults
|
||||
validation_error = validate_time_between_check_required(request.json)
|
||||
if validation_error:
|
||||
return validation_error, 400
|
||||
|
||||
# XSS etc protection
|
||||
if request.json.get('url') and not is_safe_valid_url(request.json.get('url')):
|
||||
return "Invalid URL", 400
|
||||
|
||||
watch.update(request.json)
|
||||
|
||||
return "OK", 200
|
||||
@@ -125,22 +139,9 @@ class WatchHistory(Resource):
|
||||
# Get a list of available history for a watch by UUID
|
||||
# curl http://localhost:5000/api/v1/watch/<string:uuid>/history
|
||||
@auth.check_token
|
||||
@validate_openapi_request('getWatchHistory')
|
||||
def get(self, uuid):
|
||||
"""
|
||||
@api {get} /api/v1/watch/<string:uuid>/history Get a list of all historical snapshots available for a watch
|
||||
@apiDescription Requires `uuid`, returns list
|
||||
@apiExample {curl} Example usage:
|
||||
curl http://localhost:5000/api/v1/watch/cc0cfffa-f449-477b-83ea-0caafd1dc091/history -H"x-api-key:813031b16330fe25e3780cf0325daa45" -H "Content-Type: application/json"
|
||||
{
|
||||
"1676649279": "/tmp/data/6a4b7d5c-fee4-4616-9f43-4ac97046b595/cb7e9be8258368262246910e6a2a4c30.txt",
|
||||
"1677092785": "/tmp/data/6a4b7d5c-fee4-4616-9f43-4ac97046b595/e20db368d6fc633e34f559ff67bb4044.txt",
|
||||
"1677103794": "/tmp/data/6a4b7d5c-fee4-4616-9f43-4ac97046b595/02efdd37dacdae96554a8cc85dc9c945.txt"
|
||||
}
|
||||
@apiName Get list of available stored snapshots for watch
|
||||
@apiGroup Watch History
|
||||
@apiSuccess (200) {String} OK
|
||||
@apiSuccess (404) {String} ERR Not found
|
||||
"""
|
||||
"""Get a list of all historical snapshots available for a watch."""
|
||||
watch = self.datastore.data['watching'].get(uuid)
|
||||
if not watch:
|
||||
abort(404, message='No watch exists with the UUID of {}'.format(uuid))
|
||||
@@ -153,18 +154,9 @@ class WatchSingleHistory(Resource):
|
||||
self.datastore = kwargs['datastore']
|
||||
|
||||
@auth.check_token
|
||||
@validate_openapi_request('getWatchSnapshot')
|
||||
def get(self, uuid, timestamp):
|
||||
"""
|
||||
@api {get} /api/v1/watch/<string:uuid>/history/<int:timestamp> Get single snapshot from watch
|
||||
@apiDescription Requires watch `uuid` and `timestamp`. `timestamp` of "`latest`" for latest available snapshot, or <a href="#api-Watch_History-Get_list_of_available_stored_snapshots_for_watch">use the list returned here</a>
|
||||
@apiExample {curl} Example usage:
|
||||
curl http://localhost:5000/api/v1/watch/cc0cfffa-f449-477b-83ea-0caafd1dc091/history/1677092977 -H"x-api-key:813031b16330fe25e3780cf0325daa45" -H "Content-Type: application/json"
|
||||
@apiName Get single snapshot content
|
||||
@apiGroup Watch History
|
||||
@apiParam {String} [html] Optional Set to =1 to return the last HTML (only stores last 2 snapshots, use `latest` as timestamp)
|
||||
@apiSuccess (200) {String} OK
|
||||
@apiSuccess (404) {String} ERR Not found
|
||||
"""
|
||||
"""Get single snapshot from watch."""
|
||||
watch = self.datastore.data['watching'].get(uuid)
|
||||
if not watch:
|
||||
abort(404, message=f"No watch exists with the UUID of {uuid}")
|
||||
@@ -190,6 +182,149 @@ class WatchSingleHistory(Resource):
|
||||
|
||||
return response
|
||||
|
||||
class WatchHistoryDiff(Resource):
|
||||
def __init__(self, **kwargs):
|
||||
# datastore is a black box dependency
|
||||
self.datastore = kwargs['datastore']
|
||||
|
||||
@auth.check_token
|
||||
@validate_openapi_request('getWatchHistoryDiff')
|
||||
def get(self, uuid, from_timestamp, to_timestamp):
|
||||
"""Generate diff between two historical snapshots."""
|
||||
from changedetectionio import diff
|
||||
from changedetectionio.notification.handler import apply_service_tweaks
|
||||
|
||||
watch = self.datastore.data['watching'].get(uuid)
|
||||
if not watch:
|
||||
abort(404, message=f"No watch exists with the UUID of {uuid}")
|
||||
|
||||
if not len(watch.history):
|
||||
abort(404, message=f"Watch found but no history exists for the UUID {uuid}")
|
||||
|
||||
history_keys = list(watch.history.keys())
|
||||
|
||||
# Handle 'latest' keyword for to_timestamp
|
||||
if to_timestamp == 'latest':
|
||||
to_timestamp = history_keys[-1]
|
||||
|
||||
# Handle 'previous' keyword for from_timestamp (second-most-recent)
|
||||
if from_timestamp == 'previous':
|
||||
if len(history_keys) < 2:
|
||||
abort(404, message=f"Not enough history entries. Need at least 2 snapshots for 'previous'")
|
||||
from_timestamp = history_keys[-2]
|
||||
|
||||
# Validate timestamps exist
|
||||
if from_timestamp not in watch.history:
|
||||
abort(404, message=f"From timestamp {from_timestamp} not found in watch history")
|
||||
if to_timestamp not in watch.history:
|
||||
abort(404, message=f"To timestamp {to_timestamp} not found in watch history")
|
||||
|
||||
# Get the format parameter (default to 'text')
|
||||
output_format = request.args.get('format', 'text').lower()
|
||||
|
||||
# Validate format
|
||||
valid_formats = ['text', 'html', 'htmlcolor']
|
||||
if output_format not in valid_formats:
|
||||
abort(400, message=f"Invalid format. Must be one of: {', '.join(valid_formats)}")
|
||||
|
||||
# Get the word_diff parameter (default to False - line-level mode)
|
||||
word_diff = strtobool(request.args.get('word_diff', 'false'))
|
||||
|
||||
# Get the no_markup parameter (default to False)
|
||||
no_markup = strtobool(request.args.get('no_markup', 'false'))
|
||||
|
||||
# Retrieve snapshot contents
|
||||
from_version_file_contents = watch.get_history_snapshot(from_timestamp)
|
||||
to_version_file_contents = watch.get_history_snapshot(to_timestamp)
|
||||
|
||||
# Get diff preferences (using defaults similar to the existing code)
|
||||
diff_prefs = {
|
||||
'diff_ignoreWhitespace': False,
|
||||
'diff_changesOnly': True
|
||||
}
|
||||
|
||||
# Generate the diff
|
||||
content = diff.render_diff(
|
||||
previous_version_file_contents=from_version_file_contents,
|
||||
newest_version_file_contents=to_version_file_contents,
|
||||
ignore_junk=diff_prefs.get('diff_ignoreWhitespace'),
|
||||
include_equal=not diff_prefs.get('diff_changesOnly'),
|
||||
word_diff=word_diff,
|
||||
)
|
||||
|
||||
# Skip formatting if no_markup is set
|
||||
if no_markup:
|
||||
mimetype = "text/plain"
|
||||
else:
|
||||
# Apply formatting based on the requested format
|
||||
if output_format == 'htmlcolor':
|
||||
from changedetectionio.notification.handler import apply_html_color_to_body
|
||||
content = apply_html_color_to_body(n_body=content)
|
||||
mimetype = "text/html"
|
||||
else:
|
||||
# Apply service tweaks for text/html formats
|
||||
# Pass empty URL and title as they're not used for the placeholder replacement we need
|
||||
_, content, _ = apply_service_tweaks(
|
||||
url='',
|
||||
n_body=content,
|
||||
n_title='',
|
||||
requested_output_format=output_format
|
||||
)
|
||||
mimetype = "text/html" if output_format == 'html' else "text/plain"
|
||||
|
||||
import re
|
||||
if 'html' in output_format:
|
||||
content = re.sub(
|
||||
re.escape(CUSTOM_LINEBREAK_PLACEHOLDER) + r'\r?\n?',
|
||||
'<br>\\r\\n',
|
||||
content
|
||||
)
|
||||
else:
|
||||
# texty types
|
||||
content = re.sub(
|
||||
re.escape(CUSTOM_LINEBREAK_PLACEHOLDER) + r'\r?\n?',
|
||||
'\\r\\n',
|
||||
content
|
||||
)
|
||||
|
||||
response = make_response(content, 200)
|
||||
response.mimetype = mimetype
|
||||
return response
|
||||
|
||||
|
||||
class WatchFavicon(Resource):
|
||||
def __init__(self, **kwargs):
|
||||
# datastore is a black box dependency
|
||||
self.datastore = kwargs['datastore']
|
||||
|
||||
@auth.check_token
|
||||
@validate_openapi_request('getWatchFavicon')
|
||||
def get(self, uuid):
|
||||
"""Get favicon for a watch."""
|
||||
watch = self.datastore.data['watching'].get(uuid)
|
||||
if not watch:
|
||||
abort(404, message=f"No watch exists with the UUID of {uuid}")
|
||||
|
||||
favicon_filename = watch.get_favicon_filename()
|
||||
if favicon_filename:
|
||||
try:
|
||||
import magic
|
||||
mime = magic.from_file(
|
||||
os.path.join(watch.watch_data_dir, favicon_filename),
|
||||
mime=True
|
||||
)
|
||||
except ImportError:
|
||||
# Fallback, no python-magic
|
||||
import mimetypes
|
||||
mime, encoding = mimetypes.guess_type(favicon_filename)
|
||||
|
||||
response = make_response(send_from_directory(watch.watch_data_dir, favicon_filename))
|
||||
response.headers['Content-type'] = mime
|
||||
response.headers['Cache-Control'] = 'max-age=300, must-revalidate' # Cache for 5 minutes, then revalidate
|
||||
return response
|
||||
|
||||
abort(404, message=f'No Favicon available for {uuid}')
|
||||
|
||||
|
||||
class CreateWatch(Resource):
|
||||
def __init__(self, **kwargs):
|
||||
@@ -198,25 +333,15 @@ class CreateWatch(Resource):
|
||||
self.update_q = kwargs['update_q']
|
||||
|
||||
@auth.check_token
|
||||
@validate_openapi_request('createWatch')
|
||||
@expects_json(schema_create_watch)
|
||||
def post(self):
|
||||
"""
|
||||
@api {post} /api/v1/watch Create a single watch
|
||||
@apiDescription Requires atleast `url` set, can accept the same structure as <a href="#api-Watch-Watch">get single watch information</a> to create.
|
||||
@apiExample {curl} Example usage:
|
||||
curl http://localhost:5000/api/v1/watch -H"x-api-key:813031b16330fe25e3780cf0325daa45" -H "Content-Type: application/json" -d '{"url": "https://my-nice.com" , "tag": "nice list"}'
|
||||
@apiName Create
|
||||
@apiGroup Watch
|
||||
@apiSuccess (200) {String} OK Was created
|
||||
@apiSuccess (500) {String} ERR Some other error
|
||||
"""
|
||||
"""Create a single watch."""
|
||||
|
||||
json_data = request.get_json()
|
||||
url = json_data['url'].strip()
|
||||
|
||||
# If hosts that only contain alphanumerics are allowed ("localhost" for example)
|
||||
allow_simplehost = not strtobool(os.getenv('BLOCK_SIMPLEHOSTS', 'False'))
|
||||
if not validators.url(url, simple_host=allow_simplehost):
|
||||
if not is_safe_valid_url(url):
|
||||
return "Invalid or unsupported URL", 400
|
||||
|
||||
if json_data.get('proxy'):
|
||||
@@ -224,6 +349,11 @@ class CreateWatch(Resource):
|
||||
if not json_data.get('proxy') in plist:
|
||||
return "Invalid proxy choice, currently supported proxies are '{}'".format(', '.join(plist)), 400
|
||||
|
||||
# Validate time_between_check when not using defaults
|
||||
validation_error = validate_time_between_check_required(json_data)
|
||||
if validation_error:
|
||||
return validation_error, 400
|
||||
|
||||
extras = copy.deepcopy(json_data)
|
||||
|
||||
# Because we renamed 'tag' to 'tags' but don't want to change the API (can do this in v2 of the API)
|
||||
@@ -236,41 +366,15 @@ class CreateWatch(Resource):
|
||||
|
||||
new_uuid = self.datastore.add_watch(url=url, extras=extras, tag=tags)
|
||||
if new_uuid:
|
||||
self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid}))
|
||||
worker_handler.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid}))
|
||||
return {'uuid': new_uuid}, 201
|
||||
else:
|
||||
return "Invalid or unsupported URL", 400
|
||||
|
||||
@auth.check_token
|
||||
@validate_openapi_request('listWatches')
|
||||
def get(self):
|
||||
"""
|
||||
@api {get} /api/v1/watch List watches
|
||||
@apiDescription Return concise list of available watches and some very basic info
|
||||
@apiExample {curl} Example usage:
|
||||
curl http://localhost:5000/api/v1/watch -H"x-api-key:813031b16330fe25e3780cf0325daa45"
|
||||
{
|
||||
"6a4b7d5c-fee4-4616-9f43-4ac97046b595": {
|
||||
"last_changed": 1677103794,
|
||||
"last_checked": 1677103794,
|
||||
"last_error": false,
|
||||
"title": "",
|
||||
"url": "http://www.quotationspage.com/random.php"
|
||||
},
|
||||
"e6f5fd5c-dbfe-468b-b8f3-f9d6ff5ad69b": {
|
||||
"last_changed": 0,
|
||||
"last_checked": 1676662819,
|
||||
"last_error": false,
|
||||
"title": "QuickLook",
|
||||
"url": "https://github.com/QL-Win/QuickLook/tags"
|
||||
}
|
||||
}
|
||||
|
||||
@apiParam {String} [recheck_all] Optional Set to =1 to force recheck of all watches
|
||||
@apiParam {String} [tag] Optional name of tag to limit results
|
||||
@apiName ListWatches
|
||||
@apiGroup Watch Management
|
||||
@apiSuccess (200) {String} OK JSON dict
|
||||
"""
|
||||
"""List watches."""
|
||||
list = {}
|
||||
|
||||
tag_limit = request.args.get('tag', '').lower()
|
||||
@@ -284,6 +388,8 @@ class CreateWatch(Resource):
|
||||
'last_changed': watch.last_changed,
|
||||
'last_checked': watch['last_checked'],
|
||||
'last_error': watch['last_error'],
|
||||
'link': watch.link,
|
||||
'page_title': watch['page_title'],
|
||||
'title': watch['title'],
|
||||
'url': watch['url'],
|
||||
'viewed': watch.viewed
|
||||
@@ -291,7 +397,7 @@ class CreateWatch(Resource):
|
||||
|
||||
if request.args.get('recheck_all'):
|
||||
for uuid in self.datastore.data['watching'].keys():
|
||||
self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
worker_handler.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
return {'status': "OK"}, 200
|
||||
|
||||
return list, 200
|
||||
@@ -1,4 +1,7 @@
|
||||
import copy
|
||||
import functools
|
||||
from flask import request, abort
|
||||
from loguru import logger
|
||||
from . import api_schema
|
||||
from ..model import watch_base
|
||||
|
||||
@@ -8,6 +11,7 @@ schema = api_schema.build_watch_json_schema(watch_base_config)
|
||||
|
||||
schema_create_watch = copy.deepcopy(schema)
|
||||
schema_create_watch['required'] = ['url']
|
||||
del schema_create_watch['properties']['last_viewed']
|
||||
|
||||
schema_update_watch = copy.deepcopy(schema)
|
||||
schema_update_watch['additionalProperties'] = False
|
||||
@@ -25,9 +29,58 @@ schema_create_notification_urls['required'] = ['notification_urls']
|
||||
schema_delete_notification_urls = copy.deepcopy(schema_notification_urls)
|
||||
schema_delete_notification_urls['required'] = ['notification_urls']
|
||||
|
||||
@functools.cache
|
||||
def get_openapi_spec():
|
||||
"""Lazy load OpenAPI spec and dependencies only when validation is needed."""
|
||||
import os
|
||||
import yaml # Lazy import - only loaded when API validation is actually used
|
||||
from openapi_core import OpenAPI # Lazy import - saves ~10.7 MB on startup
|
||||
|
||||
spec_path = os.path.join(os.path.dirname(__file__), '../../docs/api-spec.yaml')
|
||||
if not os.path.exists(spec_path):
|
||||
# Possibly for pip3 packages
|
||||
spec_path = os.path.join(os.path.dirname(__file__), '../docs/api-spec.yaml')
|
||||
|
||||
with open(spec_path, 'r') as f:
|
||||
spec_dict = yaml.safe_load(f)
|
||||
_openapi_spec = OpenAPI.from_dict(spec_dict)
|
||||
return _openapi_spec
|
||||
|
||||
def validate_openapi_request(operation_id):
|
||||
"""Decorator to validate incoming requests against OpenAPI spec."""
|
||||
def decorator(f):
|
||||
@functools.wraps(f)
|
||||
def wrapper(*args, **kwargs):
|
||||
from werkzeug.exceptions import BadRequest
|
||||
try:
|
||||
# Skip OpenAPI validation for GET requests since they don't have request bodies
|
||||
if request.method.upper() != 'GET':
|
||||
# Lazy import - only loaded when actually validating a request
|
||||
from openapi_core.contrib.flask import FlaskOpenAPIRequest
|
||||
|
||||
spec = get_openapi_spec()
|
||||
openapi_request = FlaskOpenAPIRequest(request)
|
||||
result = spec.unmarshal_request(openapi_request)
|
||||
if result.errors:
|
||||
error_details = []
|
||||
for error in result.errors:
|
||||
error_details.append(str(error))
|
||||
raise BadRequest(f"OpenAPI validation failed: {error_details}")
|
||||
except BadRequest:
|
||||
# Re-raise BadRequest exceptions (validation failures)
|
||||
raise
|
||||
except Exception as e:
|
||||
# If OpenAPI spec loading fails, log but don't break existing functionality
|
||||
logger.critical(f"OpenAPI validation warning for {operation_id}: {e}")
|
||||
abort(500)
|
||||
return f(*args, **kwargs)
|
||||
return wrapper
|
||||
return decorator
|
||||
|
||||
# Import all API resources
|
||||
from .Watch import Watch, WatchHistory, WatchSingleHistory, CreateWatch
|
||||
from .Watch import Watch, WatchHistory, WatchSingleHistory, WatchHistoryDiff, CreateWatch, WatchFavicon
|
||||
from .Tags import Tags, Tag
|
||||
from .Import import Import
|
||||
from .SystemInfo import SystemInfo
|
||||
from .Notifications import Notifications
|
||||
|
||||
|
||||
@@ -78,6 +78,13 @@ def build_watch_json_schema(d):
|
||||
]:
|
||||
schema['properties'][v]['anyOf'].append({'type': 'string', "maxLength": 5000})
|
||||
|
||||
for v in ['last_viewed']:
|
||||
schema['properties'][v] = {
|
||||
"type": "integer",
|
||||
"description": "Unix timestamp in seconds of the last time the watch was viewed.",
|
||||
"minimum": 0
|
||||
}
|
||||
|
||||
# None or Boolean
|
||||
schema['properties']['track_ldjson_price_data']['anyOf'].append({'type': 'boolean'})
|
||||
|
||||
@@ -112,6 +119,12 @@ def build_watch_json_schema(d):
|
||||
|
||||
schema['properties']['time_between_check'] = build_time_between_check_json_schema()
|
||||
|
||||
schema['properties']['time_between_check_use_default'] = {
|
||||
"type": "boolean",
|
||||
"default": True,
|
||||
"description": "Whether to use global settings for time between checks - defaults to true if not set"
|
||||
}
|
||||
|
||||
schema['properties']['browser_steps'] = {
|
||||
"anyOf": [
|
||||
{
|
||||
|
||||
496
changedetectionio/async_update_worker.py
Normal file
496
changedetectionio/async_update_worker.py
Normal file
@@ -0,0 +1,496 @@
|
||||
from .processors.exceptions import ProcessorException
|
||||
import changedetectionio.content_fetchers.exceptions as content_fetchers_exceptions
|
||||
from changedetectionio.processors.text_json_diff.processor import FilterNotFoundInResponse
|
||||
from changedetectionio import html_tools
|
||||
from changedetectionio.flask_app import watch_check_update
|
||||
|
||||
import asyncio
|
||||
import importlib
|
||||
import os
|
||||
import queue
|
||||
import time
|
||||
|
||||
from loguru import logger
|
||||
|
||||
# Async version of update_worker
|
||||
# Processes jobs from AsyncSignalPriorityQueue instead of threaded queue
|
||||
|
||||
async def async_update_worker(worker_id, q, notification_q, app, datastore):
|
||||
"""
|
||||
Async worker function that processes watch check jobs from the queue.
|
||||
|
||||
Args:
|
||||
worker_id: Unique identifier for this worker
|
||||
q: AsyncSignalPriorityQueue containing jobs to process
|
||||
notification_q: Standard queue for notifications
|
||||
app: Flask application instance
|
||||
datastore: Application datastore
|
||||
"""
|
||||
# Set a descriptive name for this task
|
||||
task = asyncio.current_task()
|
||||
if task:
|
||||
task.set_name(f"async-worker-{worker_id}")
|
||||
|
||||
logger.info(f"Starting async worker {worker_id}")
|
||||
|
||||
while not app.config.exit.is_set():
|
||||
update_handler = None
|
||||
watch = None
|
||||
|
||||
try:
|
||||
# Use native janus async interface - no threads needed!
|
||||
queued_item_data = await asyncio.wait_for(q.async_get(), timeout=1.0)
|
||||
|
||||
except asyncio.TimeoutError:
|
||||
# No jobs available, continue loop
|
||||
continue
|
||||
except Exception as e:
|
||||
logger.critical(f"CRITICAL: Worker {worker_id} failed to get queue item: {type(e).__name__}: {e}")
|
||||
|
||||
# Log queue health for debugging
|
||||
try:
|
||||
queue_size = q.qsize()
|
||||
is_empty = q.empty()
|
||||
logger.critical(f"CRITICAL: Worker {worker_id} queue health - size: {queue_size}, empty: {is_empty}")
|
||||
except Exception as health_e:
|
||||
logger.critical(f"CRITICAL: Worker {worker_id} queue health check failed: {health_e}")
|
||||
|
||||
await asyncio.sleep(0.1)
|
||||
continue
|
||||
|
||||
uuid = queued_item_data.item.get('uuid')
|
||||
fetch_start_time = round(time.time())
|
||||
|
||||
# Mark this UUID as being processed
|
||||
from changedetectionio import worker_handler
|
||||
worker_handler.set_uuid_processing(uuid, processing=True)
|
||||
|
||||
try:
|
||||
if uuid in list(datastore.data['watching'].keys()) and datastore.data['watching'][uuid].get('url'):
|
||||
changed_detected = False
|
||||
contents = b''
|
||||
process_changedetection_results = True
|
||||
update_obj = {}
|
||||
|
||||
# Clear last errors
|
||||
datastore.data['watching'][uuid]['browser_steps_last_error_step'] = None
|
||||
datastore.data['watching'][uuid]['last_checked'] = fetch_start_time
|
||||
|
||||
watch = datastore.data['watching'].get(uuid)
|
||||
|
||||
logger.info(f"Worker {worker_id} processing watch UUID {uuid} Priority {queued_item_data.priority} URL {watch['url']}")
|
||||
|
||||
try:
|
||||
watch_check_update.send(watch_uuid=uuid)
|
||||
|
||||
# Processor is what we are using for detecting the "Change"
|
||||
processor = watch.get('processor', 'text_json_diff')
|
||||
|
||||
# Init a new 'difference_detection_processor'
|
||||
processor_module_name = f"changedetectionio.processors.{processor}.processor"
|
||||
try:
|
||||
processor_module = importlib.import_module(processor_module_name)
|
||||
except ModuleNotFoundError as e:
|
||||
print(f"Processor module '{processor}' not found.")
|
||||
raise e
|
||||
|
||||
update_handler = processor_module.perform_site_check(datastore=datastore,
|
||||
watch_uuid=uuid)
|
||||
|
||||
# All fetchers are now async, so call directly
|
||||
await update_handler.call_browser()
|
||||
|
||||
# Run change detection (this is synchronous)
|
||||
changed_detected, update_obj, contents = update_handler.run_changedetection(watch=watch)
|
||||
|
||||
except PermissionError as e:
|
||||
logger.critical(f"File permission error updating file, watch: {uuid}")
|
||||
logger.critical(str(e))
|
||||
process_changedetection_results = False
|
||||
|
||||
except ProcessorException as e:
|
||||
if e.screenshot:
|
||||
watch.save_screenshot(screenshot=e.screenshot)
|
||||
if e.xpath_data:
|
||||
watch.save_xpath_data(data=e.xpath_data)
|
||||
datastore.update_watch(uuid=uuid, update_obj={'last_error': e.message})
|
||||
process_changedetection_results = False
|
||||
|
||||
except content_fetchers_exceptions.ReplyWithContentButNoText as e:
|
||||
extra_help = ""
|
||||
if e.has_filters:
|
||||
has_img = html_tools.include_filters(include_filters='img',
|
||||
html_content=e.html_content)
|
||||
if has_img:
|
||||
extra_help = ", it's possible that the filters you have give an empty result or contain only an image."
|
||||
else:
|
||||
extra_help = ", it's possible that the filters were found, but contained no usable text."
|
||||
|
||||
datastore.update_watch(uuid=uuid, update_obj={
|
||||
'last_error': f"Got HTML content but no text found (With {e.status_code} reply code){extra_help}"
|
||||
})
|
||||
|
||||
if e.screenshot:
|
||||
watch.save_screenshot(screenshot=e.screenshot, as_error=True)
|
||||
|
||||
if e.xpath_data:
|
||||
watch.save_xpath_data(data=e.xpath_data)
|
||||
|
||||
process_changedetection_results = False
|
||||
|
||||
except content_fetchers_exceptions.Non200ErrorCodeReceived as e:
|
||||
if e.status_code == 403:
|
||||
err_text = "Error - 403 (Access denied) received"
|
||||
elif e.status_code == 404:
|
||||
err_text = "Error - 404 (Page not found) received"
|
||||
elif e.status_code == 407:
|
||||
err_text = "Error - 407 (Proxy authentication required) received, did you need a username and password for the proxy?"
|
||||
elif e.status_code == 500:
|
||||
err_text = "Error - 500 (Internal server error) received from the web site"
|
||||
else:
|
||||
extra = ' (Access denied or blocked)' if str(e.status_code).startswith('4') else ''
|
||||
err_text = f"Error - Request returned a HTTP error code {e.status_code}{extra}"
|
||||
|
||||
if e.screenshot:
|
||||
watch.save_screenshot(screenshot=e.screenshot, as_error=True)
|
||||
if e.xpath_data:
|
||||
watch.save_xpath_data(data=e.xpath_data, as_error=True)
|
||||
if e.page_text:
|
||||
watch.save_error_text(contents=e.page_text)
|
||||
|
||||
datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text})
|
||||
process_changedetection_results = False
|
||||
|
||||
except FilterNotFoundInResponse as e:
|
||||
if not datastore.data['watching'].get(uuid):
|
||||
continue
|
||||
|
||||
err_text = "Warning, no filters were found, no change detection ran - Did the page change layout? update your Visual Filter if necessary."
|
||||
datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text})
|
||||
|
||||
# Filter wasnt found, but we should still update the visual selector so that they can have a chance to set it up again
|
||||
if e.screenshot:
|
||||
watch.save_screenshot(screenshot=e.screenshot)
|
||||
|
||||
if e.xpath_data:
|
||||
watch.save_xpath_data(data=e.xpath_data)
|
||||
|
||||
# Only when enabled, send the notification
|
||||
if watch.get('filter_failure_notification_send', False):
|
||||
c = watch.get('consecutive_filter_failures', 0)
|
||||
c += 1
|
||||
# Send notification if we reached the threshold?
|
||||
threshold = datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts', 0)
|
||||
logger.debug(f"Filter for {uuid} not found, consecutive_filter_failures: {c} of threshold {threshold}")
|
||||
if c >= threshold:
|
||||
if not watch.get('notification_muted'):
|
||||
logger.debug(f"Sending filter failed notification for {uuid}")
|
||||
await send_filter_failure_notification(uuid, notification_q, datastore)
|
||||
c = 0
|
||||
logger.debug(f"Reset filter failure count back to zero")
|
||||
|
||||
datastore.update_watch(uuid=uuid, update_obj={'consecutive_filter_failures': c})
|
||||
else:
|
||||
logger.trace(f"{uuid} - filter_failure_notification_send not enabled, skipping")
|
||||
|
||||
process_changedetection_results = False
|
||||
|
||||
except content_fetchers_exceptions.checksumFromPreviousCheckWasTheSame as e:
|
||||
# Yes fine, so nothing todo, don't continue to process.
|
||||
process_changedetection_results = False
|
||||
changed_detected = False
|
||||
|
||||
except content_fetchers_exceptions.BrowserConnectError as e:
|
||||
datastore.update_watch(uuid=uuid,
|
||||
update_obj={'last_error': e.msg})
|
||||
process_changedetection_results = False
|
||||
|
||||
except content_fetchers_exceptions.BrowserFetchTimedOut as e:
|
||||
datastore.update_watch(uuid=uuid,
|
||||
update_obj={'last_error': e.msg})
|
||||
process_changedetection_results = False
|
||||
|
||||
except content_fetchers_exceptions.BrowserStepsStepException as e:
|
||||
if not datastore.data['watching'].get(uuid):
|
||||
continue
|
||||
|
||||
error_step = e.step_n + 1
|
||||
from playwright._impl._errors import TimeoutError, Error
|
||||
|
||||
# Generally enough info for TimeoutError (couldnt locate the element after default seconds)
|
||||
err_text = f"Browser step at position {error_step} could not run, check the watch, add a delay if necessary, view Browser Steps to see screenshot at that step."
|
||||
|
||||
if e.original_e.name == "TimeoutError":
|
||||
# Just the first line is enough, the rest is the stack trace
|
||||
err_text += " Could not find the target."
|
||||
else:
|
||||
# Other Error, more info is good.
|
||||
err_text += " " + str(e.original_e).splitlines()[0]
|
||||
|
||||
logger.debug(f"BrowserSteps exception at step {error_step} {str(e.original_e)}")
|
||||
|
||||
datastore.update_watch(uuid=uuid,
|
||||
update_obj={'last_error': err_text,
|
||||
'browser_steps_last_error_step': error_step})
|
||||
|
||||
if watch.get('filter_failure_notification_send', False):
|
||||
c = watch.get('consecutive_filter_failures', 0)
|
||||
c += 1
|
||||
# Send notification if we reached the threshold?
|
||||
threshold = datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts', 0)
|
||||
logger.error(f"Step for {uuid} not found, consecutive_filter_failures: {c}")
|
||||
if threshold > 0 and c >= threshold:
|
||||
if not watch.get('notification_muted'):
|
||||
await send_step_failure_notification(watch_uuid=uuid, step_n=e.step_n, notification_q=notification_q, datastore=datastore)
|
||||
c = 0
|
||||
|
||||
datastore.update_watch(uuid=uuid, update_obj={'consecutive_filter_failures': c})
|
||||
|
||||
process_changedetection_results = False
|
||||
|
||||
except content_fetchers_exceptions.EmptyReply as e:
|
||||
# Some kind of custom to-str handler in the exception handler that does this?
|
||||
err_text = "EmptyReply - try increasing 'Wait seconds before extracting text', Status Code {}".format(e.status_code)
|
||||
datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
|
||||
'last_check_status': e.status_code})
|
||||
process_changedetection_results = False
|
||||
|
||||
except content_fetchers_exceptions.ScreenshotUnavailable as e:
|
||||
err_text = "Screenshot unavailable, page did not render fully in the expected time or page was too long - try increasing 'Wait seconds before extracting text'"
|
||||
datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
|
||||
'last_check_status': e.status_code})
|
||||
process_changedetection_results = False
|
||||
|
||||
except content_fetchers_exceptions.JSActionExceptions as e:
|
||||
err_text = "Error running JS Actions - Page request - "+e.message
|
||||
if e.screenshot:
|
||||
watch.save_screenshot(screenshot=e.screenshot, as_error=True)
|
||||
datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
|
||||
'last_check_status': e.status_code})
|
||||
process_changedetection_results = False
|
||||
|
||||
except content_fetchers_exceptions.PageUnloadable as e:
|
||||
err_text = "Page request from server didnt respond correctly"
|
||||
if e.message:
|
||||
err_text = "{} - {}".format(err_text, e.message)
|
||||
|
||||
if e.screenshot:
|
||||
watch.save_screenshot(screenshot=e.screenshot, as_error=True)
|
||||
|
||||
datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
|
||||
'last_check_status': e.status_code,
|
||||
'has_ldjson_price_data': None})
|
||||
process_changedetection_results = False
|
||||
|
||||
except content_fetchers_exceptions.BrowserStepsInUnsupportedFetcher as e:
|
||||
err_text = "This watch has Browser Steps configured and so it cannot run with the 'Basic fast Plaintext/HTTP Client', either remove the Browser Steps or select a Chrome fetcher."
|
||||
datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text})
|
||||
process_changedetection_results = False
|
||||
logger.error(f"Exception (BrowserStepsInUnsupportedFetcher) reached processing watch UUID: {uuid}")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Worker {worker_id} exception processing watch UUID: {uuid}")
|
||||
logger.error(str(e))
|
||||
datastore.update_watch(uuid=uuid, update_obj={'last_error': "Exception: " + str(e)})
|
||||
process_changedetection_results = False
|
||||
|
||||
else:
|
||||
if not datastore.data['watching'].get(uuid):
|
||||
continue
|
||||
|
||||
update_obj['content-type'] = update_handler.fetcher.get_all_headers().get('content-type', '').lower()
|
||||
|
||||
if not watch.get('ignore_status_codes'):
|
||||
update_obj['consecutive_filter_failures'] = 0
|
||||
|
||||
update_obj['last_error'] = False
|
||||
cleanup_error_artifacts(uuid, datastore)
|
||||
|
||||
if not datastore.data['watching'].get(uuid):
|
||||
continue
|
||||
|
||||
if process_changedetection_results:
|
||||
try:
|
||||
datastore.update_watch(uuid=uuid, update_obj=update_obj)
|
||||
|
||||
if changed_detected or not watch.history_n:
|
||||
if update_handler.screenshot:
|
||||
watch.save_screenshot(screenshot=update_handler.screenshot)
|
||||
|
||||
if update_handler.xpath_data:
|
||||
watch.save_xpath_data(data=update_handler.xpath_data)
|
||||
|
||||
# Ensure unique timestamp for history
|
||||
if watch.newest_history_key and int(fetch_start_time) == int(watch.newest_history_key):
|
||||
logger.warning(f"Timestamp {fetch_start_time} already exists, waiting 1 seconds")
|
||||
fetch_start_time += 1
|
||||
await asyncio.sleep(1)
|
||||
|
||||
watch.save_history_text(contents=contents,
|
||||
timestamp=int(fetch_start_time),
|
||||
snapshot_id=update_obj.get('previous_md5', 'none'))
|
||||
|
||||
empty_pages_are_a_change = datastore.data['settings']['application'].get('empty_pages_are_a_change', False)
|
||||
if update_handler.fetcher.content or (not update_handler.fetcher.content and empty_pages_are_a_change):
|
||||
watch.save_last_fetched_html(contents=update_handler.fetcher.content, timestamp=int(fetch_start_time))
|
||||
|
||||
# Explicitly delete large content variables to free memory IMMEDIATELY after saving
|
||||
# These are no longer needed after being saved to history
|
||||
del contents
|
||||
|
||||
# Send notifications on second+ check
|
||||
if watch.history_n >= 2:
|
||||
logger.info(f"Change detected in UUID {uuid} - {watch['url']}")
|
||||
if not watch.get('notification_muted'):
|
||||
await send_content_changed_notification(uuid, notification_q, datastore)
|
||||
|
||||
except Exception as e:
|
||||
logger.critical(f"Worker {worker_id} exception in process_changedetection_results")
|
||||
logger.critical(str(e))
|
||||
datastore.update_watch(uuid=uuid, update_obj={'last_error': str(e)})
|
||||
|
||||
# Always record attempt count
|
||||
count = watch.get('check_count', 0) + 1
|
||||
|
||||
# Always record page title (used in notifications, and can change even when the content is the same)
|
||||
try:
|
||||
page_title = html_tools.extract_title(data=update_handler.fetcher.content)
|
||||
logger.debug(f"UUID: {uuid} Page <title> is '{page_title}'")
|
||||
datastore.update_watch(uuid=uuid, update_obj={'page_title': page_title})
|
||||
except Exception as e:
|
||||
logger.warning(f"UUID: {uuid} Exception when extracting <title> - {str(e)}")
|
||||
|
||||
# Record server header
|
||||
try:
|
||||
server_header = update_handler.fetcher.headers.get('server', '').strip().lower()[:255]
|
||||
datastore.update_watch(uuid=uuid, update_obj={'remote_server_reply': server_header})
|
||||
except Exception as e:
|
||||
pass
|
||||
|
||||
# Store favicon if necessary
|
||||
if update_handler.fetcher.favicon_blob and update_handler.fetcher.favicon_blob.get('base64'):
|
||||
watch.bump_favicon(url=update_handler.fetcher.favicon_blob.get('url'),
|
||||
favicon_base_64=update_handler.fetcher.favicon_blob.get('base64')
|
||||
)
|
||||
|
||||
datastore.update_watch(uuid=uuid, update_obj={'fetch_time': round(time.time() - fetch_start_time, 3),
|
||||
'check_count': count})
|
||||
|
||||
# NOW clear fetcher content - after all processing is complete
|
||||
# This is the last point where we need the fetcher data
|
||||
if update_handler and hasattr(update_handler, 'fetcher') and update_handler.fetcher:
|
||||
update_handler.fetcher.clear_content()
|
||||
logger.debug(f"Cleared fetcher content for UUID {uuid}")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Worker {worker_id} unexpected error processing {uuid}: {e}")
|
||||
logger.error(f"Worker {worker_id} traceback:", exc_info=True)
|
||||
|
||||
# Also update the watch with error information
|
||||
if datastore and uuid in datastore.data['watching']:
|
||||
datastore.update_watch(uuid=uuid, update_obj={'last_error': f"Worker error: {str(e)}"})
|
||||
|
||||
finally:
|
||||
# Always cleanup - this runs whether there was an exception or not
|
||||
if uuid:
|
||||
try:
|
||||
# Mark UUID as no longer being processed
|
||||
worker_handler.set_uuid_processing(uuid, processing=False)
|
||||
|
||||
# Send completion signal
|
||||
if watch:
|
||||
#logger.info(f"Worker {worker_id} sending completion signal for UUID {watch['uuid']}")
|
||||
watch_check_update.send(watch_uuid=watch['uuid'])
|
||||
|
||||
# Explicitly clean up update_handler and all its references
|
||||
if update_handler:
|
||||
# Clear fetcher content using the proper method
|
||||
if hasattr(update_handler, 'fetcher') and update_handler.fetcher:
|
||||
update_handler.fetcher.clear_content()
|
||||
|
||||
# Clear processor references
|
||||
if hasattr(update_handler, 'content_processor'):
|
||||
update_handler.content_processor = None
|
||||
|
||||
update_handler = None
|
||||
|
||||
# Clear local contents variable if it still exists
|
||||
if 'contents' in locals():
|
||||
del contents
|
||||
|
||||
# Note: We don't set watch = None here because:
|
||||
# 1. watch is just a local reference to datastore.data['watching'][uuid]
|
||||
# 2. Setting it to None doesn't affect the datastore
|
||||
# 3. GC can't collect the object anyway (still referenced by datastore)
|
||||
# 4. It would just cause confusion
|
||||
|
||||
logger.debug(f"Worker {worker_id} completed watch {uuid} in {time.time()-fetch_start_time:.2f}s")
|
||||
except Exception as cleanup_error:
|
||||
logger.error(f"Worker {worker_id} error during cleanup: {cleanup_error}")
|
||||
|
||||
# Brief pause before continuing to avoid tight error loops (only on error)
|
||||
if 'e' in locals():
|
||||
await asyncio.sleep(1.0)
|
||||
else:
|
||||
# Small yield for normal completion
|
||||
await asyncio.sleep(0.01)
|
||||
|
||||
# Check if we should exit
|
||||
if app.config.exit.is_set():
|
||||
break
|
||||
|
||||
# Check if we're in pytest environment - if so, be more gentle with logging
|
||||
import sys
|
||||
in_pytest = "pytest" in sys.modules or "PYTEST_CURRENT_TEST" in os.environ
|
||||
|
||||
if not in_pytest:
|
||||
logger.info(f"Worker {worker_id} shutting down")
|
||||
|
||||
|
||||
def cleanup_error_artifacts(uuid, datastore):
|
||||
"""Helper function to clean up error artifacts"""
|
||||
cleanup_files = ["last-error-screenshot.png", "last-error.txt"]
|
||||
for f in cleanup_files:
|
||||
full_path = os.path.join(datastore.datastore_path, uuid, f)
|
||||
if os.path.isfile(full_path):
|
||||
os.unlink(full_path)
|
||||
|
||||
|
||||
|
||||
async def send_content_changed_notification(watch_uuid, notification_q, datastore):
|
||||
"""Helper function to queue notifications using the new notification service"""
|
||||
try:
|
||||
from changedetectionio.notification_service import create_notification_service
|
||||
|
||||
# Create notification service instance
|
||||
notification_service = create_notification_service(datastore, notification_q)
|
||||
|
||||
notification_service.send_content_changed_notification(watch_uuid)
|
||||
except Exception as e:
|
||||
logger.error(f"Error sending notification for {watch_uuid}: {e}")
|
||||
|
||||
|
||||
async def send_filter_failure_notification(watch_uuid, notification_q, datastore):
|
||||
"""Helper function to send filter failure notifications using the new notification service"""
|
||||
try:
|
||||
from changedetectionio.notification_service import create_notification_service
|
||||
|
||||
# Create notification service instance
|
||||
notification_service = create_notification_service(datastore, notification_q)
|
||||
|
||||
notification_service.send_filter_failure_notification(watch_uuid)
|
||||
except Exception as e:
|
||||
logger.error(f"Error sending filter failure notification for {watch_uuid}: {e}")
|
||||
|
||||
|
||||
async def send_step_failure_notification(watch_uuid, step_n, notification_q, datastore):
|
||||
"""Helper function to send step failure notifications using the new notification service"""
|
||||
try:
|
||||
from changedetectionio.notification_service import create_notification_service
|
||||
|
||||
# Create notification service instance
|
||||
notification_service = create_notification_service(datastore, notification_q)
|
||||
|
||||
notification_service.send_step_failure_notification(watch_uuid, step_n)
|
||||
except Exception as e:
|
||||
logger.error(f"Error sending step failure notification for {watch_uuid}: {e}")
|
||||
@@ -25,35 +25,53 @@ io_interface_context = None
|
||||
import json
|
||||
import hashlib
|
||||
from flask import Response
|
||||
import asyncio
|
||||
import threading
|
||||
|
||||
def run_async_in_browser_loop(coro):
|
||||
"""Run async coroutine using the existing async worker event loop"""
|
||||
from changedetectionio import worker_handler
|
||||
|
||||
# Use the existing async worker event loop instead of creating a new one
|
||||
if worker_handler.USE_ASYNC_WORKERS and worker_handler.async_loop and not worker_handler.async_loop.is_closed():
|
||||
logger.debug("Browser steps using existing async worker event loop")
|
||||
future = asyncio.run_coroutine_threadsafe(coro, worker_handler.async_loop)
|
||||
return future.result()
|
||||
else:
|
||||
# Fallback: create a new event loop (for sync workers or if async loop not available)
|
||||
logger.debug("Browser steps creating temporary event loop")
|
||||
loop = asyncio.new_event_loop()
|
||||
asyncio.set_event_loop(loop)
|
||||
try:
|
||||
return loop.run_until_complete(coro)
|
||||
finally:
|
||||
loop.close()
|
||||
|
||||
def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
browser_steps_blueprint = Blueprint('browser_steps', __name__, template_folder="templates")
|
||||
|
||||
def start_browsersteps_session(watch_uuid):
|
||||
from . import nonContext
|
||||
async def start_browsersteps_session(watch_uuid):
|
||||
from . import browser_steps
|
||||
import time
|
||||
global io_interface_context
|
||||
from playwright.async_api import async_playwright
|
||||
|
||||
# We keep the playwright session open for many minutes
|
||||
keepalive_seconds = int(os.getenv('BROWSERSTEPS_MINUTES_KEEPALIVE', 10)) * 60
|
||||
|
||||
browsersteps_start_session = {'start_time': time.time()}
|
||||
|
||||
# You can only have one of these running
|
||||
# This should be very fine to leave running for the life of the application
|
||||
# @idea - Make it global so the pool of watch fetchers can use it also
|
||||
if not io_interface_context:
|
||||
io_interface_context = nonContext.c_sync_playwright()
|
||||
# Start the Playwright context, which is actually a nodejs sub-process and communicates over STDIN/STDOUT pipes
|
||||
io_interface_context = io_interface_context.start()
|
||||
# Create a new async playwright instance for browser steps
|
||||
playwright_instance = async_playwright()
|
||||
playwright_context = await playwright_instance.start()
|
||||
|
||||
keepalive_ms = ((keepalive_seconds + 3) * 1000)
|
||||
base_url = os.getenv('PLAYWRIGHT_DRIVER_URL', '').strip('"')
|
||||
a = "?" if not '?' in base_url else '&'
|
||||
base_url += a + f"timeout={keepalive_ms}"
|
||||
|
||||
browsersteps_start_session['browser'] = io_interface_context.chromium.connect_over_cdp(base_url)
|
||||
browser = await playwright_context.chromium.connect_over_cdp(base_url, timeout=keepalive_ms)
|
||||
browsersteps_start_session['browser'] = browser
|
||||
browsersteps_start_session['playwright_context'] = playwright_context
|
||||
|
||||
proxy_id = datastore.get_preferred_proxy_for_watch(uuid=watch_uuid)
|
||||
proxy = None
|
||||
@@ -75,15 +93,20 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
logger.debug(f"Browser Steps: UUID {watch_uuid} selected proxy {proxy_url}")
|
||||
|
||||
# Tell Playwright to connect to Chrome and setup a new session via our stepper interface
|
||||
browsersteps_start_session['browserstepper'] = browser_steps.browsersteps_live_ui(
|
||||
playwright_browser=browsersteps_start_session['browser'],
|
||||
browserstepper = browser_steps.browsersteps_live_ui(
|
||||
playwright_browser=browser,
|
||||
proxy=proxy,
|
||||
start_url=datastore.data['watching'][watch_uuid].link,
|
||||
headers=datastore.data['watching'][watch_uuid].get('headers')
|
||||
)
|
||||
|
||||
# Initialize the async connection
|
||||
await browserstepper.connect(proxy=proxy)
|
||||
|
||||
browsersteps_start_session['browserstepper'] = browserstepper
|
||||
|
||||
# For test
|
||||
#browsersteps_start_session['browserstepper'].action_goto_url(value="http://example.com?time="+str(time.time()))
|
||||
#await browsersteps_start_session['browserstepper'].action_goto_url(value="http://example.com?time="+str(time.time()))
|
||||
|
||||
return browsersteps_start_session
|
||||
|
||||
@@ -92,7 +115,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
@browser_steps_blueprint.route("/browsersteps_start_session", methods=['GET'])
|
||||
def browsersteps_start_session():
|
||||
# A new session was requested, return sessionID
|
||||
|
||||
import asyncio
|
||||
import uuid
|
||||
browsersteps_session_id = str(uuid.uuid4())
|
||||
watch_uuid = request.args.get('uuid')
|
||||
@@ -104,7 +127,10 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
logger.debug("browser_steps.py connecting")
|
||||
|
||||
try:
|
||||
browsersteps_sessions[browsersteps_session_id] = start_browsersteps_session(watch_uuid)
|
||||
# Run the async function in the dedicated browser steps event loop
|
||||
browsersteps_sessions[browsersteps_session_id] = run_async_in_browser_loop(
|
||||
start_browsersteps_session(watch_uuid)
|
||||
)
|
||||
except Exception as e:
|
||||
if 'ECONNREFUSED' in str(e):
|
||||
return make_response('Unable to start the Playwright Browser session, is sockpuppetbrowser running? Network configuration is OK?', 401)
|
||||
@@ -168,12 +194,15 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
step_optional_value = request.form.get('optional_value')
|
||||
is_last_step = strtobool(request.form.get('is_last_step'))
|
||||
|
||||
# @todo try.. accept.. nice errors not popups..
|
||||
try:
|
||||
|
||||
browsersteps_sessions[browsersteps_session_id]['browserstepper'].call_action(action_name=step_operation,
|
||||
selector=step_selector,
|
||||
optional_value=step_optional_value)
|
||||
# Run the async call_action method in the dedicated browser steps event loop
|
||||
run_async_in_browser_loop(
|
||||
browsersteps_sessions[browsersteps_session_id]['browserstepper'].call_action(
|
||||
action_name=step_operation,
|
||||
selector=step_selector,
|
||||
optional_value=step_optional_value
|
||||
)
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Exception when calling step operation {step_operation} {str(e)}")
|
||||
@@ -187,7 +216,11 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
|
||||
# Screenshots and other info only needed on requesting a step (POST)
|
||||
try:
|
||||
(screenshot, xpath_data) = browsersteps_sessions[browsersteps_session_id]['browserstepper'].get_current_state()
|
||||
# Run the async get_current_state method in the dedicated browser steps event loop
|
||||
(screenshot, xpath_data) = run_async_in_browser_loop(
|
||||
browsersteps_sessions[browsersteps_session_id]['browserstepper'].get_current_state()
|
||||
)
|
||||
|
||||
if is_last_step:
|
||||
watch = datastore.data['watching'].get(uuid)
|
||||
u = browsersteps_sessions[browsersteps_session_id]['browserstepper'].page.url
|
||||
@@ -195,13 +228,10 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
watch.save_screenshot(screenshot=screenshot)
|
||||
watch.save_xpath_data(data=xpath_data)
|
||||
|
||||
except playwright._impl._api_types.Error as e:
|
||||
return make_response("Browser session ran out of time :( Please reload this page."+str(e), 401)
|
||||
except Exception as e:
|
||||
return make_response("Error fetching screenshot and element data - " + str(e), 401)
|
||||
return make_response(f"Error fetching screenshot and element data - {str(e)}", 401)
|
||||
|
||||
# SEND THIS BACK TO THE BROWSER
|
||||
|
||||
output = {
|
||||
"screenshot": f"data:image/jpeg;base64,{base64.b64encode(screenshot).decode('ascii')}",
|
||||
"xpath_data": xpath_data,
|
||||
|
||||
@@ -1,14 +1,12 @@
|
||||
import os
|
||||
import time
|
||||
import re
|
||||
import sys
|
||||
import traceback
|
||||
from random import randint
|
||||
from loguru import logger
|
||||
|
||||
from changedetectionio.content_fetchers import SCREENSHOT_MAX_HEIGHT_DEFAULT
|
||||
from changedetectionio.content_fetchers.base import manage_user_agent
|
||||
from changedetectionio.safe_jinja import render as jinja_render
|
||||
from changedetectionio.jinja2_custom import render as jinja_render
|
||||
|
||||
|
||||
|
||||
@@ -61,26 +59,9 @@ class steppable_browser_interface():
|
||||
|
||||
def __init__(self, start_url):
|
||||
self.start_url = start_url
|
||||
|
||||
def safe_page_operation(self, operation_fn, default_return=None):
|
||||
"""Safely execute a page operation with error handling"""
|
||||
if self.page is None:
|
||||
logger.warning("Attempted operation on None page object")
|
||||
return default_return
|
||||
|
||||
try:
|
||||
return operation_fn()
|
||||
except Exception as e:
|
||||
logger.debug(f"Page operation failed: {str(e)}")
|
||||
# Try to reclaim memory if possible
|
||||
try:
|
||||
self.page.request_gc()
|
||||
except:
|
||||
pass
|
||||
return default_return
|
||||
|
||||
# Convert and perform "Click Button" for example
|
||||
def call_action(self, action_name, selector=None, optional_value=None):
|
||||
async def call_action(self, action_name, selector=None, optional_value=None):
|
||||
if self.page is None:
|
||||
logger.warning("Cannot call action on None page object")
|
||||
return
|
||||
@@ -109,110 +90,107 @@ class steppable_browser_interface():
|
||||
if optional_value and ('{%' in optional_value or '{{' in optional_value):
|
||||
optional_value = jinja_render(template_str=optional_value)
|
||||
|
||||
try:
|
||||
action_handler(selector, optional_value)
|
||||
# Safely wait for timeout
|
||||
def wait_timeout():
|
||||
self.page.wait_for_timeout(1.5 * 1000)
|
||||
self.safe_page_operation(wait_timeout)
|
||||
logger.debug(f"Call action done in {time.time()-now:.2f}s")
|
||||
except Exception as e:
|
||||
logger.error(f"Error executing action '{call_action_name}': {str(e)}")
|
||||
# Request garbage collection to free up resources after error
|
||||
# Trigger click and cautiously handle potential navigation
|
||||
# This means the page redirects/reloads/changes JS etc etc
|
||||
if call_action_name.startswith('click_'):
|
||||
try:
|
||||
self.page.request_gc()
|
||||
except:
|
||||
pass
|
||||
# Set up navigation expectation before the click (like sync version)
|
||||
async with self.page.expect_event("framenavigated", timeout=3000) as navigation_info:
|
||||
await action_handler(selector, optional_value)
|
||||
|
||||
# Check if navigation actually occurred
|
||||
try:
|
||||
await navigation_info.value # This waits for the navigation promise
|
||||
logger.debug(f"Navigation occurred on {call_action_name}.")
|
||||
except Exception:
|
||||
logger.debug(f"No navigation occurred within timeout when calling {call_action_name}, that's OK, continuing.")
|
||||
|
||||
except Exception as e:
|
||||
# If expect_event itself times out, that means no navigation occurred - that's OK
|
||||
if "framenavigated" in str(e) and "exceeded" in str(e):
|
||||
logger.debug(f"No navigation occurred within timeout when calling {call_action_name}, that's OK, continuing.")
|
||||
else:
|
||||
raise e
|
||||
else:
|
||||
# Some other action that probably a navigation is not expected
|
||||
await action_handler(selector, optional_value)
|
||||
|
||||
def action_goto_url(self, selector=None, value=None):
|
||||
|
||||
# Safely wait for timeout
|
||||
await self.page.wait_for_timeout(1.5 * 1000)
|
||||
logger.debug(f"Call action done in {time.time()-now:.2f}s")
|
||||
|
||||
async def action_goto_url(self, selector=None, value=None):
|
||||
if not value:
|
||||
logger.warning("No URL provided for goto_url action")
|
||||
return None
|
||||
|
||||
now = time.time()
|
||||
|
||||
def goto_operation():
|
||||
return self.page.goto(value, timeout=0, wait_until='load')
|
||||
|
||||
response = self.safe_page_operation(goto_operation)
|
||||
response = await self.page.goto(value, timeout=0, wait_until='load')
|
||||
logger.debug(f"Time to goto URL {time.time()-now:.2f}s")
|
||||
return response
|
||||
|
||||
# Incase they request to go back to the start
|
||||
def action_goto_site(self, selector=None, value=None):
|
||||
return self.action_goto_url(value=self.start_url)
|
||||
async def action_goto_site(self, selector=None, value=None):
|
||||
return await self.action_goto_url(value=re.sub(r'^source:', '', self.start_url, flags=re.IGNORECASE))
|
||||
|
||||
def action_click_element_containing_text(self, selector=None, value=''):
|
||||
async def action_click_element_containing_text(self, selector=None, value=''):
|
||||
logger.debug("Clicking element containing text")
|
||||
if not value or not len(value.strip()):
|
||||
return
|
||||
|
||||
def click_operation():
|
||||
elem = self.page.get_by_text(value)
|
||||
if elem.count():
|
||||
elem.first.click(delay=randint(200, 500), timeout=self.action_timeout)
|
||||
|
||||
self.safe_page_operation(click_operation)
|
||||
elem = self.page.get_by_text(value)
|
||||
if await elem.count():
|
||||
await elem.first.click(delay=randint(200, 500), timeout=self.action_timeout)
|
||||
|
||||
def action_click_element_containing_text_if_exists(self, selector=None, value=''):
|
||||
|
||||
async def action_click_element_containing_text_if_exists(self, selector=None, value=''):
|
||||
logger.debug("Clicking element containing text if exists")
|
||||
if not value or not len(value.strip()):
|
||||
return
|
||||
|
||||
def click_if_exists_operation():
|
||||
elem = self.page.get_by_text(value)
|
||||
logger.debug(f"Clicking element containing text - {elem.count()} elements found")
|
||||
if elem.count():
|
||||
elem.first.click(delay=randint(200, 500), timeout=self.action_timeout)
|
||||
elem = self.page.get_by_text(value)
|
||||
count = await elem.count()
|
||||
logger.debug(f"Clicking element containing text - {count} elements found")
|
||||
if count:
|
||||
await elem.first.click(delay=randint(200, 500), timeout=self.action_timeout)
|
||||
|
||||
self.safe_page_operation(click_if_exists_operation)
|
||||
|
||||
def action_enter_text_in_field(self, selector, value):
|
||||
async def action_enter_text_in_field(self, selector, value):
|
||||
if not selector or not len(selector.strip()):
|
||||
return
|
||||
|
||||
def fill_operation():
|
||||
self.page.fill(selector, value, timeout=self.action_timeout)
|
||||
|
||||
self.safe_page_operation(fill_operation)
|
||||
await self.page.fill(selector, value, timeout=self.action_timeout)
|
||||
|
||||
def action_execute_js(self, selector, value):
|
||||
async def action_execute_js(self, selector, value):
|
||||
if not value:
|
||||
return None
|
||||
|
||||
def evaluate_operation():
|
||||
return self.page.evaluate(value)
|
||||
|
||||
return self.safe_page_operation(evaluate_operation)
|
||||
return await self.page.evaluate(value)
|
||||
|
||||
def action_click_element(self, selector, value):
|
||||
async def action_click_element(self, selector, value):
|
||||
logger.debug("Clicking element")
|
||||
if not selector or not len(selector.strip()):
|
||||
return
|
||||
|
||||
def click_operation():
|
||||
self.page.click(selector=selector, timeout=self.action_timeout + 20 * 1000, delay=randint(200, 500))
|
||||
|
||||
self.safe_page_operation(click_operation)
|
||||
await self.page.click(selector=selector, timeout=self.action_timeout + 20 * 1000, delay=randint(200, 500))
|
||||
|
||||
def action_click_element_if_exists(self, selector, value):
|
||||
async def action_click_element_if_exists(self, selector, value):
|
||||
import playwright._impl._errors as _api_types
|
||||
logger.debug("Clicking element if exists")
|
||||
if not selector or not len(selector.strip()):
|
||||
return
|
||||
|
||||
def click_if_exists_operation():
|
||||
try:
|
||||
self.page.click(selector, timeout=self.action_timeout, delay=randint(200, 500))
|
||||
except _api_types.TimeoutError:
|
||||
return
|
||||
except _api_types.Error:
|
||||
# Element was there, but page redrew and now its long long gone
|
||||
return
|
||||
try:
|
||||
await self.page.click(selector, timeout=self.action_timeout, delay=randint(200, 500))
|
||||
except _api_types.TimeoutError:
|
||||
return
|
||||
except _api_types.Error:
|
||||
# Element was there, but page redrew and now its long long gone
|
||||
return
|
||||
|
||||
self.safe_page_operation(click_if_exists_operation)
|
||||
|
||||
def action_click_x_y(self, selector, value):
|
||||
async def action_click_x_y(self, selector, value):
|
||||
if not value or not re.match(r'^\s?\d+\s?,\s?\d+\s?$', value):
|
||||
logger.warning("'Click X,Y' step should be in the format of '100 , 90'")
|
||||
return
|
||||
@@ -222,57 +200,42 @@ class steppable_browser_interface():
|
||||
x = int(float(x.strip()))
|
||||
y = int(float(y.strip()))
|
||||
|
||||
def click_xy_operation():
|
||||
self.page.mouse.click(x=x, y=y, delay=randint(200, 500))
|
||||
await self.page.mouse.click(x=x, y=y, delay=randint(200, 500))
|
||||
|
||||
self.safe_page_operation(click_xy_operation)
|
||||
except Exception as e:
|
||||
logger.error(f"Error parsing x,y coordinates: {str(e)}")
|
||||
|
||||
def action__select_by_option_text(self, selector, value):
|
||||
async def action__select_by_option_text(self, selector, value):
|
||||
if not selector or not len(selector.strip()):
|
||||
return
|
||||
|
||||
def select_operation():
|
||||
self.page.select_option(selector, label=value, timeout=self.action_timeout)
|
||||
await self.page.select_option(selector, label=value, timeout=self.action_timeout)
|
||||
|
||||
self.safe_page_operation(select_operation)
|
||||
async def action_scroll_down(self, selector, value):
|
||||
# Some sites this doesnt work on for some reason
|
||||
await self.page.mouse.wheel(0, 600)
|
||||
await self.page.wait_for_timeout(1000)
|
||||
|
||||
def action_scroll_down(self, selector, value):
|
||||
def scroll_operation():
|
||||
# Some sites this doesnt work on for some reason
|
||||
self.page.mouse.wheel(0, 600)
|
||||
self.page.wait_for_timeout(1000)
|
||||
|
||||
self.safe_page_operation(scroll_operation)
|
||||
|
||||
def action_wait_for_seconds(self, selector, value):
|
||||
async def action_wait_for_seconds(self, selector, value):
|
||||
try:
|
||||
seconds = float(value.strip()) if value else 1.0
|
||||
|
||||
def wait_operation():
|
||||
self.page.wait_for_timeout(seconds * 1000)
|
||||
|
||||
self.safe_page_operation(wait_operation)
|
||||
await self.page.wait_for_timeout(seconds * 1000)
|
||||
except (ValueError, TypeError) as e:
|
||||
logger.error(f"Invalid value for wait_for_seconds: {str(e)}")
|
||||
|
||||
def action_wait_for_text(self, selector, value):
|
||||
async def action_wait_for_text(self, selector, value):
|
||||
if not value:
|
||||
return
|
||||
|
||||
import json
|
||||
v = json.dumps(value)
|
||||
|
||||
def wait_for_text_operation():
|
||||
self.page.wait_for_function(
|
||||
f'document.querySelector("body").innerText.includes({v});',
|
||||
timeout=30000
|
||||
)
|
||||
await self.page.wait_for_function(
|
||||
f'document.querySelector("body").innerText.includes({v});',
|
||||
timeout=30000
|
||||
)
|
||||
|
||||
self.safe_page_operation(wait_for_text_operation)
|
||||
|
||||
def action_wait_for_text_in_element(self, selector, value):
|
||||
async def action_wait_for_text_in_element(self, selector, value):
|
||||
if not selector or not value:
|
||||
return
|
||||
|
||||
@@ -280,82 +243,60 @@ class steppable_browser_interface():
|
||||
s = json.dumps(selector)
|
||||
v = json.dumps(value)
|
||||
|
||||
def wait_for_text_in_element_operation():
|
||||
self.page.wait_for_function(
|
||||
f'document.querySelector({s}).innerText.includes({v});',
|
||||
timeout=30000
|
||||
)
|
||||
|
||||
self.safe_page_operation(wait_for_text_in_element_operation)
|
||||
await self.page.wait_for_function(
|
||||
f'document.querySelector({s}).innerText.includes({v});',
|
||||
timeout=30000
|
||||
)
|
||||
|
||||
# @todo - in the future make some popout interface to capture what needs to be set
|
||||
# https://playwright.dev/python/docs/api/class-keyboard
|
||||
def action_press_enter(self, selector, value):
|
||||
def press_operation():
|
||||
self.page.keyboard.press("Enter", delay=randint(200, 500))
|
||||
async def action_press_enter(self, selector, value):
|
||||
await self.page.keyboard.press("Enter", delay=randint(200, 500))
|
||||
|
||||
self.safe_page_operation(press_operation)
|
||||
|
||||
def action_press_page_up(self, selector, value):
|
||||
def press_operation():
|
||||
self.page.keyboard.press("PageUp", delay=randint(200, 500))
|
||||
|
||||
self.safe_page_operation(press_operation)
|
||||
async def action_press_page_up(self, selector, value):
|
||||
await self.page.keyboard.press("PageUp", delay=randint(200, 500))
|
||||
|
||||
def action_press_page_down(self, selector, value):
|
||||
def press_operation():
|
||||
self.page.keyboard.press("PageDown", delay=randint(200, 500))
|
||||
|
||||
self.safe_page_operation(press_operation)
|
||||
async def action_press_page_down(self, selector, value):
|
||||
await self.page.keyboard.press("PageDown", delay=randint(200, 500))
|
||||
|
||||
def action_check_checkbox(self, selector, value):
|
||||
async def action_check_checkbox(self, selector, value):
|
||||
if not selector:
|
||||
return
|
||||
|
||||
await self.page.locator(selector).check(timeout=self.action_timeout)
|
||||
|
||||
async def action_uncheck_checkbox(self, selector, value):
|
||||
if not selector:
|
||||
return
|
||||
|
||||
def check_operation():
|
||||
self.page.locator(selector).check(timeout=self.action_timeout)
|
||||
await self.page.locator(selector).uncheck(timeout=self.action_timeout)
|
||||
|
||||
self.safe_page_operation(check_operation)
|
||||
|
||||
def action_uncheck_checkbox(self, selector, value):
|
||||
if not selector:
|
||||
return
|
||||
|
||||
def uncheck_operation():
|
||||
self.page.locator(selector).uncheck(timeout=self.action_timeout)
|
||||
|
||||
self.safe_page_operation(uncheck_operation)
|
||||
|
||||
def action_remove_elements(self, selector, value):
|
||||
async def action_remove_elements(self, selector, value):
|
||||
"""Removes all elements matching the given selector from the DOM."""
|
||||
if not selector:
|
||||
return
|
||||
|
||||
def remove_operation():
|
||||
self.page.locator(selector).evaluate_all("els => els.forEach(el => el.remove())")
|
||||
|
||||
self.safe_page_operation(remove_operation)
|
||||
await self.page.locator(selector).evaluate_all("els => els.forEach(el => el.remove())")
|
||||
|
||||
def action_make_all_child_elements_visible(self, selector, value):
|
||||
async def action_make_all_child_elements_visible(self, selector, value):
|
||||
"""Recursively makes all child elements inside the given selector fully visible."""
|
||||
if not selector:
|
||||
return
|
||||
|
||||
def make_visible_operation():
|
||||
self.page.locator(selector).locator("*").evaluate_all("""
|
||||
els => els.forEach(el => {
|
||||
el.style.display = 'block'; // Forces it to be displayed
|
||||
el.style.visibility = 'visible'; // Ensures it's not hidden
|
||||
el.style.opacity = '1'; // Fully opaque
|
||||
el.style.position = 'relative'; // Avoids 'absolute' hiding
|
||||
el.style.height = 'auto'; // Expands collapsed elements
|
||||
el.style.width = 'auto'; // Ensures full visibility
|
||||
el.removeAttribute('hidden'); // Removes hidden attribute
|
||||
el.classList.remove('hidden', 'd-none'); // Removes common CSS hidden classes
|
||||
})
|
||||
""")
|
||||
|
||||
self.safe_page_operation(make_visible_operation)
|
||||
await self.page.locator(selector).locator("*").evaluate_all("""
|
||||
els => els.forEach(el => {
|
||||
el.style.display = 'block'; // Forces it to be displayed
|
||||
el.style.visibility = 'visible'; // Ensures it's not hidden
|
||||
el.style.opacity = '1'; // Fully opaque
|
||||
el.style.position = 'relative'; // Avoids 'absolute' hiding
|
||||
el.style.height = 'auto'; // Expands collapsed elements
|
||||
el.style.width = 'auto'; // Ensures full visibility
|
||||
el.removeAttribute('hidden'); // Removes hidden attribute
|
||||
el.classList.remove('hidden', 'd-none'); // Removes common CSS hidden classes
|
||||
})
|
||||
""")
|
||||
|
||||
# Responsible for maintaining a live 'context' with the chrome CDP
|
||||
# @todo - how long do contexts live for anyway?
|
||||
@@ -389,21 +330,22 @@ class browsersteps_live_ui(steppable_browser_interface):
|
||||
self.playwright_browser = playwright_browser
|
||||
self.start_url = start_url
|
||||
self._is_cleaned_up = False
|
||||
if self.context is None:
|
||||
self.connect(proxy=proxy)
|
||||
self.proxy = proxy
|
||||
# Note: connect() is now async and must be called separately
|
||||
|
||||
def __del__(self):
|
||||
# Ensure cleanup happens if object is garbage collected
|
||||
self.cleanup()
|
||||
# Note: cleanup is now async, so we can only mark as cleaned up here
|
||||
self._is_cleaned_up = True
|
||||
|
||||
# Connect and setup a new context
|
||||
def connect(self, proxy=None):
|
||||
async def connect(self, proxy=None):
|
||||
# Should only get called once - test that
|
||||
keep_open = 1000 * 60 * 5
|
||||
now = time.time()
|
||||
|
||||
# @todo handle multiple contexts, bind a unique id from the browser on each req?
|
||||
self.context = self.playwright_browser.new_context(
|
||||
self.context = await self.playwright_browser.new_context(
|
||||
accept_downloads=False, # Should never be needed
|
||||
bypass_csp=True, # This is needed to enable JavaScript execution on GitHub and others
|
||||
extra_http_headers=self.headers,
|
||||
@@ -414,7 +356,7 @@ class browsersteps_live_ui(steppable_browser_interface):
|
||||
user_agent=manage_user_agent(headers=self.headers),
|
||||
)
|
||||
|
||||
self.page = self.context.new_page()
|
||||
self.page = await self.context.new_page()
|
||||
|
||||
# self.page.set_default_navigation_timeout(keep_open)
|
||||
self.page.set_default_timeout(keep_open)
|
||||
@@ -424,13 +366,15 @@ class browsersteps_live_ui(steppable_browser_interface):
|
||||
self.page.on("console", lambda msg: print(f"Browser steps console - {msg.type}: {msg.text} {msg.args}"))
|
||||
|
||||
logger.debug(f"Time to browser setup {time.time()-now:.2f}s")
|
||||
self.page.wait_for_timeout(1 * 1000)
|
||||
await self.page.wait_for_timeout(1 * 1000)
|
||||
|
||||
def mark_as_closed(self):
|
||||
logger.debug("Page closed, cleaning up..")
|
||||
self.cleanup()
|
||||
# Note: This is called from a sync context (event handler)
|
||||
# so we'll just mark as cleaned up and let __del__ handle the rest
|
||||
self._is_cleaned_up = True
|
||||
|
||||
def cleanup(self):
|
||||
async def cleanup(self):
|
||||
"""Properly clean up all resources to prevent memory leaks"""
|
||||
if self._is_cleaned_up:
|
||||
return
|
||||
@@ -441,7 +385,7 @@ class browsersteps_live_ui(steppable_browser_interface):
|
||||
if hasattr(self, 'page') and self.page is not None:
|
||||
try:
|
||||
# Force garbage collection before closing
|
||||
self.page.request_gc()
|
||||
await self.page.request_gc()
|
||||
except Exception as e:
|
||||
logger.debug(f"Error during page garbage collection: {str(e)}")
|
||||
|
||||
@@ -452,7 +396,7 @@ class browsersteps_live_ui(steppable_browser_interface):
|
||||
logger.debug(f"Error removing event listeners: {str(e)}")
|
||||
|
||||
try:
|
||||
self.page.close()
|
||||
await self.page.close()
|
||||
except Exception as e:
|
||||
logger.debug(f"Error closing page: {str(e)}")
|
||||
|
||||
@@ -461,7 +405,7 @@ class browsersteps_live_ui(steppable_browser_interface):
|
||||
# Clean up context
|
||||
if hasattr(self, 'context') and self.context is not None:
|
||||
try:
|
||||
self.context.close()
|
||||
await self.context.close()
|
||||
except Exception as e:
|
||||
logger.debug(f"Error closing context: {str(e)}")
|
||||
|
||||
@@ -483,12 +427,12 @@ class browsersteps_live_ui(steppable_browser_interface):
|
||||
|
||||
return False
|
||||
|
||||
def get_current_state(self):
|
||||
async def get_current_state(self):
|
||||
"""Return the screenshot and interactive elements mapping, generally always called after action_()"""
|
||||
import importlib.resources
|
||||
import json
|
||||
# because we for now only run browser steps in playwright mode (not puppeteer mode)
|
||||
from changedetectionio.content_fetchers.playwright import capture_full_page
|
||||
from changedetectionio.content_fetchers.playwright import capture_full_page_async
|
||||
|
||||
# Safety check - don't proceed if resources are cleaned up
|
||||
if self._is_cleaned_up or self.page is None:
|
||||
@@ -498,29 +442,32 @@ class browsersteps_live_ui(steppable_browser_interface):
|
||||
xpath_element_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text()
|
||||
|
||||
now = time.time()
|
||||
self.page.wait_for_timeout(1 * 1000)
|
||||
await self.page.wait_for_timeout(1 * 1000)
|
||||
|
||||
screenshot = None
|
||||
xpath_data = None
|
||||
|
||||
try:
|
||||
# Get screenshot first
|
||||
screenshot = capture_full_page(page=self.page)
|
||||
screenshot = await capture_full_page_async(page=self.page)
|
||||
if not screenshot:
|
||||
logger.error("No screenshot was retrieved :((")
|
||||
|
||||
logger.debug(f"Time to get screenshot from browser {time.time() - now:.2f}s")
|
||||
|
||||
# Then get interactive elements
|
||||
now = time.time()
|
||||
self.page.evaluate("var include_filters=''")
|
||||
self.page.request_gc()
|
||||
await self.page.evaluate("var include_filters=''")
|
||||
await self.page.request_gc()
|
||||
|
||||
scan_elements = 'a,button,input,select,textarea,i,th,td,p,li,h1,h2,h3,h4,div,span'
|
||||
|
||||
MAX_TOTAL_HEIGHT = int(os.getenv("SCREENSHOT_MAX_HEIGHT", SCREENSHOT_MAX_HEIGHT_DEFAULT))
|
||||
xpath_data = json.loads(self.page.evaluate(xpath_element_js, {
|
||||
xpath_data = json.loads(await self.page.evaluate(xpath_element_js, {
|
||||
"visualselector_xpath_selectors": scan_elements,
|
||||
"max_height": MAX_TOTAL_HEIGHT
|
||||
}))
|
||||
self.page.request_gc()
|
||||
await self.page.request_gc()
|
||||
|
||||
# Sort elements by size
|
||||
xpath_data['size_pos'] = sorted(xpath_data['size_pos'], key=lambda k: k['width'] * k['height'], reverse=True)
|
||||
@@ -528,15 +475,21 @@ class browsersteps_live_ui(steppable_browser_interface):
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error getting current state: {str(e)}")
|
||||
# If the page has navigated (common with logins) then the context is destroyed on navigation, continue
|
||||
# I'm not sure that this is required anymore because we have the "expect navigation wrapper" at the top
|
||||
if "Execution context was destroyed" in str(e):
|
||||
logger.debug("Execution context was destroyed, most likely because of navigation, continuing...")
|
||||
pass
|
||||
|
||||
# Attempt recovery - force garbage collection
|
||||
try:
|
||||
self.page.request_gc()
|
||||
await self.page.request_gc()
|
||||
except:
|
||||
pass
|
||||
|
||||
# Request garbage collection one final time
|
||||
try:
|
||||
self.page.request_gc()
|
||||
await self.page.request_gc()
|
||||
except:
|
||||
pass
|
||||
|
||||
|
||||
@@ -1,17 +0,0 @@
|
||||
from playwright.sync_api import PlaywrightContextManager
|
||||
|
||||
# So playwright wants to run as a context manager, but we do something horrible and hacky
|
||||
# we are holding the session open for as long as possible, then shutting it down, and opening a new one
|
||||
# So it means we don't get to use PlaywrightContextManager' __enter__ __exit__
|
||||
# To work around this, make goodbye() act the same as the __exit__()
|
||||
#
|
||||
# But actually I think this is because the context is opened correctly with __enter__() but we timeout the connection
|
||||
# then theres some lock condition where we cant destroy it without it hanging
|
||||
|
||||
class c_PlaywrightContextManager(PlaywrightContextManager):
|
||||
|
||||
def goodbye(self) -> None:
|
||||
self.__exit__()
|
||||
|
||||
def c_sync_playwright() -> PlaywrightContextManager:
|
||||
return c_PlaywrightContextManager()
|
||||
@@ -33,7 +33,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
def long_task(uuid, preferred_proxy):
|
||||
import time
|
||||
from changedetectionio.content_fetchers import exceptions as content_fetcher_exceptions
|
||||
from changedetectionio.safe_jinja import render as jinja_render
|
||||
from changedetectionio.jinja2_custom import render as jinja_render
|
||||
|
||||
status = {'status': '', 'length': 0, 'text': ''}
|
||||
|
||||
|
||||
98
changedetectionio/blueprint/cookie_preferences.py
Normal file
98
changedetectionio/blueprint/cookie_preferences.py
Normal file
@@ -0,0 +1,98 @@
|
||||
from typing import Dict, Any
|
||||
from flask import request
|
||||
|
||||
|
||||
class PreferenceManager:
|
||||
"""
|
||||
Manages user preferences with cookie persistence.
|
||||
|
||||
Handles reading from cookies, overriding with URL query parameters,
|
||||
and setting cookies when preferences are updated.
|
||||
"""
|
||||
|
||||
def __init__(self, preferences_config: Dict[str, Dict[str, Any]], cookie_scope: str = 'path'):
|
||||
"""
|
||||
Initialize the preference manager.
|
||||
|
||||
Args:
|
||||
preferences_config: Dict defining preferences with their defaults and types
|
||||
e.g., {'diff_type': {'default': 'diffLines', 'type': 'value'}}
|
||||
cookie_scope: 'path' for current path only, 'global' for entire application
|
||||
"""
|
||||
self.config = preferences_config
|
||||
self.cookie_scope = cookie_scope
|
||||
self.preferences = {}
|
||||
self.cookies_updated = False
|
||||
|
||||
def load_preferences(self) -> Dict[str, Any]:
|
||||
"""
|
||||
Load preferences from cookies and override with URL query parameters.
|
||||
|
||||
URL query parameters act as temporary overrides but don't update cookies.
|
||||
|
||||
Returns:
|
||||
Dict containing current preference values
|
||||
"""
|
||||
for key, config in self.config.items():
|
||||
# Read from cookie first (or use default)
|
||||
if config['type'] == 'bool':
|
||||
if key in request.cookies:
|
||||
# Cookie exists, use its value
|
||||
self.preferences[key] = request.cookies.get(key) == 'on'
|
||||
else:
|
||||
# No cookie, use configured default
|
||||
self.preferences[key] = config['default']
|
||||
else:
|
||||
self.preferences[key] = request.cookies.get(key, config['default'])
|
||||
|
||||
# URL query parameters override (but don't update cookies)
|
||||
if key in request.args:
|
||||
if config['type'] == 'bool':
|
||||
self.preferences[key] = request.args.get(key) == 'on'
|
||||
else:
|
||||
self.preferences[key] = request.args.get(key, config['default'])
|
||||
|
||||
return self.preferences
|
||||
|
||||
def load_from_form(self) -> Dict[str, Any]:
|
||||
"""
|
||||
Load preferences from POST form data and mark for cookie updates.
|
||||
|
||||
For checkboxes: absence in form.data means unchecked = False.
|
||||
|
||||
Returns:
|
||||
Dict containing preference values from form
|
||||
"""
|
||||
self.cookies_updated = True
|
||||
|
||||
for key, config in self.config.items():
|
||||
if config['type'] == 'bool':
|
||||
# Checkbox: present = on, absent = off
|
||||
self.preferences[key] = key in request.form and request.form.get(key) == 'on'
|
||||
else:
|
||||
# Value field: get from form or use default
|
||||
self.preferences[key] = request.form.get(key, config['default'])
|
||||
|
||||
return self.preferences
|
||||
|
||||
def apply_cookies_to_response(self, response, max_age: int = 365 * 24 * 60 * 60):
|
||||
"""
|
||||
Apply cookies to the response if preferences were updated.
|
||||
|
||||
Args:
|
||||
response: Flask response object
|
||||
max_age: Cookie expiration time in seconds (default: 1 year)
|
||||
|
||||
Returns:
|
||||
Modified response object
|
||||
"""
|
||||
if not self.cookies_updated:
|
||||
return response
|
||||
|
||||
cookie_path = request.path if self.cookie_scope == 'path' else '/'
|
||||
|
||||
for key, value in self.preferences.items():
|
||||
cookie_value = 'on' if value is True else ('off' if value is False else value)
|
||||
response.set_cookie(key, cookie_value, max_age=max_age, path=cookie_path)
|
||||
|
||||
return response
|
||||
@@ -1,6 +1,7 @@
|
||||
from flask import Blueprint, request, redirect, url_for, flash, render_template
|
||||
from changedetectionio.store import ChangeDetectionStore
|
||||
from changedetectionio.auth_decorator import login_optionally_required
|
||||
from changedetectionio import worker_handler
|
||||
from changedetectionio.blueprint.imports.importer import (
|
||||
import_url_list,
|
||||
import_distill_io_json,
|
||||
@@ -24,7 +25,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
importer_handler = import_url_list()
|
||||
importer_handler.run(data=request.values.get('urls'), flash=flash, datastore=datastore, processor=request.values.get('processor', 'text_json_diff'))
|
||||
for uuid in importer_handler.new_uuids:
|
||||
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
|
||||
if len(importer_handler.remaining_data) == 0:
|
||||
return redirect(url_for('watchlist.index'))
|
||||
@@ -37,7 +38,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
d_importer = import_distill_io_json()
|
||||
d_importer.run(data=request.values.get('distill-io'), flash=flash, datastore=datastore)
|
||||
for uuid in d_importer.new_uuids:
|
||||
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
|
||||
# XLSX importer
|
||||
if request.files and request.files.get('xlsx_file'):
|
||||
@@ -60,7 +61,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
w_importer.run(data=file, flash=flash, datastore=datastore)
|
||||
|
||||
for uuid in w_importer.new_uuids:
|
||||
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
|
||||
# Could be some remaining, or we could be on GET
|
||||
form = forms.importForm(formdata=request.form if request.method == 'POST' else None)
|
||||
|
||||
@@ -4,6 +4,7 @@ from flask import Blueprint, flash, redirect, url_for
|
||||
from flask_login import login_required
|
||||
from changedetectionio.store import ChangeDetectionStore
|
||||
from changedetectionio import queuedWatchMetaData
|
||||
from changedetectionio import worker_handler
|
||||
from queue import PriorityQueue
|
||||
|
||||
PRICE_DATA_TRACK_ACCEPT = 'accepted'
|
||||
@@ -19,7 +20,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q: PriorityQueue
|
||||
datastore.data['watching'][uuid]['track_ldjson_price_data'] = PRICE_DATA_TRACK_ACCEPT
|
||||
datastore.data['watching'][uuid]['processor'] = 'restock_diff'
|
||||
datastore.data['watching'][uuid].clear_watch()
|
||||
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
return redirect(url_for("watchlist.index"))
|
||||
|
||||
@login_required
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
|
||||
from changedetectionio.safe_jinja import render as jinja_render
|
||||
from changedetectionio.jinja2_custom import render as jinja_render
|
||||
from changedetectionio.notification.handler import apply_service_tweaks
|
||||
from changedetectionio.store import ChangeDetectionStore
|
||||
from feedgen.feed import FeedGenerator
|
||||
from flask import Blueprint, make_response, request, url_for, redirect
|
||||
@@ -108,18 +109,24 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
|
||||
fe.link(link=diff_link)
|
||||
|
||||
# @todo watch should be a getter - watch.get('title') (internally if URL else..)
|
||||
# Same logic as watch-overview.html
|
||||
if datastore.data['settings']['application']['ui'].get('use_page_title_in_list') or watch.get('use_page_title_in_list'):
|
||||
watch_label = watch.label
|
||||
else:
|
||||
watch_label = watch.get('url')
|
||||
|
||||
watch_title = watch.get('title') if watch.get('title') else watch.get('url')
|
||||
fe.title(title=watch_title)
|
||||
fe.title(title=watch_label)
|
||||
try:
|
||||
|
||||
html_diff = diff.render_diff(previous_version_file_contents=watch.get_history_snapshot(dates[-2]),
|
||||
newest_version_file_contents=watch.get_history_snapshot(dates[-1]),
|
||||
include_equal=False,
|
||||
line_feed_sep="<br>",
|
||||
html_colour=html_colour_enable
|
||||
include_equal=False
|
||||
)
|
||||
|
||||
|
||||
requested_output_format = 'htmlcolor' if html_colour_enable else 'html'
|
||||
html_diff = apply_service_tweaks(url='', n_body=html_diff, n_title=None, requested_output_format=requested_output_format)
|
||||
|
||||
except FileNotFoundError as e:
|
||||
html_diff = f"History snapshot file for watch {watch.get('uuid')}@{watch.last_changed} - '{watch.get('title')} not found."
|
||||
|
||||
@@ -127,7 +134,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
# @todo User could decide if <link> goes to the diff page, or to the watch link
|
||||
rss_template = "<html><body>\n<h4><a href=\"{{watch_url}}\">{{watch_title}}</a></h4>\n<p>{{html_diff}}</p>\n</body></html>\n"
|
||||
|
||||
content = jinja_render(template_str=rss_template, watch_title=watch_title, html_diff=html_diff, watch_url=watch.link)
|
||||
content = jinja_render(template_str=rss_template, watch_title=watch_label, html_diff=html_diff, watch_url=watch.link)
|
||||
|
||||
# Out of range chars could also break feedgen
|
||||
if scan_invalid_chars_in_rss(content):
|
||||
|
||||
@@ -67,7 +67,32 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
del (app_update['password'])
|
||||
|
||||
datastore.data['settings']['application'].update(app_update)
|
||||
|
||||
# Handle dynamic worker count adjustment
|
||||
old_worker_count = datastore.data['settings']['requests'].get('workers', 1)
|
||||
new_worker_count = form.data['requests'].get('workers', 1)
|
||||
|
||||
datastore.data['settings']['requests'].update(form.data['requests'])
|
||||
|
||||
# Adjust worker count if it changed
|
||||
if new_worker_count != old_worker_count:
|
||||
from changedetectionio import worker_handler
|
||||
from changedetectionio.flask_app import update_q, notification_q, app, datastore as ds
|
||||
|
||||
result = worker_handler.adjust_async_worker_count(
|
||||
new_count=new_worker_count,
|
||||
update_q=update_q,
|
||||
notification_q=notification_q,
|
||||
app=app,
|
||||
datastore=ds
|
||||
)
|
||||
|
||||
if result['status'] == 'success':
|
||||
flash(f"Worker count adjusted: {result['message']}", 'notice')
|
||||
elif result['status'] == 'not_supported':
|
||||
flash("Dynamic worker adjustment not supported for sync workers", 'warning')
|
||||
elif result['status'] == 'error':
|
||||
flash(f"Error adjusting workers: {result['message']}", 'error')
|
||||
|
||||
if not os.getenv("SALTED_PASS", False) and len(form.application.form.password.encrypted_password):
|
||||
datastore.data['settings']['application']['password'] = form.application.form.password.encrypted_password
|
||||
@@ -94,7 +119,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
hide_remove_pass=os.getenv("SALTED_PASS", False),
|
||||
min_system_recheck_seconds=int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 3)),
|
||||
settings_application=datastore.data['settings']['application'],
|
||||
timezone_default_config=datastore.data['settings']['application'].get('timezone'),
|
||||
timezone_default_config=datastore.data['settings']['application'].get('scheduler_timezone_default'),
|
||||
utc_time=utc_time,
|
||||
)
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
{% extends 'base.html' %}
|
||||
|
||||
{% block content %}
|
||||
{% from '_helpers.html' import render_field, render_checkbox_field, render_button, render_time_schedule_form %}
|
||||
{% from '_helpers.html' import render_field, render_checkbox_field, render_button, render_time_schedule_form, render_ternary_field, render_fieldlist_with_inline_errors %}
|
||||
{% from '_common_fields.html' import render_common_settings_form %}
|
||||
<script>
|
||||
const notification_base_url="{{url_for('ui.ui_notification.ajax_callback_send_notification_test', mode="global-settings")}}";
|
||||
@@ -72,33 +72,23 @@
|
||||
<span class="pure-form-message-inline">Allow access to view watch diff page when password is enabled (Good for sharing the diff page)
|
||||
</span>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_checkbox_field(form.application.form.rss_hide_muted_watches) }}
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.application.form.pager_size) }}
|
||||
<span class="pure-form-message-inline">Number of items per page in the watch overview list, 0 to disable.</span>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.application.form.rss_content_format) }}
|
||||
<span class="pure-form-message-inline">Love RSS? Does your reader support HTML? Set it here</span>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_checkbox_field(form.application.form.extract_title_as_title) }}
|
||||
<span class="pure-form-message-inline">Note: This will automatically apply to all existing watches.</span>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_checkbox_field(form.application.form.empty_pages_are_a_change) }}
|
||||
<span class="pure-form-message-inline">When a request returns no content, or the HTML does not contain any text, is this considered a change?</span>
|
||||
</div>
|
||||
{% if form.requests.proxy %}
|
||||
<div class="pure-control-group inline-radio">
|
||||
{{ render_field(form.requests.form.proxy, class="fetch-backend-proxy") }}
|
||||
<span class="pure-form-message-inline">
|
||||
Choose a default proxy for all watches
|
||||
</span>
|
||||
<div class="grey-form-border">
|
||||
<div class="pure-control-group">
|
||||
{{ render_checkbox_field(form.application.form.rss_hide_muted_watches) }}
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.application.form.rss_content_format) }}
|
||||
<span class="pure-form-message-inline">Love RSS? Does your reader support HTML? Set it here</span>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_checkbox_field(form.application.form.rss_reader_mode) }}
|
||||
<span class="pure-form-message-inline">Transforms RSS/RDF feed watches into beautiful text only</span>
|
||||
</div>
|
||||
</div>
|
||||
{% endif %}
|
||||
</fieldset>
|
||||
</div>
|
||||
|
||||
@@ -135,6 +125,16 @@
|
||||
{{ render_field(form.application.form.webdriver_delay) }}
|
||||
</div>
|
||||
</fieldset>
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.requests.form.workers) }}
|
||||
{% set worker_info = get_worker_status_info() %}
|
||||
<span class="pure-form-message-inline">Number of concurrent workers to process watches. More workers = faster processing but higher memory usage.<br>
|
||||
Currently running: <strong>{{ worker_info.count }}</strong> operational {{ worker_info.type }} workers{% if worker_info.active_workers > 0 %} ({{ worker_info.active_workers }} actively processing){% endif %}.</span>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.requests.form.timeout) }}
|
||||
<span class="pure-form-message-inline">For regular plain requests (not chrome based), maximum number of seconds until timeout, 1-999.<br>
|
||||
</div>
|
||||
<div class="pure-control-group inline-radio">
|
||||
{{ render_field(form.requests.form.default_ua) }}
|
||||
<span class="pure-form-message-inline">
|
||||
@@ -193,11 +193,17 @@ nav
|
||||
</ul>
|
||||
</span>
|
||||
</fieldset>
|
||||
<fieldset class="pure-group">
|
||||
{{ render_checkbox_field(form.application.form.strip_ignored_lines) }}
|
||||
<span class="pure-form-message-inline">Remove any text that appears in the "Ignore text" from the output (otherwise its just ignored for change-detection)<br>
|
||||
<i>Note:</i> Changing this will change the status of your existing watches, possibly trigger alerts etc.
|
||||
</span>
|
||||
</fieldset>
|
||||
</div>
|
||||
|
||||
<div class="tab-pane-inner" id="api">
|
||||
<h4>API Access</h4>
|
||||
<p>Drive your changedetection.io via API, More about <a href="https://github.com/dgtlmoon/changedetection.io/wiki/API-Reference">API access here</a></p>
|
||||
<p>Drive your changedetection.io via API, More about <a href="https://changedetection.io/docs/api_v1/index.html">API access and examples here</a>.</p>
|
||||
|
||||
<div class="pure-control-group">
|
||||
{{ render_checkbox_field(form.application.form.api_access_token_enabled) }}
|
||||
@@ -232,11 +238,9 @@ nav
|
||||
<p><strong>UTC Time & Date from Server:</strong> <span id="utc-time" >{{ utc_time }}</span></p>
|
||||
<p><strong>Local Time & Date in Browser:</strong> <span class="local-time" data-utc="{{ utc_time }}"></span></p>
|
||||
<p>
|
||||
{{ render_field(form.application.form.timezone) }}
|
||||
{{ render_field(form.application.form.scheduler_timezone_default) }}
|
||||
<datalist id="timezones" style="display: none;">
|
||||
{% for tz_name in available_timezones %}
|
||||
<option value="{{ tz_name }}">{{ tz_name }}</option>
|
||||
{% endfor %}
|
||||
{%- for timezone in available_timezones -%}<option value="{{ timezone }}">{{ timezone }}</option>{%- endfor -%}
|
||||
</datalist>
|
||||
</p>
|
||||
</div>
|
||||
@@ -246,6 +250,22 @@ nav
|
||||
{{ render_checkbox_field(form.application.form.ui.form.open_diff_in_new_tab, class="open_diff_in_new_tab") }}
|
||||
<span class="pure-form-message-inline">Enable this setting to open the diff page in a new tab. If disabled, the diff page will open in the current tab.</span>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_checkbox_field(form.application.form.ui.form.socket_io_enabled, class="socket_io_enabled") }}
|
||||
<span class="pure-form-message-inline">Realtime UI Updates Enabled - (Restart required if this is changed)</span>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_checkbox_field(form.application.form.ui.form.favicons_enabled, class="") }}
|
||||
<span class="pure-form-message-inline">Enable or Disable Favicons next to the watch list</span>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_checkbox_field(form.application.form.ui.use_page_title_in_list) }}
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.application.form.pager_size) }}
|
||||
<span class="pure-form-message-inline">Number of items per page in the watch overview list, 0 to disable.</span>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
<div class="tab-pane-inner" id="proxies">
|
||||
<div id="recommended-proxy">
|
||||
@@ -294,23 +314,33 @@ nav
|
||||
<p><strong>Tip</strong>: "Residential" and "Mobile" proxy type can be more successfull than "Data Center" for blocked websites.
|
||||
|
||||
<div class="pure-control-group" id="extra-proxies-setting">
|
||||
{{ render_field(form.requests.form.extra_proxies) }}
|
||||
{{ render_fieldlist_with_inline_errors(form.requests.form.extra_proxies) }}
|
||||
<span class="pure-form-message-inline">"Name" will be used for selecting the proxy in the Watch Edit settings</span><br>
|
||||
<span class="pure-form-message-inline">SOCKS5 proxies with authentication are only supported with 'plain requests' fetcher, for other fetchers you should whitelist the IP access instead</span>
|
||||
{% if form.requests.proxy %}
|
||||
<div>
|
||||
<br>
|
||||
<div class="inline-radio">
|
||||
{{ render_field(form.requests.form.proxy, class="fetch-backend-proxy") }}
|
||||
<span class="pure-form-message-inline">Choose a default proxy for all watches</span>
|
||||
</div>
|
||||
</div>
|
||||
{% endif %}
|
||||
</div>
|
||||
<div class="pure-control-group" id="extra-browsers-setting">
|
||||
<p>
|
||||
<span class="pure-form-message-inline"><i>Extra Browsers</i> can be attached to further defeat CAPTCHA's on websites that are particularly hard to scrape.</span><br>
|
||||
<span class="pure-form-message-inline">Simply paste the connection address into the box, <a href="https://changedetection.io/tutorial/using-bright-datas-scraping-browser-pass-captchas-and-other-protection-when-monitoring">More instructions and examples here</a> </span>
|
||||
</p>
|
||||
{{ render_field(form.requests.form.extra_browsers) }}
|
||||
{{ render_fieldlist_with_inline_errors(form.requests.form.extra_browsers) }}
|
||||
</div>
|
||||
|
||||
</div>
|
||||
<div id="actions">
|
||||
<div class="pure-control-group">
|
||||
{{ render_button(form.save_button) }}
|
||||
<a href="{{url_for('watchlist.index')}}" class="pure-button button-small button-cancel">Back</a>
|
||||
<a href="{{url_for('ui.clear_all_history')}}" class="pure-button button-small button-error">Clear Snapshot History</a>
|
||||
<a href="{{url_for('watchlist.index')}}" class="pure-button button-cancel">Back</a>
|
||||
<a href="{{url_for('ui.clear_all_history')}}" class="pure-button button-error">Clear Snapshot History</a>
|
||||
</div>
|
||||
</div>
|
||||
</form>
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{% extends 'base.html' %}
|
||||
{% block content %}
|
||||
{% from '_helpers.html' import render_field, render_checkbox_field, render_button %}
|
||||
{% from '_helpers.html' import render_field, render_checkbox_field, render_button, render_ternary_field %}
|
||||
{% from '_common_fields.html' import render_common_settings_form %}
|
||||
<script>
|
||||
const notification_base_url="{{url_for('ui.ui_notification.ajax_callback_send_notification_test', mode="group-settings")}}";
|
||||
@@ -64,7 +64,7 @@
|
||||
<div class="tab-pane-inner" id="notifications">
|
||||
<fieldset>
|
||||
<div class="pure-control-group inline-radio">
|
||||
{{ render_checkbox_field(form.notification_muted) }}
|
||||
{{ render_ternary_field(form.notification_muted, BooleanField=True) }}
|
||||
</div>
|
||||
{% if 1 %}
|
||||
<div class="pure-control-group inline-radio">
|
||||
|
||||
@@ -10,7 +10,7 @@
|
||||
<legend>Add a new organisational tag</legend>
|
||||
<div id="watch-add-wrapper-zone">
|
||||
<div>
|
||||
{{ render_simple_field(form.name, placeholder="watch label / tag") }}
|
||||
{{ render_simple_field(form.name, placeholder="Watch group / tag") }}
|
||||
</div>
|
||||
<div>
|
||||
{{ render_simple_field(form.save_button, title="Save" ) }}
|
||||
|
||||
@@ -1,14 +1,112 @@
|
||||
import time
|
||||
from flask import Blueprint, request, redirect, url_for, flash, render_template, session
|
||||
from loguru import logger
|
||||
from functools import wraps
|
||||
|
||||
from changedetectionio.store import ChangeDetectionStore
|
||||
from changedetectionio.blueprint.ui.edit import construct_blueprint as construct_edit_blueprint
|
||||
from changedetectionio.blueprint.ui.notification import construct_blueprint as construct_notification_blueprint
|
||||
from changedetectionio.blueprint.ui.views import construct_blueprint as construct_views_blueprint
|
||||
|
||||
def construct_blueprint(datastore: ChangeDetectionStore, update_q, running_update_threads, queuedWatchMetaData):
|
||||
def _handle_operations(op, uuids, datastore, worker_handler, update_q, queuedWatchMetaData, watch_check_update, extra_data=None, emit_flash=True):
|
||||
from flask import request, flash
|
||||
|
||||
if op == 'delete':
|
||||
for uuid in uuids:
|
||||
if datastore.data['watching'].get(uuid):
|
||||
datastore.delete(uuid)
|
||||
if emit_flash:
|
||||
flash(f"{len(uuids)} watches deleted")
|
||||
|
||||
elif op == 'pause':
|
||||
for uuid in uuids:
|
||||
if datastore.data['watching'].get(uuid):
|
||||
datastore.data['watching'][uuid]['paused'] = True
|
||||
if emit_flash:
|
||||
flash(f"{len(uuids)} watches paused")
|
||||
|
||||
elif op == 'unpause':
|
||||
for uuid in uuids:
|
||||
if datastore.data['watching'].get(uuid):
|
||||
datastore.data['watching'][uuid.strip()]['paused'] = False
|
||||
if emit_flash:
|
||||
flash(f"{len(uuids)} watches unpaused")
|
||||
|
||||
elif (op == 'mark-viewed'):
|
||||
for uuid in uuids:
|
||||
if datastore.data['watching'].get(uuid):
|
||||
datastore.set_last_viewed(uuid, int(time.time()))
|
||||
if emit_flash:
|
||||
flash(f"{len(uuids)} watches updated")
|
||||
|
||||
elif (op == 'mute'):
|
||||
for uuid in uuids:
|
||||
if datastore.data['watching'].get(uuid):
|
||||
datastore.data['watching'][uuid]['notification_muted'] = True
|
||||
if emit_flash:
|
||||
flash(f"{len(uuids)} watches muted")
|
||||
|
||||
elif (op == 'unmute'):
|
||||
for uuid in uuids:
|
||||
if datastore.data['watching'].get(uuid):
|
||||
datastore.data['watching'][uuid]['notification_muted'] = False
|
||||
if emit_flash:
|
||||
flash(f"{len(uuids)} watches un-muted")
|
||||
|
||||
elif (op == 'recheck'):
|
||||
for uuid in uuids:
|
||||
if datastore.data['watching'].get(uuid):
|
||||
# Recheck and require a full reprocessing
|
||||
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
if emit_flash:
|
||||
flash(f"{len(uuids)} watches queued for rechecking")
|
||||
|
||||
elif (op == 'clear-errors'):
|
||||
for uuid in uuids:
|
||||
if datastore.data['watching'].get(uuid):
|
||||
datastore.data['watching'][uuid]["last_error"] = False
|
||||
if emit_flash:
|
||||
flash(f"{len(uuids)} watches errors cleared")
|
||||
|
||||
elif (op == 'clear-history'):
|
||||
for uuid in uuids:
|
||||
if datastore.data['watching'].get(uuid):
|
||||
datastore.clear_watch_history(uuid)
|
||||
if emit_flash:
|
||||
flash(f"{len(uuids)} watches cleared/reset.")
|
||||
|
||||
elif (op == 'notification-default'):
|
||||
from changedetectionio.notification import (
|
||||
USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH
|
||||
)
|
||||
for uuid in uuids:
|
||||
if datastore.data['watching'].get(uuid):
|
||||
datastore.data['watching'][uuid]['notification_title'] = None
|
||||
datastore.data['watching'][uuid]['notification_body'] = None
|
||||
datastore.data['watching'][uuid]['notification_urls'] = []
|
||||
datastore.data['watching'][uuid]['notification_format'] = USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH
|
||||
if emit_flash:
|
||||
flash(f"{len(uuids)} watches set to use default notification settings")
|
||||
|
||||
elif (op == 'assign-tag'):
|
||||
op_extradata = extra_data
|
||||
if op_extradata:
|
||||
tag_uuid = datastore.add_tag(title=op_extradata)
|
||||
if op_extradata and tag_uuid:
|
||||
for uuid in uuids:
|
||||
if datastore.data['watching'].get(uuid):
|
||||
# Bug in old versions caused by bad edit page/tag handler
|
||||
if isinstance(datastore.data['watching'][uuid]['tags'], str):
|
||||
datastore.data['watching'][uuid]['tags'] = []
|
||||
|
||||
datastore.data['watching'][uuid]['tags'].append(tag_uuid)
|
||||
if emit_flash:
|
||||
flash(f"{len(uuids)} watches were tagged")
|
||||
|
||||
if uuids:
|
||||
for uuid in uuids:
|
||||
watch_check_update.send(watch_uuid=uuid)
|
||||
|
||||
def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_handler, queuedWatchMetaData, watch_check_update):
|
||||
ui_blueprint = Blueprint('ui', __name__, template_folder="templates")
|
||||
|
||||
# Register the edit blueprint
|
||||
@@ -20,9 +118,9 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, running_updat
|
||||
ui_blueprint.register_blueprint(notification_blueprint)
|
||||
|
||||
# Register the views blueprint
|
||||
views_blueprint = construct_views_blueprint(datastore, update_q, queuedWatchMetaData)
|
||||
views_blueprint = construct_views_blueprint(datastore, update_q, queuedWatchMetaData, watch_check_update)
|
||||
ui_blueprint.register_blueprint(views_blueprint)
|
||||
|
||||
|
||||
# Import the login decorator
|
||||
from changedetectionio.auth_decorator import login_optionally_required
|
||||
|
||||
@@ -35,7 +133,6 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, running_updat
|
||||
flash('Watch not found', 'error')
|
||||
else:
|
||||
flash("Cleared snapshot history for watch {}".format(uuid))
|
||||
|
||||
return redirect(url_for('watchlist.index'))
|
||||
|
||||
@ui_blueprint.route("/clear_history", methods=['GET', 'POST'])
|
||||
@@ -47,7 +144,6 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, running_updat
|
||||
if confirmtext == 'clear':
|
||||
for uuid in datastore.data['watching'].keys():
|
||||
datastore.clear_watch_history(uuid)
|
||||
|
||||
flash("Cleared snapshot history for all watches")
|
||||
else:
|
||||
flash('Incorrect confirmation text.', 'error')
|
||||
@@ -63,12 +159,20 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, running_updat
|
||||
def mark_all_viewed():
|
||||
# Save the current newest history as the most recently viewed
|
||||
with_errors = request.args.get('with_errors') == "1"
|
||||
tag_limit = request.args.get('tag')
|
||||
logger.debug(f"Limiting to tag {tag_limit}")
|
||||
now = int(time.time())
|
||||
for watch_uuid, watch in datastore.data['watching'].items():
|
||||
if with_errors and not watch.get('last_error'):
|
||||
continue
|
||||
datastore.set_last_viewed(watch_uuid, int(time.time()))
|
||||
|
||||
return redirect(url_for('watchlist.index'))
|
||||
if tag_limit and ( not watch.get('tags') or tag_limit not in watch['tags'] ):
|
||||
logger.debug(f"Skipping watch {watch_uuid}")
|
||||
continue
|
||||
|
||||
datastore.set_last_viewed(watch_uuid, now)
|
||||
|
||||
return redirect(url_for('watchlist.index', tag=tag_limit))
|
||||
|
||||
@ui_blueprint.route("/delete", methods=['GET'])
|
||||
@login_optionally_required
|
||||
@@ -98,7 +202,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, running_updat
|
||||
new_uuid = datastore.clone(uuid)
|
||||
|
||||
if not datastore.data['watching'].get(uuid).get('paused'):
|
||||
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=5, item={'uuid': new_uuid}))
|
||||
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=5, item={'uuid': new_uuid}))
|
||||
|
||||
flash('Cloned, you are editing the new watch.')
|
||||
|
||||
@@ -114,13 +218,11 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, running_updat
|
||||
|
||||
i = 0
|
||||
|
||||
running_uuids = []
|
||||
for t in running_update_threads:
|
||||
running_uuids.append(t.current_uuid)
|
||||
running_uuids = worker_handler.get_running_uuids()
|
||||
|
||||
if uuid:
|
||||
if uuid not in running_uuids:
|
||||
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
i += 1
|
||||
|
||||
else:
|
||||
@@ -137,7 +239,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, running_updat
|
||||
if tag != None and tag not in watch['tags']:
|
||||
continue
|
||||
|
||||
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid}))
|
||||
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid}))
|
||||
i += 1
|
||||
|
||||
if i == 1:
|
||||
@@ -153,100 +255,18 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, running_updat
|
||||
@login_optionally_required
|
||||
def form_watch_list_checkbox_operations():
|
||||
op = request.form['op']
|
||||
uuids = request.form.getlist('uuids')
|
||||
|
||||
if (op == 'delete'):
|
||||
for uuid in uuids:
|
||||
uuid = uuid.strip()
|
||||
if datastore.data['watching'].get(uuid):
|
||||
datastore.delete(uuid.strip())
|
||||
flash("{} watches deleted".format(len(uuids)))
|
||||
|
||||
elif (op == 'pause'):
|
||||
for uuid in uuids:
|
||||
uuid = uuid.strip()
|
||||
if datastore.data['watching'].get(uuid):
|
||||
datastore.data['watching'][uuid.strip()]['paused'] = True
|
||||
flash("{} watches paused".format(len(uuids)))
|
||||
|
||||
elif (op == 'unpause'):
|
||||
for uuid in uuids:
|
||||
uuid = uuid.strip()
|
||||
if datastore.data['watching'].get(uuid):
|
||||
datastore.data['watching'][uuid.strip()]['paused'] = False
|
||||
flash("{} watches unpaused".format(len(uuids)))
|
||||
|
||||
elif (op == 'mark-viewed'):
|
||||
for uuid in uuids:
|
||||
uuid = uuid.strip()
|
||||
if datastore.data['watching'].get(uuid):
|
||||
datastore.set_last_viewed(uuid, int(time.time()))
|
||||
flash("{} watches updated".format(len(uuids)))
|
||||
|
||||
elif (op == 'mute'):
|
||||
for uuid in uuids:
|
||||
uuid = uuid.strip()
|
||||
if datastore.data['watching'].get(uuid):
|
||||
datastore.data['watching'][uuid.strip()]['notification_muted'] = True
|
||||
flash("{} watches muted".format(len(uuids)))
|
||||
|
||||
elif (op == 'unmute'):
|
||||
for uuid in uuids:
|
||||
uuid = uuid.strip()
|
||||
if datastore.data['watching'].get(uuid):
|
||||
datastore.data['watching'][uuid.strip()]['notification_muted'] = False
|
||||
flash("{} watches un-muted".format(len(uuids)))
|
||||
|
||||
elif (op == 'recheck'):
|
||||
for uuid in uuids:
|
||||
uuid = uuid.strip()
|
||||
if datastore.data['watching'].get(uuid):
|
||||
# Recheck and require a full reprocessing
|
||||
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
flash("{} watches queued for rechecking".format(len(uuids)))
|
||||
|
||||
elif (op == 'clear-errors'):
|
||||
for uuid in uuids:
|
||||
uuid = uuid.strip()
|
||||
if datastore.data['watching'].get(uuid):
|
||||
datastore.data['watching'][uuid]["last_error"] = False
|
||||
flash(f"{len(uuids)} watches errors cleared")
|
||||
|
||||
elif (op == 'clear-history'):
|
||||
for uuid in uuids:
|
||||
uuid = uuid.strip()
|
||||
if datastore.data['watching'].get(uuid):
|
||||
datastore.clear_watch_history(uuid)
|
||||
flash("{} watches cleared/reset.".format(len(uuids)))
|
||||
|
||||
elif (op == 'notification-default'):
|
||||
from changedetectionio.notification import (
|
||||
default_notification_format_for_watch
|
||||
)
|
||||
for uuid in uuids:
|
||||
uuid = uuid.strip()
|
||||
if datastore.data['watching'].get(uuid):
|
||||
datastore.data['watching'][uuid.strip()]['notification_title'] = None
|
||||
datastore.data['watching'][uuid.strip()]['notification_body'] = None
|
||||
datastore.data['watching'][uuid.strip()]['notification_urls'] = []
|
||||
datastore.data['watching'][uuid.strip()]['notification_format'] = default_notification_format_for_watch
|
||||
flash("{} watches set to use default notification settings".format(len(uuids)))
|
||||
|
||||
elif (op == 'assign-tag'):
|
||||
op_extradata = request.form.get('op_extradata', '').strip()
|
||||
if op_extradata:
|
||||
tag_uuid = datastore.add_tag(title=op_extradata)
|
||||
if op_extradata and tag_uuid:
|
||||
for uuid in uuids:
|
||||
uuid = uuid.strip()
|
||||
if datastore.data['watching'].get(uuid):
|
||||
# Bug in old versions caused by bad edit page/tag handler
|
||||
if isinstance(datastore.data['watching'][uuid]['tags'], str):
|
||||
datastore.data['watching'][uuid]['tags'] = []
|
||||
|
||||
datastore.data['watching'][uuid]['tags'].append(tag_uuid)
|
||||
|
||||
flash(f"{len(uuids)} watches were tagged")
|
||||
uuids = [u.strip() for u in request.form.getlist('uuids') if u]
|
||||
extra_data = request.form.get('op_extradata', '').strip()
|
||||
_handle_operations(
|
||||
datastore=datastore,
|
||||
extra_data=extra_data,
|
||||
queuedWatchMetaData=queuedWatchMetaData,
|
||||
uuids=uuids,
|
||||
worker_handler=worker_handler,
|
||||
update_q=update_q,
|
||||
watch_check_update=watch_check_update,
|
||||
op=op,
|
||||
)
|
||||
|
||||
return redirect(url_for('watchlist.index'))
|
||||
|
||||
|
||||
@@ -9,6 +9,7 @@ from jinja2 import Environment, FileSystemLoader
|
||||
from changedetectionio.store import ChangeDetectionStore
|
||||
from changedetectionio.auth_decorator import login_optionally_required
|
||||
from changedetectionio.time_handler import is_within_schedule
|
||||
from changedetectionio import worker_handler
|
||||
|
||||
def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMetaData):
|
||||
edit_blueprint = Blueprint('ui_edit', __name__, template_folder="../ui/templates")
|
||||
@@ -186,7 +187,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
|
||||
tz_name = time_schedule_limit.get('timezone')
|
||||
if not tz_name:
|
||||
tz_name = datastore.data['settings']['application'].get('timezone', 'UTC')
|
||||
tz_name = datastore.data['settings']['application'].get('scheduler_timezone_default', os.getenv('TZ', 'UTC').strip())
|
||||
|
||||
if time_schedule_limit and time_schedule_limit.get('enabled'):
|
||||
try:
|
||||
@@ -201,7 +202,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
#############################
|
||||
if not datastore.data['watching'][uuid].get('paused') and is_in_schedule:
|
||||
# Queue the watch for immediate recheck, with a higher priority
|
||||
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
|
||||
# Diff page [edit] link should go back to diff page
|
||||
if request.args.get("next") and request.args.get("next") == 'diff':
|
||||
@@ -241,6 +242,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
'available_timezones': sorted(available_timezones()),
|
||||
'browser_steps_config': browser_step_ui_config,
|
||||
'emailprefix': os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False),
|
||||
'extra_classes': 'checking-now' if worker_handler.is_watch_running(uuid) else '',
|
||||
'extra_notification_token_placeholder_info': datastore.get_unique_notification_token_placeholders_available(),
|
||||
'extra_processor_config': form.extra_tab_content(),
|
||||
'extra_title': f" - Edit - {watch.label}",
|
||||
@@ -255,7 +257,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
'system_has_webdriver_configured': os.getenv('WEBDRIVER_URL'),
|
||||
'ui_edit_stats_extras': collect_ui_edit_stats_extras(watch),
|
||||
'visual_selector_data_ready': datastore.visualselector_data_is_ready(watch_uuid=uuid),
|
||||
'timezone_default_config': datastore.data['settings']['application'].get('timezone'),
|
||||
'timezone_default_config': datastore.data['settings']['application'].get('scheduler_timezone_default'),
|
||||
'using_global_webdriver_wait': not default['webdriver_delay'],
|
||||
'uuid': uuid,
|
||||
'watch': watch,
|
||||
|
||||
@@ -2,6 +2,7 @@ from flask import Blueprint, request, make_response
|
||||
import random
|
||||
from loguru import logger
|
||||
|
||||
from changedetectionio.notification_service import NotificationContextData
|
||||
from changedetectionio.store import ChangeDetectionStore
|
||||
from changedetectionio.auth_decorator import login_optionally_required
|
||||
|
||||
@@ -19,6 +20,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
import apprise
|
||||
from changedetectionio.notification.handler import process_notification
|
||||
from changedetectionio.notification.apprise_plugin.assets import apprise_asset
|
||||
from changedetectionio.jinja2_custom import render as jinja_render
|
||||
|
||||
from changedetectionio.notification.apprise_plugin.custom_handlers import apprise_http_custom_handler
|
||||
|
||||
@@ -37,11 +39,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
return make_response("Error: You must have atleast one watch configured for 'test notification' to work", 400)
|
||||
|
||||
watch = datastore.data['watching'].get(watch_uuid)
|
||||
|
||||
notification_urls = None
|
||||
|
||||
if request.form.get('notification_urls'):
|
||||
notification_urls = request.form['notification_urls'].strip().splitlines()
|
||||
notification_urls = request.form.get('notification_urls','').strip().splitlines()
|
||||
|
||||
if not notification_urls:
|
||||
logger.debug("Test notification - Trying by group/tag in the edit form if available")
|
||||
@@ -61,20 +59,26 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
return 'Error: No Notification URLs set/found'
|
||||
|
||||
for n_url in notification_urls:
|
||||
# We are ONLY validating the apprise:// part here, convert all tags to something so as not to break apprise URLs
|
||||
generic_notification_context_data = NotificationContextData()
|
||||
generic_notification_context_data.set_random_for_validation()
|
||||
n_url = jinja_render(template_str=n_url, **generic_notification_context_data).strip()
|
||||
if len(n_url.strip()):
|
||||
if not apobj.add(n_url):
|
||||
return f'Error: {n_url} is not a valid AppRise URL.'
|
||||
|
||||
try:
|
||||
# use the same as when it is triggered, but then override it with the form test values
|
||||
n_object = {
|
||||
n_object = NotificationContextData({
|
||||
'watch_url': request.form.get('window_url', "https://changedetection.io"),
|
||||
'notification_urls': notification_urls
|
||||
}
|
||||
})
|
||||
|
||||
# Only use if present, if not set in n_object it should use the default system value
|
||||
if 'notification_format' in request.form and request.form['notification_format'].strip():
|
||||
n_object['notification_format'] = request.form.get('notification_format', '').strip()
|
||||
else:
|
||||
n_object['notification_format'] = datastore.data['settings']['application'].get('notification_format')
|
||||
|
||||
if 'notification_title' in request.form and request.form['notification_title'].strip():
|
||||
n_object['notification_title'] = request.form.get('notification_title', '').strip()
|
||||
|
||||
@@ -8,15 +8,19 @@
|
||||
{% endif %}
|
||||
|
||||
const highlight_submit_ignore_url="{{url_for('ui.ui_edit.highlight_submit_ignore_url', uuid=uuid)}}";
|
||||
|
||||
const watch_url= {{watch_a.link|tojson}};
|
||||
</script>
|
||||
<script src="https://cdn.jsdelivr.net/npm/html2canvas@1.4.1/dist/html2canvas.min.js"></script>
|
||||
<script src="https://cdn.jsdelivr.net/npm/piexifjs@1.0.6/piexif.min.js"></script>
|
||||
<script src="{{url_for('static_content', group='js', filename='snippet-to-image.js')}}"></script>
|
||||
<script src="{{url_for('static_content', group='js', filename='diff-overview.js')}}" defer></script>
|
||||
|
||||
<div id="settings">
|
||||
<form class="pure-form " action="" method="GET" id="diff-form">
|
||||
<form class="pure-form " action="{{ url_for("ui.ui_views.diff_history_page", uuid=uuid) }}" method="POST" id="diff-form">
|
||||
<fieldset class="diff-fieldset">
|
||||
{% if versions|length >= 1 %}
|
||||
<strong>Compare</strong>
|
||||
from {{from_version}} to {{to_version}}<br>
|
||||
<del class="change"><span>from</span></del>
|
||||
<select id="diff-version" name="from_version" class="needs-localtime">
|
||||
{% for version in versions|reverse %}
|
||||
@@ -37,26 +41,29 @@
|
||||
{% endif %}
|
||||
</fieldset>
|
||||
<fieldset>
|
||||
<strong>Style</strong>
|
||||
<label for="diffWords" class="pure-checkbox">
|
||||
<input type="radio" name="diff_type" id="diffWords" value="diffWords"> Words</label>
|
||||
<label for="diffLines" class="pure-checkbox">
|
||||
<input type="radio" name="diff_type" id="diffLines" value="diffLines" checked=""> Lines</label>
|
||||
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}">
|
||||
<strong>Style</strong>
|
||||
|
||||
<label for="diffChars" class="pure-checkbox">
|
||||
<input type="radio" name="diff_type" id="diffChars" value="diffChars"> Chars</label>
|
||||
<!-- @todo - when mimetype is JSON, select this by default? -->
|
||||
<label for="diffJson" class="pure-checkbox">
|
||||
<input type="radio" name="diff_type" id="diffJson" value="diffJson"> JSON</label>
|
||||
<label for="diffWords" class="pure-checkbox">
|
||||
<input type="radio" name="diff_type" id="diffWords" value="diffWords" {% if diff_prefs.diff_type == 'diffWords' %}checked=""{% endif %}> Words</label>
|
||||
<label for="diffLines" class="pure-checkbox">
|
||||
<input type="radio" name="diff_type" id="diffLines" value="diffLines" {% if diff_prefs.diff_type == 'diffLines' %}checked=""{% endif %}> Lines</label>
|
||||
|
||||
<span>
|
||||
<!-- https://github.com/kpdecker/jsdiff/issues/389 ? -->
|
||||
<label for="ignoreWhitespace" class="pure-checkbox" id="label-diff-ignorewhitespace">
|
||||
<input type="checkbox" id="ignoreWhitespace" name="ignoreWhitespace"> Ignore Whitespace</label>
|
||||
</span>
|
||||
<label for="ignoreWhitespace" class="pure-checkbox" id="label-diff-ignorewhitespace">
|
||||
<input type="checkbox" name="ignoreWhitespace" {% if diff_prefs.ignoreWhitespace %}checked=""{% endif %}> Ignore Whitespace</label>
|
||||
|
||||
<label for="changesOnly" class="pure-checkbox" id="label-diff-changes">
|
||||
<input type="checkbox" name="diff_changesOnly" {% if diff_prefs.diff_changesOnly %}checked=""{% endif %}> Changes only</label>
|
||||
|
||||
<label for="changesOnly" class="pure-checkbox" id="label-diff-removed">
|
||||
<input type="checkbox" name="diff_removed" {% if diff_prefs.diff_removed %}checked=""{% endif %}> Removed</label>
|
||||
<label for="changesOnly" class="pure-checkbox" id="label-diff-added">
|
||||
<input type="checkbox" name="diff_added" {% if diff_prefs.diff_added %}checked=""{% endif %}> Added</label>
|
||||
<label for="changesOnly" class="pure-checkbox" id="label-diff-replaced">
|
||||
<input type="checkbox" name="diff_replaced" {% if diff_prefs.diff_replaced %}checked=""{% endif %}> Replaced</label>
|
||||
<input type="submit">
|
||||
</fieldset>
|
||||
</form>
|
||||
|
||||
</div>
|
||||
|
||||
<div id="diff-jump">
|
||||
@@ -88,26 +95,30 @@
|
||||
</div>
|
||||
|
||||
<div class="tab-pane-inner" id="text">
|
||||
<button id="share-as-image-btn" onclick="diffToJpeg()" title="Share diff as image" style="float: right; margin: 10px; padding: 8px 12px; background: #4CAF50; color: white; border: none; border-radius: 4px; cursor: pointer; font-size: 14px;">
|
||||
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" style="vertical-align: middle; margin-right: 4px;">
|
||||
<rect x="3" y="3" width="18" height="18" rx="2" ry="2"></rect>
|
||||
<circle cx="8.5" cy="8.5" r="1.5"></circle>
|
||||
<polyline points="21 15 16 10 5 21"></polyline>
|
||||
</svg>
|
||||
Share as Image
|
||||
</button>
|
||||
{% if password_enabled_and_share_is_off %}
|
||||
<div class="tip">Pro-tip: You can enable <strong>"share access when password is enabled"</strong> from settings</div>
|
||||
{% endif %}
|
||||
|
||||
<div class="snapshot-age">{{watch_a.snapshot_text_ctime|format_timestamp_timeago}}</div>
|
||||
|
||||
<table>
|
||||
<tbody>
|
||||
<tr>
|
||||
<!-- just proof of concept copied straight from github.com/kpdecker/jsdiff -->
|
||||
<td id="a" style="display: none;">{{from_version_file_contents}}</td>
|
||||
<td id="b" style="display: none;">{{to_version_file_contents}}</td>
|
||||
<td id="diff-col">
|
||||
<span id="result" class="highlightable-filter"></span>
|
||||
</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
Diff algorithm from the amazing <a href="https://github.com/kpdecker/jsdiff">github.com/kpdecker/jsdiff</a>
|
||||
<table>
|
||||
<tbody>
|
||||
<tr>
|
||||
<td id="diff-col" class="highlightable-filter">
|
||||
<pre id="difference" style="border-left: 2px solid #ddd;">{{ content| diff_unescape_difference_spans }}</pre>
|
||||
</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
|
||||
<div class="tab-pane-inner" id="screenshot">
|
||||
<div class="tip">
|
||||
For now, Differences are performed on text, not graphically, only the latest screenshot is available.
|
||||
@@ -159,8 +170,6 @@
|
||||
<script>
|
||||
const newest_version_timestamp = {{newest_version_timestamp}};
|
||||
</script>
|
||||
<script src="{{url_for('static_content', group='js', filename='diff.min.js')}}"></script>
|
||||
|
||||
<script src="{{url_for('static_content', group='js', filename='diff-render.js')}}"></script>
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{% extends 'base.html' %}
|
||||
{% block content %}
|
||||
{% from '_helpers.html' import render_field, render_checkbox_field, render_button, render_time_schedule_form, playwright_warning, only_playwright_type_watches_warning, render_conditions_fieldlist_of_formfields_as_table %}
|
||||
{% from '_helpers.html' import render_field, render_checkbox_field, render_button, render_time_schedule_form, playwright_warning, only_playwright_type_watches_warning, highlight_trigger_ignored_explainer, render_conditions_fieldlist_of_formfields_as_table, render_ternary_field %}
|
||||
{% from '_common_fields.html' import render_common_settings_form %}
|
||||
<script src="{{url_for('static_content', group='js', filename='tabs.js')}}" defer></script>
|
||||
<script src="{{url_for('static_content', group='js', filename='vis.js')}}" defer></script>
|
||||
@@ -72,15 +72,16 @@
|
||||
<div class="pure-form-message">Some sites use JavaScript to create the content, for this you should <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Fetching-pages-with-WebDriver">use the Chrome/WebDriver Fetcher</a></div>
|
||||
<div class="pure-form-message">Variables are supported in the URL (<a href="https://github.com/dgtlmoon/changedetection.io/wiki/Handling-variables-in-the-watched-URL">help and examples here</a>).</div>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.tags) }}
|
||||
<span class="pure-form-message-inline">Organisational tag/group name used in the main listing page</span>
|
||||
</div>
|
||||
<div class="pure-control-group inline-radio">
|
||||
{{ render_field(form.processor) }}
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.title, class="m-d") }}
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.tags) }}
|
||||
<span class="pure-form-message-inline">Organisational tag/group name used in the main listing page</span>
|
||||
{{ render_field(form.title, class="m-d", placeholder=watch.label) }}
|
||||
<span class="pure-form-message-inline">Automatically uses the page title if found, you can also use your own title/description here</span>
|
||||
</div>
|
||||
<div class="pure-control-group time-between-check border-fieldset">
|
||||
|
||||
@@ -101,15 +102,16 @@
|
||||
</div>
|
||||
<br>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_checkbox_field(form.extract_title_as_title) }}
|
||||
</div>
|
||||
|
||||
<div class="pure-control-group">
|
||||
{{ render_checkbox_field(form.filter_failure_notification_send) }}
|
||||
<span class="pure-form-message-inline">
|
||||
Sends a notification when the filter can no longer be seen on the page, good for knowing when the page changed and your filter will not work anymore.
|
||||
</span>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_ternary_field(form.use_page_title_in_list) }}
|
||||
</div>
|
||||
</fieldset>
|
||||
</div>
|
||||
|
||||
@@ -239,7 +241,7 @@ Math: {{ 1 + 1 }}") }}
|
||||
</div>
|
||||
</div>
|
||||
<div id="browser-steps-fieldlist" >
|
||||
<span id="browser-seconds-remaining">Loading</span> <span style="font-size: 80%;"> (<a target="newwindow" href="https://github.com/dgtlmoon/changedetection.io/pull/478/files#diff-1a79d924d1840c485238e66772391268a89c95b781d69091384cf1ea1ac146c9R4">?</a>) </span>
|
||||
<span id="browser-seconds-remaining">Press "Play" to start.</span> <span style="font-size: 80%;"> (<a target="newwindow" href="https://github.com/dgtlmoon/changedetection.io/pull/478/files#diff-1a79d924d1840c485238e66772391268a89c95b781d69091384cf1ea1ac146c9R4">?</a>) </span>
|
||||
{{ render_field(form.browser_steps) }}
|
||||
</div>
|
||||
</div>
|
||||
@@ -262,7 +264,7 @@ Math: {{ 1 + 1 }}") }}
|
||||
<div class="tab-pane-inner" id="notifications">
|
||||
<fieldset>
|
||||
<div class="pure-control-group inline-radio">
|
||||
{{ render_checkbox_field(form.notification_muted) }}
|
||||
{{ render_ternary_field(form.notification_muted, BooleanField=true) }}
|
||||
</div>
|
||||
{% if watch_needs_selenium_or_playwright %}
|
||||
<div class="pure-control-group inline-radio">
|
||||
@@ -349,21 +351,22 @@ Math: {{ 1 + 1 }}") }}
|
||||
</div>
|
||||
</div>
|
||||
<div id="text-preview" style="display: none;" >
|
||||
<script>
|
||||
const preview_text_edit_filters_url="{{url_for('ui.ui_edit.watch_get_preview_rendered', uuid=uuid)}}";
|
||||
</script>
|
||||
<br>
|
||||
{#<div id="text-preview-controls"><span id="text-preview-refresh" class="pure-button button-xsmall">Refresh</span></div>#}
|
||||
<div class="minitabs-wrapper">
|
||||
<div class="minitabs-content">
|
||||
<div id="text-preview-inner" class="monospace-preview">
|
||||
<p>Loading...</p>
|
||||
</div>
|
||||
<div id="text-preview-before-inner" style="display: none;" class="monospace-preview">
|
||||
<p>Loading...</p>
|
||||
</div>
|
||||
<script>
|
||||
const preview_text_edit_filters_url="{{url_for('ui.ui_edit.watch_get_preview_rendered', uuid=uuid)}}";
|
||||
</script>
|
||||
<br>
|
||||
{#<div id="text-preview-controls"><span id="text-preview-refresh" class="pure-button button-xsmall">Refresh</span></div>#}
|
||||
<div class="minitabs-wrapper">
|
||||
<div class="minitabs-content">
|
||||
<div id="text-preview-inner" class="monospace-preview">
|
||||
<p>Loading...</p>
|
||||
</div>
|
||||
</div>
|
||||
<div id="text-preview-before-inner" style="display: none;" class="monospace-preview">
|
||||
<p>Loading...</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
{{ highlight_trigger_ignored_explainer() }}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
@@ -469,11 +472,11 @@ Math: {{ 1 + 1 }}") }}
|
||||
<div class="pure-control-group">
|
||||
{{ render_button(form.save_button) }}
|
||||
<a href="{{url_for('ui.form_delete', uuid=uuid)}}"
|
||||
class="pure-button button-small button-error ">Delete</a>
|
||||
class="pure-button button-error ">Delete</a>
|
||||
{% if watch.history_n %}<a href="{{url_for('ui.clear_watch_history', uuid=uuid)}}"
|
||||
class="pure-button button-small button-error ">Clear History</a>{% endif %}
|
||||
class="pure-button button-error">Clear History</a>{% endif %}
|
||||
<a href="{{url_for('ui.form_clone', uuid=uuid)}}"
|
||||
class="pure-button button-small ">Clone & Edit</a>
|
||||
class="pure-button">Clone & Edit</a>
|
||||
</div>
|
||||
</div>
|
||||
</form>
|
||||
@@ -1,9 +1,11 @@
|
||||
{% extends 'base.html' %}
|
||||
|
||||
{% from '_helpers.html' import highlight_trigger_ignored_explainer %}
|
||||
{% block content %}
|
||||
<script>
|
||||
const screenshot_url = "{{url_for('static_content', group='screenshot', filename=uuid)}}";
|
||||
const triggered_line_numbers = {{ triggered_line_numbers|tojson }};
|
||||
const triggered_line_numbers = {{ highlight_triggered_line_numbers|tojson }};
|
||||
const ignored_line_numbers = {{ highlight_ignored_line_numbers|tojson }};
|
||||
const blocked_line_numbers = {{ highlight_blocked_line_numbers|tojson }};
|
||||
{% if last_error_screenshot %}
|
||||
const error_screenshot_url = "{{url_for('static_content', group='screenshot', filename=uuid, error_screenshot=1) }}";
|
||||
{% endif %}
|
||||
@@ -82,6 +84,7 @@
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
{{ highlight_trigger_ignored_explainer() }}
|
||||
</div>
|
||||
|
||||
<div class="tab-pane-inner" id="screenshot">
|
||||
@@ -1,16 +1,80 @@
|
||||
from flask import Blueprint, request, redirect, url_for, flash, render_template, make_response, send_from_directory, abort
|
||||
from flask_login import current_user
|
||||
import os
|
||||
import time
|
||||
from copy import deepcopy
|
||||
import re
|
||||
from loguru import logger
|
||||
from markupsafe import Markup
|
||||
|
||||
from changedetectionio.diff import REMOVED_STYLE, ADDED_STYLE, REMOVED_INNER_STYLE, ADDED_INNER_STYLE
|
||||
from changedetectionio.notification.handler import apply_html_color_to_body
|
||||
from changedetectionio.notification_service import CUSTOM_LINEBREAK_PLACEHOLDER
|
||||
from changedetectionio.store import ChangeDetectionStore
|
||||
from changedetectionio.auth_decorator import login_optionally_required
|
||||
from changedetectionio import html_tools
|
||||
from changedetectionio import html_tools, diff
|
||||
from changedetectionio import worker_handler
|
||||
from changedetectionio.blueprint.cookie_preferences import PreferenceManager
|
||||
|
||||
def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMetaData):
|
||||
def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMetaData, watch_check_update):
|
||||
views_blueprint = Blueprint('ui_views', __name__, template_folder="../ui/templates")
|
||||
|
||||
|
||||
# Diff display preferences configuration
|
||||
DIFF_PREFERENCES_CONFIG = {
|
||||
'diff_changesOnly': {'default': False, 'type': 'bool'},
|
||||
'diff_ignoreWhitespace': {'default': False, 'type': 'bool'},
|
||||
'diff_removed': {'default': True, 'type': 'bool'},
|
||||
'diff_added': {'default': True, 'type': 'bool'},
|
||||
'diff_replaced': {'default': True, 'type': 'bool'},
|
||||
'diff_type': {'default': 'diffLines', 'type': 'value'},
|
||||
}
|
||||
|
||||
@views_blueprint.app_template_filter('diff_unescape_difference_spans')
|
||||
def diff_unescape_difference_spans(content):
|
||||
"""Emulate Jinja2's auto-escape, then selectively unescape our diff spans."""
|
||||
from markupsafe import escape
|
||||
|
||||
if not content:
|
||||
return Markup('')
|
||||
|
||||
# Step 1: Escape everything like Jinja2 would (this makes it XSS-safe)
|
||||
escaped_content = escape(str(content))
|
||||
|
||||
# Step 2: Unescape only our exact diff spans generated by apply_html_color_to_body()
|
||||
# Pattern matches the exact structure:
|
||||
# <span style="{STYLE}" role="{ROLE}" aria-label="{LABEL}" title="{TITLE}">
|
||||
|
||||
# Unescape outer span opening tags with full attributes (role, aria-label, title)
|
||||
# Matches removed/added/changed/changed_into spans
|
||||
result = re.sub(
|
||||
rf'<span style="({re.escape(REMOVED_STYLE)}|{re.escape(ADDED_STYLE)})" '
|
||||
rf'role="(deletion|insertion|note)" '
|
||||
rf'aria-label="([^&]+?)" '
|
||||
rf'title="([^&]+?)">',
|
||||
r'<span style="\1" role="\2" aria-label="\3" title="\4">',
|
||||
str(escaped_content),
|
||||
flags=re.IGNORECASE
|
||||
)
|
||||
|
||||
# Unescape inner span opening tags (without additional attributes)
|
||||
# This matches the darker background styles for changed parts within lines
|
||||
result = re.sub(
|
||||
rf'<span style="({re.escape(REMOVED_INNER_STYLE)}|{re.escape(ADDED_INNER_STYLE)})">',
|
||||
r'<span style="\1">',
|
||||
result,
|
||||
flags=re.IGNORECASE
|
||||
)
|
||||
|
||||
# Unescape closing tags (but only as many as we opened)
|
||||
open_count = result.count('<span style=')
|
||||
close_count = str(escaped_content).count('</span>')
|
||||
|
||||
# Replace up to the number of spans we opened
|
||||
for _ in range(min(open_count, close_count)):
|
||||
result = result.replace('</span>', '</span>', 1)
|
||||
|
||||
# Not necessary because the CSS/HTML will lay it out by linefeed
|
||||
result = result.replace(CUSTOM_LINEBREAK_PLACEHOLDER, '')
|
||||
return Markup(result)
|
||||
|
||||
@views_blueprint.route("/preview/<string:uuid>", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def preview_page(uuid):
|
||||
@@ -34,7 +98,11 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
is_html_webdriver = False
|
||||
if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver' or watch.get('fetch_backend', '').startswith('extra_browser_'):
|
||||
is_html_webdriver = True
|
||||
|
||||
triggered_line_numbers = []
|
||||
ignored_line_numbers = []
|
||||
blocked_line_numbers = []
|
||||
|
||||
if datastore.data['watching'][uuid].history_n == 0 and (watch.get_error_text() or watch.get_error_snapshot()):
|
||||
flash("Preview unavailable - No fetch/check completed or triggers not reached", "error")
|
||||
else:
|
||||
@@ -50,36 +118,82 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
content = watch.get_history_snapshot(timestamp)
|
||||
|
||||
triggered_line_numbers = html_tools.strip_ignore_text(content=content,
|
||||
wordlist=watch['trigger_text'],
|
||||
wordlist=watch.get('trigger_text'),
|
||||
mode='line numbers'
|
||||
)
|
||||
ignored_line_numbers = html_tools.strip_ignore_text(content=content,
|
||||
wordlist=watch.get('ignore_text'),
|
||||
mode='line numbers'
|
||||
)
|
||||
blocked_line_numbers = html_tools.strip_ignore_text(content=content,
|
||||
wordlist=watch.get("text_should_not_be_present"),
|
||||
mode='line numbers'
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
content.append({'line': f"File doesnt exist or unable to read timestamp {timestamp}", 'classes': ''})
|
||||
|
||||
output = render_template("preview.html",
|
||||
content=content,
|
||||
current_diff_url=watch['url'],
|
||||
current_version=timestamp,
|
||||
history_n=watch.history_n,
|
||||
extra_stylesheets=extra_stylesheets,
|
||||
extra_title=f" - Diff - {watch.label} @ {timestamp}",
|
||||
triggered_line_numbers=triggered_line_numbers,
|
||||
current_diff_url=watch['url'],
|
||||
screenshot=watch.get_screenshot(),
|
||||
watch=watch,
|
||||
uuid=uuid,
|
||||
highlight_ignored_line_numbers=ignored_line_numbers,
|
||||
highlight_triggered_line_numbers=triggered_line_numbers,
|
||||
highlight_blocked_line_numbers=blocked_line_numbers,
|
||||
history_n=watch.history_n,
|
||||
is_html_webdriver=is_html_webdriver,
|
||||
last_error=watch['last_error'],
|
||||
last_error_text=watch.get_error_text(),
|
||||
last_error_screenshot=watch.get_error_snapshot(),
|
||||
versions=versions
|
||||
)
|
||||
last_error_text=watch.get_error_text(),
|
||||
screenshot=watch.get_screenshot(),
|
||||
uuid=uuid,
|
||||
versions=versions,
|
||||
watch=watch,
|
||||
)
|
||||
|
||||
return output
|
||||
|
||||
@views_blueprint.route("/diff/<string:uuid>", methods=['GET', 'POST'])
|
||||
@views_blueprint.route("/diff/<string:uuid>", methods=['POST'])
|
||||
@login_optionally_required
|
||||
def diff_history_page(uuid):
|
||||
def diff_history_page_build_report(uuid):
|
||||
from changedetectionio import forms
|
||||
|
||||
# More for testing, possible to return the first/only
|
||||
if uuid == 'first':
|
||||
uuid = list(datastore.data['watching'].keys()).pop()
|
||||
|
||||
try:
|
||||
watch = datastore.data['watching'][uuid]
|
||||
except KeyError:
|
||||
flash("No history found for the specified link, bad link?", "error")
|
||||
return redirect(url_for('watchlist.index'))
|
||||
|
||||
# For submission of requesting an extract
|
||||
extract_form = forms.extractDataForm(formdata=request.form,
|
||||
data={'extract_regex': request.form.get('extract_regex', '')}
|
||||
)
|
||||
if not extract_form.validate():
|
||||
flash("An error occurred, please see below.", "error")
|
||||
return _render_diff_template(uuid, extract_form)
|
||||
|
||||
else:
|
||||
extract_regex = request.form.get('extract_regex', '').strip()
|
||||
output = watch.extract_regex_from_all_history(extract_regex)
|
||||
if output:
|
||||
watch_dir = os.path.join(datastore.datastore_path, uuid)
|
||||
response = make_response(send_from_directory(directory=watch_dir, path=output, as_attachment=True))
|
||||
response.headers['Content-type'] = 'text/csv'
|
||||
response.headers['Cache-Control'] = 'no-cache, no-store, must-revalidate'
|
||||
response.headers['Pragma'] = 'no-cache'
|
||||
response.headers['Expires'] = "0"
|
||||
return response
|
||||
|
||||
flash('No matches found while scanning all of the watch history for that RegEx.', 'error')
|
||||
return redirect(url_for('ui.ui_views.diff_history_page', uuid=uuid) + '#extract')
|
||||
|
||||
def _render_diff_template(uuid, extract_form=None):
|
||||
"""Helper function to render the diff template with all required data"""
|
||||
from changedetectionio import forms
|
||||
|
||||
# More for testing, possible to return the first/only
|
||||
@@ -93,62 +207,41 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
flash("No history found for the specified link, bad link?", "error")
|
||||
return redirect(url_for('watchlist.index'))
|
||||
|
||||
# For submission of requesting an extract
|
||||
extract_form = forms.extractDataForm(request.form)
|
||||
if request.method == 'POST':
|
||||
if not extract_form.validate():
|
||||
flash("An error occurred, please see below.", "error")
|
||||
# Use provided form or create a new one
|
||||
if extract_form is None:
|
||||
extract_form = forms.extractDataForm(formdata=request.form,
|
||||
data={'extract_regex': request.form.get('extract_regex', '')}
|
||||
)
|
||||
|
||||
else:
|
||||
extract_regex = request.form.get('extract_regex').strip()
|
||||
output = watch.extract_regex_from_all_history(extract_regex)
|
||||
if output:
|
||||
watch_dir = os.path.join(datastore.datastore_path, uuid)
|
||||
response = make_response(send_from_directory(directory=watch_dir, path=output, as_attachment=True))
|
||||
response.headers['Content-type'] = 'text/csv'
|
||||
response.headers['Cache-Control'] = 'no-cache, no-store, must-revalidate'
|
||||
response.headers['Pragma'] = 'no-cache'
|
||||
response.headers['Expires'] = 0
|
||||
return response
|
||||
|
||||
flash('Nothing matches that RegEx', 'error')
|
||||
redirect(url_for('ui_views.diff_history_page', uuid=uuid)+'#extract')
|
||||
# Handle diff display preferences using PreferenceManager
|
||||
# Load preferences from cookies, with URL query params as temporary overrides
|
||||
pref_manager = PreferenceManager(DIFF_PREFERENCES_CONFIG, cookie_scope='global')
|
||||
diff_prefs = pref_manager.load_preferences()
|
||||
|
||||
history = watch.history
|
||||
dates = list(history.keys())
|
||||
|
||||
if len(dates) < 2:
|
||||
flash("Not enough saved change detection snapshots to produce a report.", "error")
|
||||
return redirect(url_for('watchlist.index'))
|
||||
# If a "from_version" was requested, then find it (or the closest one)
|
||||
# Also set "from version" to be the closest version to the one that was last viewed.
|
||||
|
||||
# Save the current newest history as the most recently viewed
|
||||
datastore.set_last_viewed(uuid, time.time())
|
||||
best_last_viewed_timestamp = watch.get_from_version_based_on_last_viewed
|
||||
from_version_timestamp = best_last_viewed_timestamp if best_last_viewed_timestamp else dates[-2]
|
||||
from_version = request.args.get('from_version', from_version_timestamp )
|
||||
|
||||
# Read as binary and force decode as UTF-8
|
||||
# Windows may fail decode in python if we just use 'r' mode (chardet decode exception)
|
||||
from_version = request.args.get('from_version')
|
||||
from_version_index = -2 # second newest
|
||||
if from_version and from_version in dates:
|
||||
from_version_index = dates.index(from_version)
|
||||
else:
|
||||
from_version = dates[from_version_index]
|
||||
# Use the current one if nothing was specified
|
||||
to_version = request.args.get('to_version', str(dates[-1]))
|
||||
|
||||
try:
|
||||
from_version_file_contents = watch.get_history_snapshot(dates[from_version_index])
|
||||
to_version_file_contents = watch.get_history_snapshot(timestamp=to_version)
|
||||
except Exception as e:
|
||||
from_version_file_contents = f"Unable to read to-version at index {dates[from_version_index]}.\n"
|
||||
|
||||
to_version = request.args.get('to_version')
|
||||
to_version_index = -1
|
||||
if to_version and to_version in dates:
|
||||
to_version_index = dates.index(to_version)
|
||||
else:
|
||||
to_version = dates[to_version_index]
|
||||
logger.error(f"Unable to read watch history to-version for version {to_version}: {str(e)}")
|
||||
to_version_file_contents = f"Unable to read to-version at {to_version}.\n"
|
||||
|
||||
try:
|
||||
to_version_file_contents = watch.get_history_snapshot(dates[to_version_index])
|
||||
from_version_file_contents = watch.get_history_snapshot(timestamp=from_version)
|
||||
except Exception as e:
|
||||
to_version_file_contents = "Unable to read to-version at index{}.\n".format(dates[to_version_index])
|
||||
logger.error(f"Unable to read watch history from-version for version {from_version}: {str(e)}")
|
||||
from_version_file_contents = f"Unable to read to-version {from_version}.\n"
|
||||
|
||||
screenshot_url = watch.get_screenshot()
|
||||
|
||||
@@ -162,13 +255,28 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
if datastore.data['settings']['application'].get('password') or os.getenv("SALTED_PASS", False):
|
||||
password_enabled_and_share_is_off = not datastore.data['settings']['application'].get('shared_diff_access')
|
||||
|
||||
datastore.set_last_viewed(uuid, time.time())
|
||||
|
||||
content = diff.render_diff(previous_version_file_contents=from_version_file_contents,
|
||||
newest_version_file_contents=to_version_file_contents,
|
||||
# include_removed=diff_prefs.get('removed'),
|
||||
# include_added=diff_prefs.get('added'),
|
||||
# include_replaced=diff_prefs.get('replaced'),
|
||||
ignore_junk=diff_prefs.get('diff_ignoreWhitespace'),
|
||||
include_equal=not diff_prefs.get('diff_changesOnly'),
|
||||
word_diff=diff_prefs.get('diff_type') == 'diffWords',
|
||||
)
|
||||
content = apply_html_color_to_body(n_body=content)
|
||||
content = content.replace(CUSTOM_LINEBREAK_PLACEHOLDER, "\n")
|
||||
|
||||
output = render_template("diff.html",
|
||||
content=content,
|
||||
current_diff_url=watch['url'],
|
||||
from_version=str(from_version),
|
||||
to_version=str(to_version),
|
||||
diff_prefs=diff_prefs,
|
||||
extra_stylesheets=extra_stylesheets,
|
||||
extra_title=f" - Diff - {watch.label}",
|
||||
extra_title=f" - {watch.label} - History",
|
||||
extract_form=extract_form,
|
||||
from_version=str(from_version),
|
||||
is_html_webdriver=is_html_webdriver,
|
||||
last_error=watch['last_error'],
|
||||
last_error_screenshot=watch.get_error_snapshot(),
|
||||
@@ -177,16 +285,50 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
newest=to_version_file_contents,
|
||||
newest_version_timestamp=dates[-1],
|
||||
password_enabled_and_share_is_off=password_enabled_and_share_is_off,
|
||||
from_version_file_contents=from_version_file_contents,
|
||||
to_version_file_contents=to_version_file_contents,
|
||||
screenshot=screenshot_url,
|
||||
to_version=str(to_version),
|
||||
uuid=uuid,
|
||||
versions=dates, # All except current/last
|
||||
watch_a=watch
|
||||
)
|
||||
|
||||
return output
|
||||
|
||||
@views_blueprint.route("/diff/<string:uuid>", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def diff_history_page(uuid):
|
||||
return _render_diff_template(uuid)
|
||||
|
||||
@views_blueprint.route("/diff/<string:uuid>/style", methods=['POST'])
|
||||
@login_optionally_required
|
||||
def diff_history_page_set_preferences(uuid):
|
||||
"""Handle POST request to set diff display preferences via cookies"""
|
||||
# Load preferences from POST form and set cookies
|
||||
pref_manager = PreferenceManager(DIFF_PREFERENCES_CONFIG, cookie_scope='global')
|
||||
diff_prefs = pref_manager.load_from_form()
|
||||
|
||||
# Build redirect params including preferences (for shareable URLs) and preserved params
|
||||
redirect_params = {}
|
||||
|
||||
# Add diff preferences to URL so it's shareable
|
||||
for key, value in diff_prefs.items():
|
||||
if isinstance(value, bool):
|
||||
redirect_params[key] = 'on' if value else 'off'
|
||||
else:
|
||||
redirect_params[key] = value
|
||||
|
||||
# Preserve query parameters (from_version, to_version) but exclude csrf_token
|
||||
for param in ['from_version', 'to_version']:
|
||||
if param in request.args:
|
||||
redirect_params[param] = request.args.get(param)
|
||||
|
||||
# Ensure csrf_token is never included in redirect URL
|
||||
redirect_params.pop('csrf_token', None)
|
||||
|
||||
# Redirect back to GET with all params and apply cookies
|
||||
redirect_url = url_for('ui.ui_views.diff_history_page', uuid=uuid, **redirect_params) + '#text'
|
||||
response = make_response(redirect(redirect_url))
|
||||
return pref_manager.apply_cookies_to_response(response)
|
||||
|
||||
@views_blueprint.route("/form/add/quickwatch", methods=['POST'])
|
||||
@login_optionally_required
|
||||
def form_quick_watch_add():
|
||||
@@ -212,7 +354,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
return redirect(url_for('ui.ui_edit.edit_page', uuid=new_uuid, unpause_on_save=1, tag=request.args.get('tag')))
|
||||
else:
|
||||
# Straight into the queue.
|
||||
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid}))
|
||||
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid}))
|
||||
flash("Watch added.")
|
||||
|
||||
return redirect(url_for('watchlist.index', tag=request.args.get('tag','')))
|
||||
|
||||
@@ -44,12 +44,16 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
# Sort by last_changed and add the uuid which is usually the key..
|
||||
sorted_watches = []
|
||||
with_errors = request.args.get('with_errors') == "1"
|
||||
unread_only = request.args.get('unread') == "1"
|
||||
errored_count = 0
|
||||
search_q = request.args.get('q').strip().lower() if request.args.get('q') else False
|
||||
for uuid, watch in datastore.data['watching'].items():
|
||||
if with_errors and not watch.get('last_error'):
|
||||
continue
|
||||
|
||||
if unread_only and (watch.viewed or watch.last_changed == 0) :
|
||||
continue
|
||||
|
||||
if active_tag_uuid and not active_tag_uuid in watch['tags']:
|
||||
continue
|
||||
if watch.get('last_error'):
|
||||
@@ -72,31 +76,32 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
per_page=datastore.data['settings']['application'].get('pager_size', 50), css_framework="semantic")
|
||||
|
||||
sorted_tags = sorted(datastore.data['settings']['application'].get('tags').items(), key=lambda x: x[1]['title'])
|
||||
|
||||
output = render_template(
|
||||
"watch-overview.html",
|
||||
active_tag=active_tag,
|
||||
active_tag_uuid=active_tag_uuid,
|
||||
app_rss_token=datastore.data['settings']['application'].get('rss_access_token'),
|
||||
datastore=datastore,
|
||||
errored_count=errored_count,
|
||||
form=form,
|
||||
guid=datastore.data['app_guid'],
|
||||
has_proxies=datastore.proxy_list,
|
||||
has_unviewed=datastore.has_unviewed,
|
||||
hosted_sticky=os.getenv("SALTED_PASS", False) == False,
|
||||
now_time_server=time.time(),
|
||||
pagination=pagination,
|
||||
queued_uuids=[q_uuid.item['uuid'] for q_uuid in update_q.queue],
|
||||
search_q=request.args.get('q', '').strip(),
|
||||
sort_attribute=request.args.get('sort') if request.args.get('sort') else request.cookies.get('sort'),
|
||||
sort_order=request.args.get('order') if request.args.get('order') else request.cookies.get('order'),
|
||||
system_default_fetcher=datastore.data['settings']['application'].get('fetch_backend'),
|
||||
tags=sorted_tags,
|
||||
watches=sorted_watches
|
||||
)
|
||||
active_tag=active_tag,
|
||||
active_tag_uuid=active_tag_uuid,
|
||||
app_rss_token=datastore.data['settings']['application'].get('rss_access_token'),
|
||||
datastore=datastore,
|
||||
errored_count=errored_count,
|
||||
form=form,
|
||||
guid=datastore.data['app_guid'],
|
||||
has_proxies=datastore.proxy_list,
|
||||
hosted_sticky=os.getenv("SALTED_PASS", False) == False,
|
||||
now_time_server=round(time.time()),
|
||||
pagination=pagination,
|
||||
queued_uuids=[q_uuid.item['uuid'] for q_uuid in update_q.queue],
|
||||
search_q=request.args.get('q', '').strip(),
|
||||
sort_attribute=request.args.get('sort') if request.args.get('sort') else request.cookies.get('sort'),
|
||||
sort_order=request.args.get('order') if request.args.get('order') else request.cookies.get('order'),
|
||||
system_default_fetcher=datastore.data['settings']['application'].get('fetch_backend'),
|
||||
tags=sorted_tags,
|
||||
unread_changes_count=datastore.unread_changes_count,
|
||||
watches=sorted_watches
|
||||
)
|
||||
|
||||
if session.get('share-link'):
|
||||
del(session['share-link'])
|
||||
del (session['share-link'])
|
||||
|
||||
resp = make_response(output)
|
||||
|
||||
|
||||
@@ -1,10 +1,16 @@
|
||||
{% extends 'base.html' %}
|
||||
{% block content %}
|
||||
{% from '_helpers.html' import render_simple_field, render_field, render_nolabel_field, sort_by_title %}
|
||||
{%- extends 'base.html' -%}
|
||||
{%- block content -%}
|
||||
{%- from '_helpers.html' import render_simple_field, render_field, render_nolabel_field, sort_by_title -%}
|
||||
<script src="{{url_for('static_content', group='js', filename='jquery-3.6.0.min.js')}}"></script>
|
||||
<script src="{{url_for('static_content', group='js', filename='watch-overview.js')}}" defer></script>
|
||||
<script>let nowtimeserver={{ now_time_server }};</script>
|
||||
|
||||
<script>let favicon_baseURL="{{ url_for('static_content', group='favicon', filename="PLACEHOLDER")}}";</script>
|
||||
<script>
|
||||
// Initialize Feather icons after the page loads
|
||||
document.addEventListener('DOMContentLoaded', function() {
|
||||
feather.replace();
|
||||
});
|
||||
</script>
|
||||
<style>
|
||||
.checking-now .last-checked {
|
||||
background-image: linear-gradient(to bottom, transparent 0%, rgba(0,0,0,0.05) 40%, rgba(0,0,0,0.1) 100%);
|
||||
@@ -13,19 +19,20 @@
|
||||
transition: background-size 0.9s ease
|
||||
}
|
||||
</style>
|
||||
<div class="box">
|
||||
<div class="box" id="form-quick-watch-add">
|
||||
|
||||
<form class="pure-form" action="{{ url_for('ui.ui_views.form_quick_watch_add', tag=active_tag_uuid) }}" method="POST" id="new-watch-form">
|
||||
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}" >
|
||||
<fieldset>
|
||||
<legend>Add a new change detection watch</legend>
|
||||
<legend>Add a new web page change detection watch</legend>
|
||||
<div id="watch-add-wrapper-zone">
|
||||
|
||||
{{ render_nolabel_field(form.url, placeholder="https://...", required=true) }}
|
||||
{{ render_nolabel_field(form.tags, value=active_tag.title if active_tag_uuid else '', placeholder="watch label / tag") }}
|
||||
{{ render_nolabel_field(form.watch_submit_button, title="Watch this URL!" ) }}
|
||||
{{ render_nolabel_field(form.edit_and_watch_submit_button, title="Edit first then Watch") }}
|
||||
</div>
|
||||
<div id="watch-group-tag">
|
||||
{{ render_field(form.tags, value=active_tag.title if active_tag_uuid else '', placeholder="Watch group / tag", class="transparent-field") }}
|
||||
</div>
|
||||
<div id="quick-watch-processor-type">
|
||||
{{ render_simple_field(form.processor) }}
|
||||
</div>
|
||||
@@ -33,215 +40,227 @@
|
||||
</fieldset>
|
||||
<span style="color:#eee; font-size: 80%;"><img alt="Create a shareable link" style="height: 1em;display:inline-block;" src="{{url_for('static_content', group='images', filename='spread-white.svg')}}" > Tip: You can also add 'shared' watches. <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Sharing-a-Watch">More info</a></span>
|
||||
</form>
|
||||
|
||||
</div>
|
||||
<div class="box">
|
||||
<form class="pure-form" action="{{ url_for('ui.form_watch_list_checkbox_operations') }}" method="POST" id="watch-list-form">
|
||||
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}" >
|
||||
<input type="hidden" id="op_extradata" name="op_extradata" value="" >
|
||||
<div id="checkbox-operations">
|
||||
<button class="pure-button button-secondary button-xsmall" name="op" value="pause">Pause</button>
|
||||
<button class="pure-button button-secondary button-xsmall" name="op" value="unpause">UnPause</button>
|
||||
<button class="pure-button button-secondary button-xsmall" name="op" value="mute">Mute</button>
|
||||
<button class="pure-button button-secondary button-xsmall" name="op" value="unmute">UnMute</button>
|
||||
<button class="pure-button button-secondary button-xsmall" name="op" value="recheck">Recheck</button>
|
||||
<button class="pure-button button-secondary button-xsmall" name="op" value="assign-tag" id="checkbox-assign-tag">Tag</button>
|
||||
<button class="pure-button button-secondary button-xsmall" name="op" value="mark-viewed">Mark viewed</button>
|
||||
<button class="pure-button button-secondary button-xsmall" name="op" value="notification-default">Use default notification</button>
|
||||
<button class="pure-button button-secondary button-xsmall" name="op" value="clear-errors">Clear errors</button>
|
||||
<button class="pure-button button-secondary button-xsmall" style="background: #dd4242;" name="op" value="clear-history">Clear/reset history</button>
|
||||
<button class="pure-button button-secondary button-xsmall" style="background: #dd4242;" name="op" value="delete">Delete</button>
|
||||
<button class="pure-button button-secondary button-xsmall" name="op" value="pause"><i data-feather="pause" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>Pause</button>
|
||||
<button class="pure-button button-secondary button-xsmall" name="op" value="unpause"><i data-feather="play" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>UnPause</button>
|
||||
<button class="pure-button button-secondary button-xsmall" name="op" value="mute"><i data-feather="volume-x" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>Mute</button>
|
||||
<button class="pure-button button-secondary button-xsmall" name="op" value="unmute"><i data-feather="volume-2" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>UnMute</button>
|
||||
<button class="pure-button button-secondary button-xsmall" name="op" value="recheck"><i data-feather="refresh-cw" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>Recheck</button>
|
||||
<button class="pure-button button-secondary button-xsmall" name="op" value="assign-tag" id="checkbox-assign-tag"><i data-feather="tag" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>Tag</button>
|
||||
<button class="pure-button button-secondary button-xsmall" name="op" value="mark-viewed"><i data-feather="eye" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>Mark viewed</button>
|
||||
<button class="pure-button button-secondary button-xsmall" name="op" value="notification-default"><i data-feather="bell" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>Use default notification</button>
|
||||
<button class="pure-button button-secondary button-xsmall" name="op" value="clear-errors"><i data-feather="x-circle" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>Clear errors</button>
|
||||
<button class="pure-button button-secondary button-xsmall" style="background: #dd4242;" name="op" value="clear-history"><i data-feather="trash-2" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>Clear/reset history</button>
|
||||
<button class="pure-button button-secondary button-xsmall" style="background: #dd4242;" name="op" value="delete"><i data-feather="trash" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>Delete</button>
|
||||
</div>
|
||||
{% if watches|length >= pagination.per_page %}
|
||||
{%- if watches|length >= pagination.per_page -%}
|
||||
{{ pagination.info }}
|
||||
{% endif %}
|
||||
{% if search_q %}<div id="search-result-info">Searching "<strong><i>{{search_q}}</i></strong>"</div>{% endif %}
|
||||
{%- endif -%}
|
||||
{%- if search_q -%}<div id="search-result-info">Searching "<strong><i>{{search_q}}</i></strong>"</div>{%- endif -%}
|
||||
<div>
|
||||
<a href="{{url_for('watchlist.index')}}" class="pure-button button-tag {{'active' if not active_tag_uuid }}">All</a>
|
||||
|
||||
<!-- tag list -->
|
||||
{% for uuid, tag in tags %}
|
||||
{% if tag != "" %}
|
||||
{%- for uuid, tag in tags -%}
|
||||
{%- if tag != "" -%}
|
||||
<a href="{{url_for('watchlist.index', tag=uuid) }}" class="pure-button button-tag {{'active' if active_tag_uuid == uuid }}">{{ tag.title }}</a>
|
||||
{% endif %}
|
||||
{% endfor %}
|
||||
{%- endif -%}
|
||||
{%- endfor -%}
|
||||
</div>
|
||||
|
||||
{% set sort_order = sort_order or 'asc' %}
|
||||
{% set sort_attribute = sort_attribute or 'last_changed' %}
|
||||
{% set pagination_page = request.args.get('page', 0) %}
|
||||
{% set cols_required = 6 %}
|
||||
{% set any_has_restock_price_processor = datastore.any_watches_have_processor_by_name("restock_diff") %}
|
||||
{% if any_has_restock_price_processor %}
|
||||
{% set cols_required = cols_required + 1 %}
|
||||
{% endif %}
|
||||
|
||||
<div id="watch-table-wrapper">
|
||||
|
||||
<table class="pure-table pure-table-striped watch-table">
|
||||
{%- set sort_order = sort_order or 'asc' -%}
|
||||
{%- set sort_attribute = sort_attribute or 'last_changed' -%}
|
||||
{%- set pagination_page = request.args.get('page', 0) -%}
|
||||
{%- set cols_required = 6 -%}
|
||||
{%- set any_has_restock_price_processor = datastore.any_watches_have_processor_by_name("restock_diff") -%}
|
||||
{%- if any_has_restock_price_processor -%}
|
||||
{%- set cols_required = cols_required + 1 -%}
|
||||
{%- endif -%}
|
||||
{%- set ui_settings = datastore.data['settings']['application']['ui'] -%}
|
||||
{%- set wrapper_classes = [
|
||||
'has-unread-changes' if unread_changes_count else '',
|
||||
'has-error' if errored_count else '',
|
||||
] -%}
|
||||
<div id="watch-table-wrapper" class="{{ wrapper_classes | reject('equalto', '') | join(' ') }}">
|
||||
{%- set table_classes = [
|
||||
'favicon-enabled' if 'favicons_enabled' not in ui_settings or ui_settings['favicons_enabled'] else 'favicon-not-enabled',
|
||||
] -%}
|
||||
<table class="pure-table pure-table-striped watch-table {{ table_classes | reject('equalto', '') | join(' ') }}">
|
||||
<thead>
|
||||
<tr>
|
||||
{% set link_order = "desc" if sort_order == 'asc' else "asc" %}
|
||||
{% set arrow_span = "" %}
|
||||
{%- set link_order = "desc" if sort_order == 'asc' else "asc" -%}
|
||||
{%- set arrow_span = "" -%}
|
||||
<th><input style="vertical-align: middle" type="checkbox" id="check-all" > <a class="{{ 'active '+link_order if sort_attribute == 'date_created' else 'inactive' }}" href="{{url_for('watchlist.index', sort='date_created', order=link_order, tag=active_tag_uuid)}}"># <span class='arrow {{link_order}}'></span></a></th>
|
||||
<th class="empty-cell"></th>
|
||||
<th>
|
||||
<a class="{{ 'active '+link_order if sort_attribute == 'paused' else 'inactive' }}" href="{{url_for('watchlist.index', sort='paused', order=link_order, tag=active_tag_uuid)}}"><i data-feather="pause" style="vertical-align: bottom; width: 14px; height: 14px; margin-right: 4px;"></i><span class='arrow {{link_order}}'></span></a>
|
||||
|
||||
<a class="{{ 'active '+link_order if sort_attribute == 'notification_muted' else 'inactive' }}" href="{{url_for('watchlist.index', sort='notification_muted', order=link_order, tag=active_tag_uuid)}}"><i data-feather="volume-2" style="vertical-align: bottom; width: 14px; height: 14px; margin-right: 4px;"></i><span class='arrow {{link_order}}'></span></a>
|
||||
</th>
|
||||
<th><a class="{{ 'active '+link_order if sort_attribute == 'label' else 'inactive' }}" href="{{url_for('watchlist.index', sort='label', order=link_order, tag=active_tag_uuid)}}">Website <span class='arrow {{link_order}}'></span></a></th>
|
||||
{% if any_has_restock_price_processor %}
|
||||
{%- if any_has_restock_price_processor -%}
|
||||
<th>Restock & Price</th>
|
||||
{% endif %}
|
||||
{%- endif -%}
|
||||
<th><a class="{{ 'active '+link_order if sort_attribute == 'last_checked' else 'inactive' }}" href="{{url_for('watchlist.index', sort='last_checked', order=link_order, tag=active_tag_uuid)}}"><span class="hide-on-mobile">Last</span> Checked <span class='arrow {{link_order}}'></span></a></th>
|
||||
<th><a class="{{ 'active '+link_order if sort_attribute == 'last_changed' else 'inactive' }}" href="{{url_for('watchlist.index', sort='last_changed', order=link_order, tag=active_tag_uuid)}}"><span class="hide-on-mobile">Last</span> Changed <span class='arrow {{link_order}}'></span></a></th>
|
||||
<th class="empty-cell"></th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{% if not watches|length %}
|
||||
{%- if not watches|length -%}
|
||||
<tr>
|
||||
<td colspan="{{ cols_required }}" style="text-wrap: wrap;">No website watches configured, please add a URL in the box above, or <a href="{{ url_for('imports.import_page')}}" >import a list</a>.</td>
|
||||
</tr>
|
||||
{% endif %}
|
||||
{% for watch in (watches|sort(attribute=sort_attribute, reverse=sort_order == 'asc'))|pagination_slice(skip=pagination.skip) %}
|
||||
{%- endif -%}
|
||||
|
||||
{% set is_unviewed = watch.newest_history_key| int > watch.last_viewed and watch.history_n>=2 %}
|
||||
{% set checking_now = is_checking_now(watch) %}
|
||||
<tr id="{{ watch.uuid }}"
|
||||
class="{{ loop.cycle('pure-table-odd', 'pure-table-even') }} processor-{{ watch['processor'] }}
|
||||
{% if watch.last_error is defined and watch.last_error != False %}error{% endif %}
|
||||
{% if watch.last_notification_error is defined and watch.last_notification_error != False %}error{% endif %}
|
||||
{% if watch.paused is defined and watch.paused != False %}paused{% endif %}
|
||||
{% if is_unviewed %}unviewed{% endif %}
|
||||
{% if watch.has_restock_info %} has-restock-info {% if watch['restock']['in_stock'] %}in-stock{% else %}not-in-stock{% endif %} {% else %}no-restock-info{% endif %}
|
||||
{% if watch.uuid in queued_uuids %}queued{% endif %}
|
||||
{% if checking_now %}checking-now{% endif %}
|
||||
">
|
||||
<td class="inline checkbox-uuid" ><input name="uuids" type="checkbox" value="{{ watch.uuid}} " > <span>{{ loop.index+pagination.skip }}</span></td>
|
||||
{%- for watch in (watches|sort(attribute=sort_attribute, reverse=sort_order == 'asc'))|pagination_slice(skip=pagination.skip) -%}
|
||||
{%- set checking_now = is_checking_now(watch) -%}
|
||||
{%- set history_n = watch.history_n -%}
|
||||
{%- set favicon = watch.get_favicon_filename() -%}
|
||||
{%- set system_use_url_watchlist = datastore.data['settings']['application']['ui'].get('use_page_title_in_list') -%}
|
||||
{# Class settings mirrored in changedetectionio/static/js/realtime.js for the frontend #}
|
||||
{%- set row_classes = [
|
||||
loop.cycle('pure-table-odd', 'pure-table-even'),
|
||||
'processor-' ~ watch['processor'],
|
||||
'has-error' if watch.compile_error_texts()|length > 2 else '',
|
||||
'paused' if watch.paused is defined and watch.paused != False else '',
|
||||
'unviewed' if watch.has_unviewed else '',
|
||||
'has-restock-info' if watch.has_restock_info else 'no-restock-info',
|
||||
'has-favicon' if favicon else '',
|
||||
'in-stock' if watch.has_restock_info and watch['restock']['in_stock'] else '',
|
||||
'not-in-stock' if watch.has_restock_info and not watch['restock']['in_stock'] else '',
|
||||
'queued' if watch.uuid in queued_uuids else '',
|
||||
'checking-now' if checking_now else '',
|
||||
'notification_muted' if watch.notification_muted else '',
|
||||
'single-history' if history_n == 1 else '',
|
||||
'multiple-history' if history_n >= 2 else '',
|
||||
'use-html-title' if system_use_url_watchlist else 'no-html-title',
|
||||
] -%}
|
||||
<tr id="{{ watch.uuid }}" data-watch-uuid="{{ watch.uuid }}" class="{{ row_classes | reject('equalto', '') | join(' ') }}">
|
||||
<td class="inline checkbox-uuid" ><div><input name="uuids" type="checkbox" value="{{ watch.uuid}} " > <span class="counter-i">{{ loop.index+pagination.skip }}</span></div></td>
|
||||
<td class="inline watch-controls">
|
||||
{% if not watch.paused %}
|
||||
<a class="state-off" href="{{url_for('watchlist.index', op='pause', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='pause.svg')}}" alt="Pause checks" title="Pause checks" class="icon icon-pause" ></a>
|
||||
{% else %}
|
||||
<a class="state-on" href="{{url_for('watchlist.index', op='pause', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='play.svg')}}" alt="UnPause checks" title="UnPause checks" class="icon icon-unpause" ></a>
|
||||
{% endif %}
|
||||
{% set mute_label = 'UnMute notification' if watch.notification_muted else 'Mute notification' %}
|
||||
<a class="link-mute state-{{'on' if watch.notification_muted else 'off'}}" href="{{url_for('watchlist.index', op='mute', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='bell-off.svg')}}" alt="{{ mute_label }}" title="{{ mute_label }}" class="icon icon-mute" ></a>
|
||||
</td>
|
||||
<td class="title-col inline">{{watch.title if watch.title is not none and watch.title|length > 0 else watch.url}}
|
||||
<a class="external" target="_blank" rel="noopener" href="{{ watch.link.replace('source:','') }}"></a>
|
||||
<a class="link-spread" href="{{url_for('ui.form_share_put_watch', uuid=watch.uuid)}}"><img src="{{url_for('static_content', group='images', filename='spread.svg')}}" class="status-icon icon icon-spread" title="Create a link to share watch config with others" ></a>
|
||||
|
||||
{% if watch.get_fetch_backend == "html_webdriver"
|
||||
or ( watch.get_fetch_backend == "system" and system_default_fetcher == 'html_webdriver' )
|
||||
or "extra_browser_" in watch.get_fetch_backend
|
||||
%}
|
||||
<img class="status-icon" src="{{url_for('static_content', group='images', filename='google-chrome-icon.png')}}" alt="Using a Chrome browser" title="Using a Chrome browser" >
|
||||
{% endif %}
|
||||
|
||||
{%if watch.is_pdf %}<img class="status-icon" src="{{url_for('static_content', group='images', filename='pdf-icon.svg')}}" title="Converting PDF to text" >{% endif %}
|
||||
{% if watch.has_browser_steps %}<img class="status-icon status-browsersteps" src="{{url_for('static_content', group='images', filename='steps.svg')}}" title="Browser Steps is enabled" >{% endif %}
|
||||
{% if watch.last_error is defined and watch.last_error != False %}
|
||||
<div class="fetch-error">{{ watch.last_error }}
|
||||
|
||||
{% if '403' in watch.last_error %}
|
||||
{% if has_proxies %}
|
||||
<a href="{{ url_for('settings.settings_page', uuid=watch.uuid) }}#proxies">Try other proxies/location</a>
|
||||
{% endif %}
|
||||
<a href="{{ url_for('settings.settings_page', uuid=watch.uuid) }}#proxies">Try adding external proxies/locations</a>
|
||||
|
||||
{% endif %}
|
||||
{% if 'empty result or contain only an image' in watch.last_error %}
|
||||
<a href="https://github.com/dgtlmoon/changedetection.io/wiki/Detecting-changes-in-images">more help here</a>.
|
||||
{% endif %}
|
||||
<div>
|
||||
<a class="ajax-op state-off pause-toggle" data-op="pause" href="{{url_for('watchlist.index', op='pause', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='pause.svg')}}" alt="Pause checks" title="Pause checks" class="icon icon-pause" ></a>
|
||||
<a class="ajax-op state-on pause-toggle" data-op="pause" style="display: none" href="{{url_for('watchlist.index', op='pause', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='play.svg')}}" alt="UnPause checks" title="UnPause checks" class="icon icon-unpause" ></a>
|
||||
<a class="ajax-op state-off mute-toggle" data-op="mute" href="{{url_for('watchlist.index', op='mute', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='bell-off.svg')}}" alt="Mute notification" title="Mute notification" class="icon icon-mute" ></a>
|
||||
<a class="ajax-op state-on mute-toggle" data-op="mute" style="display: none" href="{{url_for('watchlist.index', op='mute', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='bell-off.svg')}}" alt="UnMute notification" title="UnMute notification" class="icon icon-mute" ></a>
|
||||
</div>
|
||||
{% endif %}
|
||||
{% if watch.last_notification_error is defined and watch.last_notification_error != False %}
|
||||
<div class="fetch-error notification-error"><a href="{{url_for('settings.notification_logs')}}">{{ watch.last_notification_error }}</a></div>
|
||||
{% endif %}
|
||||
|
||||
{% if watch['processor'] == 'text_json_diff' %}
|
||||
{% if watch['has_ldjson_price_data'] and not watch['track_ldjson_price_data'] %}
|
||||
<div class="ldjson-price-track-offer">Switch to Restock & Price watch mode? <a href="{{url_for('price_data_follower.accept', uuid=watch.uuid)}}" class="pure-button button-xsmall">Yes</a> <a href="{{url_for('price_data_follower.reject', uuid=watch.uuid)}}" class="">No</a></div>
|
||||
{% endif %}
|
||||
{% endif %}
|
||||
{% if watch['processor'] == 'restock_diff' %}
|
||||
<span class="tracking-ldjson-price-data" title="Automatically following embedded price information"><img src="{{url_for('static_content', group='images', filename='price-tag-icon.svg')}}" class="status-icon price-follow-tag-icon" > Price</span>
|
||||
{% endif %}
|
||||
{% for watch_tag_uuid, watch_tag in datastore.get_all_tags_for_watch(watch['uuid']).items() %}
|
||||
<span class="watch-tag-list">{{ watch_tag.title }}</span>
|
||||
{% endfor %}
|
||||
</td>
|
||||
<!-- @todo make it so any watch handler obj can expose this --->
|
||||
{% if any_has_restock_price_processor %}
|
||||
|
||||
<td class="title-col inline">
|
||||
<div class="flex-wrapper">
|
||||
{% if 'favicons_enabled' not in ui_settings or ui_settings['favicons_enabled'] %}
|
||||
<div>{# A page might have hundreds of these images, set IMG options for lazy loading, don't set SRC if we dont have it so it doesnt fetch the placeholder' #}
|
||||
<img alt="Favicon thumbnail" class="favicon" loading="lazy" decoding="async" fetchpriority="low" {% if favicon %} src="{{url_for('static_content', group='favicon', filename=watch.uuid)}}" {% else %} src='data:image/svg+xml;utf8,%3Csvg xmlns="http://www.w3.org/2000/svg" width="7.087" height="7.087" viewBox="0 0 7.087 7.087"%3E%3Ccircle cx="3.543" cy="3.543" r="3.279" stroke="%23e1e1e1" stroke-width="0.45" fill="none" opacity="0.74"/%3E%3C/svg%3E' {% endif %} />
|
||||
</div>
|
||||
{% endif %}
|
||||
<div>
|
||||
<span class="watch-title">
|
||||
{% if system_use_url_watchlist or watch.get('use_page_title_in_list') %}
|
||||
{{ watch.label }}
|
||||
{% else %}
|
||||
{{ watch.get('title') or watch.link }}
|
||||
{% endif %}
|
||||
<a class="external" target="_blank" rel="noopener" href="{{ watch.link.replace('source:','') }}"> </a>
|
||||
</span>
|
||||
<div class="error-text" style="display:none;">{{ watch.compile_error_texts(has_proxies=datastore.proxy_list) }}</div>
|
||||
{%- if watch['processor'] == 'text_json_diff' -%}
|
||||
{%- if watch['has_ldjson_price_data'] and not watch['track_ldjson_price_data'] -%}
|
||||
<div class="ldjson-price-track-offer">Switch to Restock & Price watch mode? <a href="{{url_for('price_data_follower.accept', uuid=watch.uuid)}}" class="pure-button button-xsmall">Yes</a> <a href="{{url_for('price_data_follower.reject', uuid=watch.uuid)}}" class="">No</a></div>
|
||||
{%- endif -%}
|
||||
{%- endif -%}
|
||||
{%- if watch['processor'] == 'restock_diff' -%}
|
||||
<span class="tracking-ldjson-price-data" title="Automatically following embedded price information"><img src="{{url_for('static_content', group='images', filename='price-tag-icon.svg')}}" class="status-icon price-follow-tag-icon" > Price</span>
|
||||
{%- endif -%}
|
||||
{%- for watch_tag_uuid, watch_tag in datastore.get_all_tags_for_watch(watch['uuid']).items() -%}
|
||||
<span class="watch-tag-list">{{ watch_tag.title }}</span>
|
||||
{%- endfor -%}
|
||||
</div>
|
||||
<div class="status-icons">
|
||||
<a class="link-spread" href="{{url_for('ui.form_share_put_watch', uuid=watch.uuid)}}"><img src="{{url_for('static_content', group='images', filename='spread.svg')}}" class="status-icon icon icon-spread" title="Create a link to share watch config with others" ></a>
|
||||
{%- if watch.get_fetch_backend == "html_webdriver"
|
||||
or ( watch.get_fetch_backend == "system" and system_default_fetcher == 'html_webdriver' )
|
||||
or "extra_browser_" in watch.get_fetch_backend
|
||||
-%}
|
||||
<img class="status-icon" src="{{url_for('static_content', group='images', filename='google-chrome-icon.png')}}" alt="Using a Chrome browser" title="Using a Chrome browser" >
|
||||
{%- endif -%}
|
||||
{%- if watch.is_pdf -%}<img class="status-icon" src="{{url_for('static_content', group='images', filename='pdf-icon.svg')}}" alt="Converting PDF to text" >{%- endif -%}
|
||||
{%- if watch.has_browser_steps -%}<img class="status-icon status-browsersteps" src="{{url_for('static_content', group='images', filename='steps.svg')}}" alt="Browser Steps is enabled" >{%- endif -%}
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</td>
|
||||
{%- if any_has_restock_price_processor -%}
|
||||
<td class="restock-and-price">
|
||||
{% if watch['processor'] == 'restock_diff' %}
|
||||
{% if watch.has_restock_info %}
|
||||
{%- if watch['processor'] == 'restock_diff' -%}
|
||||
{%- if watch.has_restock_info -%}
|
||||
<span class="restock-label {{'in-stock' if watch['restock']['in_stock'] else 'not-in-stock' }}" title="Detecting restock and price">
|
||||
<!-- maybe some object watch['processor'][restock_diff] or.. -->
|
||||
{% if watch['restock']['in_stock'] %} In stock {% else %} Not in stock {% endif %}
|
||||
{%- if watch['restock']['in_stock']-%} In stock {%- else-%} Not in stock {%- endif -%}
|
||||
</span>
|
||||
{% endif %}
|
||||
{%- endif -%}
|
||||
|
||||
{% if watch.get('restock') and watch['restock']['price'] != None %}
|
||||
{% if watch['restock']['price'] != None %}
|
||||
{%- if watch.get('restock') and watch['restock']['price'] != None -%}
|
||||
{%- if watch['restock']['price'] != None -%}
|
||||
<span class="restock-label price" title="Price">
|
||||
{{ watch['restock']['price']|format_number_locale }} {{ watch['restock']['currency'] }}
|
||||
</span>
|
||||
{% endif %}
|
||||
{% elif not watch.has_restock_info %}
|
||||
{%- endif -%}
|
||||
{%- elif not watch.has_restock_info -%}
|
||||
<span class="restock-label error">No information</span>
|
||||
{% endif %}
|
||||
{% endif %}
|
||||
{%- endif -%}
|
||||
{%- endif -%}
|
||||
</td>
|
||||
{% endif %}
|
||||
{%- endif -%}
|
||||
{#last_checked becomes fetch-start-time#}
|
||||
<td class="last-checked" data-timestamp="{{ watch.last_checked }}" {% if checking_now %} data-fetchduration={{ watch.fetch_time }} data-eta_complete="{{ watch.last_checked+watch.fetch_time }}" {% endif %} >
|
||||
{% if checking_now %}
|
||||
<span class="spinner"></span><span> Checking now</span>
|
||||
{% else %}
|
||||
{{watch|format_last_checked_time|safe}}</td>
|
||||
{% endif %}
|
||||
|
||||
<td class="last-changed" data-timestamp="{{ watch.last_changed }}">{% if watch.history_n >=2 and watch.last_changed >0 %}
|
||||
{{watch.last_changed|format_timestamp_timeago}}
|
||||
{% else %}
|
||||
Not yet
|
||||
{% endif %}
|
||||
<td class="last-checked" data-timestamp="{{ watch.last_checked }}" data-fetchduration={{ watch.fetch_time }} data-eta_complete="{{ watch.last_checked+watch.fetch_time }}" >
|
||||
<div class="spinner-wrapper" style="display:none;" >
|
||||
<span class="spinner"></span><span> Checking now</span>
|
||||
</div>
|
||||
<span class="innertext">{{watch|format_last_checked_time|safe}}</span>
|
||||
</td>
|
||||
<td>
|
||||
<a {% if watch.uuid in queued_uuids %}disabled="true"{% endif %} href="{{ url_for('ui.form_watch_checknow', uuid=watch.uuid, tag=request.args.get('tag')) }}"
|
||||
class="recheck pure-button pure-button-primary">{% if watch.uuid in queued_uuids %}Queued{% else %}Recheck{% endif %}</a>
|
||||
<td class="last-changed" data-timestamp="{{ watch.last_changed }}">{%- if watch.history_n >=2 and watch.last_changed >0 -%}
|
||||
{{watch.last_changed|format_timestamp_timeago}}
|
||||
{%- else -%}
|
||||
Not yet
|
||||
{%- endif -%}
|
||||
</td>
|
||||
<td class="buttons">
|
||||
<div>
|
||||
{%- set target_attr = ' target="' ~ watch.uuid ~ '"' if datastore.data['settings']['application']['ui'].get('open_diff_in_new_tab') else '' -%}
|
||||
<a href="" class="already-in-queue-button recheck pure-button pure-button-primary" style="display: none;" disabled="disabled">Queued</a>
|
||||
<a href="{{ url_for('ui.form_watch_checknow', uuid=watch.uuid, tag=request.args.get('tag')) }}" data-op='recheck' class="ajax-op recheck pure-button pure-button-primary">Recheck</a>
|
||||
<a href="{{ url_for('ui.ui_edit.edit_page', uuid=watch.uuid, tag=active_tag_uuid)}}#general" class="pure-button pure-button-primary">Edit</a>
|
||||
{% if watch.history_n >= 2 %}
|
||||
|
||||
{% set open_diff_in_new_tab = datastore.data['settings']['application']['ui'].get('open_diff_in_new_tab') %}
|
||||
{% set target_attr = ' target="' ~ watch.uuid ~ '"' if open_diff_in_new_tab else '' %}
|
||||
|
||||
{% if is_unviewed %}
|
||||
<a href="{{ url_for('ui.ui_views.diff_history_page', uuid=watch.uuid, from_version=watch.get_from_version_based_on_last_viewed) }}" {{target_attr}} class="pure-button pure-button-primary diff-link">History</a>
|
||||
{% else %}
|
||||
<a href="{{ url_for('ui.ui_views.diff_history_page', uuid=watch.uuid)}}" {{target_attr}} class="pure-button pure-button-primary diff-link">History</a>
|
||||
{% endif %}
|
||||
|
||||
{% else %}
|
||||
{% if watch.history_n == 1 or (watch.history_n ==0 and watch.error_text_ctime )%}
|
||||
<a href="{{ url_for('ui.ui_views.preview_page', uuid=watch.uuid)}}" {{target_attr}} class="pure-button pure-button-primary">Preview</a>
|
||||
{% endif %}
|
||||
{% endif %}
|
||||
<a href="{{ url_for('ui.ui_views.diff_history_page', uuid=watch.uuid)}}" {{target_attr}} class="pure-button pure-button-primary history-link" style="display: none;">History</a>
|
||||
<a href="{{ url_for('ui.ui_views.preview_page', uuid=watch.uuid)}}" {{target_attr}} class="pure-button pure-button-primary preview-link" style="display: none;">Preview</a>
|
||||
</div>
|
||||
</td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
{%- endfor -%}
|
||||
</tbody>
|
||||
</table>
|
||||
<ul id="post-list-buttons">
|
||||
{% if errored_count %}
|
||||
<li>
|
||||
<a href="{{url_for('watchlist.index', with_errors=1, tag=request.args.get('tag')) }}" class="pure-button button-tag button-error ">With errors ({{ errored_count }})</a>
|
||||
<li id="post-list-with-errors" style="display: none;" >
|
||||
<a href="{{url_for('watchlist.index', with_errors=1, tag=request.args.get('tag')) }}" class="pure-button button-tag button-error">With errors ({{ errored_count }})</a>
|
||||
</li>
|
||||
{% endif %}
|
||||
{% if has_unviewed %}
|
||||
<li>
|
||||
<a href="{{url_for('ui.mark_all_viewed',with_errors=request.args.get('with_errors',0)) }}" class="pure-button button-tag ">Mark all viewed</a>
|
||||
<li id="post-list-mark-views" style="display: none;" >
|
||||
<a href="{{url_for('ui.mark_all_viewed',with_errors=request.args.get('with_errors',0)) }}" class="pure-button button-tag " id="mark-all-viewed">Mark all viewed</a>
|
||||
</li>
|
||||
{%- if active_tag_uuid -%}
|
||||
<li id="post-list-mark-views-tag">
|
||||
<a href="{{url_for('ui.mark_all_viewed', tag=active_tag_uuid) }}" class="pure-button button-tag " id="mark-all-viewed">Mark all viewed in '{{active_tag.title}}'</a>
|
||||
</li>
|
||||
{%- endif -%}
|
||||
<li id="post-list-unread" style="display: none;" >
|
||||
<a href="{{url_for('watchlist.index', unread=1, tag=request.args.get('tag')) }}" class="pure-button button-tag">Unread (<span id="unread-tab-counter">{{ unread_changes_count }}</span>)</a>
|
||||
</li>
|
||||
{% endif %}
|
||||
<li>
|
||||
<a href="{{ url_for('ui.form_watch_checknow', tag=active_tag_uuid, with_errors=request.args.get('with_errors',0)) }}" class="pure-button button-tag ">Recheck
|
||||
all {% if active_tag_uuid %} in "{{active_tag.title}}"{%endif%}</a>
|
||||
<a href="{{ url_for('ui.form_watch_checknow', tag=active_tag_uuid, with_errors=request.args.get('with_errors',0)) }}" class="pure-button button-tag" id="recheck-all">Recheck
|
||||
all {% if active_tag_uuid %} in '{{active_tag.title}}'{%endif%}</a>
|
||||
</li>
|
||||
<li>
|
||||
<a href="{{ url_for('rss.feed', tag=active_tag_uuid, token=app_rss_token)}}"><img alt="RSS Feed" id="feed-icon" src="{{url_for('static_content', group='images', filename='generic_feed-icon.svg')}}" height="15"></a>
|
||||
@@ -251,4 +270,4 @@
|
||||
</div>
|
||||
</form>
|
||||
</div>
|
||||
{% endblock %}
|
||||
{%- endblock -%}
|
||||
@@ -1,11 +1,9 @@
|
||||
from flask import Blueprint
|
||||
|
||||
from json_logic.builtins import BUILTINS
|
||||
|
||||
from .exceptions import EmptyConditionRuleRowNotUsable
|
||||
from .pluggy_interface import plugin_manager # Import the pluggy plugin manager
|
||||
from . import default_plugin
|
||||
|
||||
from loguru import logger
|
||||
# List of all supported JSON Logic operators
|
||||
operator_choices = [
|
||||
(None, "Choose one - Operator"),
|
||||
@@ -16,7 +14,6 @@ operator_choices = [
|
||||
("==", "Equals"),
|
||||
("!=", "Not Equals"),
|
||||
("in", "Contains"),
|
||||
("!in", "Does Not Contain"),
|
||||
]
|
||||
|
||||
# Fields available in the rules
|
||||
@@ -113,12 +110,14 @@ def execute_ruleset_against_all_plugins(current_watch_uuid: str, application_dat
|
||||
application_datastruct=application_datastruct,
|
||||
ephemeral_data=ephemeral_data
|
||||
)
|
||||
|
||||
logger.debug(f"Trying plugin {plugin}....")
|
||||
|
||||
# Set a timeout of 10 seconds
|
||||
try:
|
||||
new_execute_data = future.result(timeout=10)
|
||||
if new_execute_data and isinstance(new_execute_data, dict):
|
||||
EXECUTE_DATA.update(new_execute_data)
|
||||
|
||||
except concurrent.futures.TimeoutError:
|
||||
# The plugin took too long, abort processing for this watch
|
||||
raise Exception(f"Plugin {plugin.__class__.__name__} took more than 10 seconds to run.")
|
||||
|
||||
@@ -21,17 +21,21 @@ def register_operators():
|
||||
def length_max(_, text, strlen):
|
||||
return len(text) <= int(strlen)
|
||||
|
||||
# ✅ Custom function for case-insensitive regex matching
|
||||
# Custom function for case-insensitive regex matching
|
||||
def contains_regex(_, text, pattern):
|
||||
"""Returns True if `text` contains `pattern` (case-insensitive regex match)."""
|
||||
return bool(re.search(pattern, str(text), re.IGNORECASE))
|
||||
|
||||
# ✅ Custom function for NOT matching case-insensitive regex
|
||||
# Custom function for NOT matching case-insensitive regex
|
||||
def not_contains_regex(_, text, pattern):
|
||||
"""Returns True if `text` does NOT contain `pattern` (case-insensitive regex match)."""
|
||||
return not bool(re.search(pattern, str(text), re.IGNORECASE))
|
||||
|
||||
def not_contains(_, text, pattern):
|
||||
return not pattern in text
|
||||
|
||||
return {
|
||||
"!in": not_contains,
|
||||
"!contains_regex": not_contains_regex,
|
||||
"contains_regex": contains_regex,
|
||||
"ends_with": ends_with,
|
||||
@@ -43,6 +47,7 @@ def register_operators():
|
||||
@hookimpl
|
||||
def register_operator_choices():
|
||||
return [
|
||||
("!in", "Does NOT Contain"),
|
||||
("starts_with", "Text Starts With"),
|
||||
("ends_with", "Text Ends With"),
|
||||
("length_min", "Length minimum"),
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
import pluggy
|
||||
from loguru import logger
|
||||
|
||||
LEVENSHTEIN_MAX_LEN_FOR_EDIT_STATS=100000
|
||||
|
||||
# Support both plugin systems
|
||||
conditions_hookimpl = pluggy.HookimplMarker("changedetectionio_conditions")
|
||||
global_hookimpl = pluggy.HookimplMarker("changedetectionio")
|
||||
@@ -9,15 +11,20 @@ def levenshtein_ratio_recent_history(watch, incoming_text=None):
|
||||
try:
|
||||
from Levenshtein import ratio, distance
|
||||
k = list(watch.history.keys())
|
||||
if len(k) >= 2:
|
||||
# When called from ui_edit_stats_extras, we don't have incoming_text
|
||||
if incoming_text is None:
|
||||
a = watch.get_history_snapshot(timestamp=k[-1]) # Latest snapshot
|
||||
b = watch.get_history_snapshot(timestamp=k[-2]) # Previous snapshot
|
||||
else:
|
||||
a = watch.get_history_snapshot(timestamp=k[-2]) # Second newest, incoming_text will be "newest"
|
||||
b = incoming_text
|
||||
|
||||
a = None
|
||||
b = None
|
||||
|
||||
# When called from ui_edit_stats_extras, we don't have incoming_text
|
||||
if incoming_text is None:
|
||||
a = watch.get_history_snapshot(timestamp=k[-1]) # Latest snapshot
|
||||
b = watch.get_history_snapshot(timestamp=k[-2]) # Previous snapshot
|
||||
|
||||
# Needs atleast one snapshot
|
||||
elif len(k) >= 1: # Should be atleast one snapshot to compare against
|
||||
a = watch.get_history_snapshot(timestamp=k[-1]) # Latest saved snapshot
|
||||
b = incoming_text if incoming_text else k[-2]
|
||||
|
||||
if a and b:
|
||||
distance_value = distance(a, b)
|
||||
ratio_value = ratio(a, b)
|
||||
return {
|
||||
@@ -53,7 +60,7 @@ def add_data(current_watch_uuid, application_datastruct, ephemeral_data):
|
||||
# ephemeral_data['text'] will be the current text after filters, they may have edited filters but not saved them yet etc
|
||||
|
||||
if watch and 'text' in ephemeral_data:
|
||||
lev_data = levenshtein_ratio_recent_history(watch, ephemeral_data['text'])
|
||||
lev_data = levenshtein_ratio_recent_history(watch, ephemeral_data.get('text',''))
|
||||
if isinstance(lev_data, dict):
|
||||
res['levenshtein_ratio'] = lev_data.get('ratio', 0)
|
||||
res['levenshtein_similarity'] = lev_data.get('percent_similar', 0)
|
||||
@@ -67,7 +74,17 @@ def ui_edit_stats_extras(watch):
|
||||
"""Generate the HTML for Levenshtein stats - shared by both plugin systems"""
|
||||
if len(watch.history.keys()) < 2:
|
||||
return "<p>Not enough history to calculate Levenshtein metrics</p>"
|
||||
|
||||
|
||||
|
||||
# Protection against the algorithm getting stuck on huge documents
|
||||
k = list(watch.history.keys())
|
||||
if any(
|
||||
len(watch.get_history_snapshot(timestamp=k[idx])) > LEVENSHTEIN_MAX_LEN_FOR_EDIT_STATS
|
||||
for idx in (-1, -2)
|
||||
if len(k) >= abs(idx)
|
||||
):
|
||||
return "<p>Snapshot too large for edit statistics, skipping.</p>"
|
||||
|
||||
try:
|
||||
lev_data = levenshtein_ratio_recent_history(watch)
|
||||
if not lev_data or not isinstance(lev_data, dict):
|
||||
|
||||
@@ -28,6 +28,7 @@ from changedetectionio.content_fetchers.requests import fetcher as html_requests
|
||||
import importlib.resources
|
||||
XPATH_ELEMENT_JS = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text(encoding='utf-8')
|
||||
INSTOCK_DATA_JS = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('stock-not-in-stock.js').read_text(encoding='utf-8')
|
||||
FAVICON_FETCHER_JS = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('favicon-fetcher.js').read_text(encoding='utf-8')
|
||||
|
||||
|
||||
def available_fetchers():
|
||||
|
||||
@@ -48,6 +48,7 @@ class Fetcher():
|
||||
error = None
|
||||
fetcher_description = "No description"
|
||||
headers = {}
|
||||
favicon_blob = None
|
||||
instock_data = None
|
||||
instock_data_js = ""
|
||||
status_code = None
|
||||
@@ -63,21 +64,35 @@ class Fetcher():
|
||||
# Time ONTOP of the system defined env minimum time
|
||||
render_extract_delay = 0
|
||||
|
||||
def clear_content(self):
|
||||
"""
|
||||
Explicitly clear all content from memory to free up heap space.
|
||||
Call this after content has been saved to disk.
|
||||
"""
|
||||
self.content = None
|
||||
if hasattr(self, 'raw_content'):
|
||||
self.raw_content = None
|
||||
self.screenshot = None
|
||||
self.xpath_data = None
|
||||
# Keep headers and status_code as they're small
|
||||
|
||||
@abstractmethod
|
||||
def get_error(self):
|
||||
return self.error
|
||||
|
||||
@abstractmethod
|
||||
def run(self,
|
||||
url,
|
||||
timeout,
|
||||
request_headers,
|
||||
request_body,
|
||||
request_method,
|
||||
ignore_status_codes=False,
|
||||
current_include_filters=None,
|
||||
is_binary=False,
|
||||
empty_pages_are_a_change=False):
|
||||
async def run(self,
|
||||
fetch_favicon=True,
|
||||
current_include_filters=None,
|
||||
empty_pages_are_a_change=False,
|
||||
ignore_status_codes=False,
|
||||
is_binary=False,
|
||||
request_body=None,
|
||||
request_headers=None,
|
||||
request_method=None,
|
||||
timeout=None,
|
||||
url=None,
|
||||
):
|
||||
# Should set self.error, self.status_code and self.content
|
||||
pass
|
||||
|
||||
@@ -122,10 +137,10 @@ class Fetcher():
|
||||
|
||||
return None
|
||||
|
||||
def iterate_browser_steps(self, start_url=None):
|
||||
async def iterate_browser_steps(self, start_url=None):
|
||||
from changedetectionio.blueprint.browser_steps.browser_steps import steppable_browser_interface
|
||||
from playwright._impl._errors import TimeoutError, Error
|
||||
from changedetectionio.safe_jinja import render as jinja_render
|
||||
from changedetectionio.jinja2_custom import render as jinja_render
|
||||
step_n = 0
|
||||
|
||||
if self.browser_steps is not None and len(self.browser_steps):
|
||||
@@ -136,8 +151,8 @@ class Fetcher():
|
||||
for step in valid_steps:
|
||||
step_n += 1
|
||||
logger.debug(f">> Iterating check - browser Step n {step_n} - {step['operation']}...")
|
||||
self.screenshot_step("before-" + str(step_n))
|
||||
self.save_step_html("before-" + str(step_n))
|
||||
await self.screenshot_step("before-" + str(step_n))
|
||||
await self.save_step_html("before-" + str(step_n))
|
||||
|
||||
try:
|
||||
optional_value = step['optional_value']
|
||||
@@ -148,11 +163,11 @@ class Fetcher():
|
||||
if '{%' in step['selector'] or '{{' in step['selector']:
|
||||
selector = jinja_render(template_str=step['selector'])
|
||||
|
||||
getattr(interface, "call_action")(action_name=step['operation'],
|
||||
await getattr(interface, "call_action")(action_name=step['operation'],
|
||||
selector=selector,
|
||||
optional_value=optional_value)
|
||||
self.screenshot_step(step_n)
|
||||
self.save_step_html(step_n)
|
||||
await self.screenshot_step(step_n)
|
||||
await self.save_step_html(step_n)
|
||||
except (Error, TimeoutError) as e:
|
||||
logger.debug(str(e))
|
||||
# Stop processing here
|
||||
|
||||
@@ -5,19 +5,19 @@ from urllib.parse import urlparse
|
||||
from loguru import logger
|
||||
|
||||
from changedetectionio.content_fetchers import SCREENSHOT_MAX_HEIGHT_DEFAULT, visualselector_xpath_selectors, \
|
||||
SCREENSHOT_SIZE_STITCH_THRESHOLD, SCREENSHOT_MAX_TOTAL_HEIGHT, XPATH_ELEMENT_JS, INSTOCK_DATA_JS
|
||||
SCREENSHOT_SIZE_STITCH_THRESHOLD, SCREENSHOT_MAX_TOTAL_HEIGHT, XPATH_ELEMENT_JS, INSTOCK_DATA_JS, FAVICON_FETCHER_JS
|
||||
from changedetectionio.content_fetchers.base import Fetcher, manage_user_agent
|
||||
from changedetectionio.content_fetchers.exceptions import PageUnloadable, Non200ErrorCodeReceived, EmptyReply, ScreenshotUnavailable
|
||||
|
||||
def capture_full_page(page):
|
||||
async def capture_full_page_async(page):
|
||||
import os
|
||||
import time
|
||||
from multiprocessing import Process, Pipe
|
||||
|
||||
start = time.time()
|
||||
|
||||
page_height = page.evaluate("document.documentElement.scrollHeight")
|
||||
page_width = page.evaluate("document.documentElement.scrollWidth")
|
||||
page_height = await page.evaluate("document.documentElement.scrollHeight")
|
||||
page_width = await page.evaluate("document.documentElement.scrollWidth")
|
||||
original_viewport = page.viewport_size
|
||||
|
||||
logger.debug(f"Playwright viewport size {page.viewport_size} page height {page_height} page width {page_width}")
|
||||
@@ -32,23 +32,23 @@ def capture_full_page(page):
|
||||
step_size = page_height # Incase page is bigger than default viewport but smaller than proposed step size
|
||||
logger.debug(f"Setting bigger viewport to step through large page width W{page.viewport_size['width']}xH{step_size} because page_height > viewport_size")
|
||||
# Set viewport to a larger size to capture more content at once
|
||||
page.set_viewport_size({'width': page.viewport_size['width'], 'height': step_size})
|
||||
await page.set_viewport_size({'width': page.viewport_size['width'], 'height': step_size})
|
||||
|
||||
# Capture screenshots in chunks up to the max total height
|
||||
while y < min(page_height, SCREENSHOT_MAX_TOTAL_HEIGHT):
|
||||
page.request_gc()
|
||||
page.evaluate(f"window.scrollTo(0, {y})")
|
||||
page.request_gc()
|
||||
screenshot_chunks.append(page.screenshot(
|
||||
await page.request_gc()
|
||||
await page.evaluate(f"window.scrollTo(0, {y})")
|
||||
await page.request_gc()
|
||||
screenshot_chunks.append(await page.screenshot(
|
||||
type="jpeg",
|
||||
full_page=False,
|
||||
quality=int(os.getenv("SCREENSHOT_QUALITY", 72))
|
||||
))
|
||||
y += step_size
|
||||
page.request_gc()
|
||||
await page.request_gc()
|
||||
|
||||
# Restore original viewport size
|
||||
page.set_viewport_size({'width': original_viewport['width'], 'height': original_viewport['height']})
|
||||
await page.set_viewport_size({'width': original_viewport['width'], 'height': original_viewport['height']})
|
||||
|
||||
# If we have multiple chunks, stitch them together
|
||||
if len(screenshot_chunks) > 1:
|
||||
@@ -73,7 +73,6 @@ def capture_full_page(page):
|
||||
|
||||
return screenshot_chunks[0]
|
||||
|
||||
|
||||
class fetcher(Fetcher):
|
||||
fetcher_description = "Playwright {}/Javascript".format(
|
||||
os.getenv("PLAYWRIGHT_BROWSER_TYPE", 'chromium').capitalize()
|
||||
@@ -124,9 +123,9 @@ class fetcher(Fetcher):
|
||||
self.proxy['username'] = parsed.username
|
||||
self.proxy['password'] = parsed.password
|
||||
|
||||
def screenshot_step(self, step_n=''):
|
||||
async def screenshot_step(self, step_n=''):
|
||||
super().screenshot_step(step_n=step_n)
|
||||
screenshot = capture_full_page(page=self.page)
|
||||
screenshot = await capture_full_page_async(page=self.page)
|
||||
|
||||
|
||||
if self.browser_steps_screenshot_path is not None:
|
||||
@@ -135,45 +134,47 @@ class fetcher(Fetcher):
|
||||
with open(destination, 'wb') as f:
|
||||
f.write(screenshot)
|
||||
|
||||
def save_step_html(self, step_n):
|
||||
async def save_step_html(self, step_n):
|
||||
super().save_step_html(step_n=step_n)
|
||||
content = self.page.content()
|
||||
content = await self.page.content()
|
||||
destination = os.path.join(self.browser_steps_screenshot_path, 'step_{}.html'.format(step_n))
|
||||
logger.debug(f"Saving step HTML to {destination}")
|
||||
with open(destination, 'w') as f:
|
||||
f.write(content)
|
||||
|
||||
def run(self,
|
||||
url,
|
||||
timeout,
|
||||
request_headers,
|
||||
request_body,
|
||||
request_method,
|
||||
ignore_status_codes=False,
|
||||
current_include_filters=None,
|
||||
is_binary=False,
|
||||
empty_pages_are_a_change=False):
|
||||
async def run(self,
|
||||
fetch_favicon=True,
|
||||
current_include_filters=None,
|
||||
empty_pages_are_a_change=False,
|
||||
ignore_status_codes=False,
|
||||
is_binary=False,
|
||||
request_body=None,
|
||||
request_headers=None,
|
||||
request_method=None,
|
||||
timeout=None,
|
||||
url=None,
|
||||
):
|
||||
|
||||
from playwright.sync_api import sync_playwright
|
||||
from playwright.async_api import async_playwright
|
||||
import playwright._impl._errors
|
||||
import time
|
||||
self.delete_browser_steps_screenshots()
|
||||
response = None
|
||||
|
||||
with sync_playwright() as p:
|
||||
async with async_playwright() as p:
|
||||
browser_type = getattr(p, self.browser_type)
|
||||
|
||||
# Seemed to cause a connection Exception even tho I can see it connect
|
||||
# self.browser = browser_type.connect(self.command_executor, timeout=timeout*1000)
|
||||
# 60,000 connection timeout only
|
||||
browser = browser_type.connect_over_cdp(self.browser_connection_url, timeout=60000)
|
||||
browser = await browser_type.connect_over_cdp(self.browser_connection_url, timeout=60000)
|
||||
|
||||
# SOCKS5 with authentication is not supported (yet)
|
||||
# https://github.com/microsoft/playwright/issues/10567
|
||||
|
||||
# Set user agent to prevent Cloudflare from blocking the browser
|
||||
# Use the default one configured in the App.py model that's passed from fetch_site_status.py
|
||||
context = browser.new_context(
|
||||
context = await browser.new_context(
|
||||
accept_downloads=False, # Should never be needed
|
||||
bypass_csp=True, # This is needed to enable JavaScript execution on GitHub and others
|
||||
extra_http_headers=request_headers,
|
||||
@@ -183,41 +184,47 @@ class fetcher(Fetcher):
|
||||
user_agent=manage_user_agent(headers=request_headers),
|
||||
)
|
||||
|
||||
self.page = context.new_page()
|
||||
self.page = await context.new_page()
|
||||
|
||||
# Listen for all console events and handle errors
|
||||
self.page.on("console", lambda msg: print(f"Playwright console: Watch URL: {url} {msg.type}: {msg.text} {msg.args}"))
|
||||
self.page.on("console", lambda msg: logger.debug(f"Playwright console: Watch URL: {url} {msg.type}: {msg.text} {msg.args}"))
|
||||
|
||||
# Re-use as much code from browser steps as possible so its the same
|
||||
from changedetectionio.blueprint.browser_steps.browser_steps import steppable_browser_interface
|
||||
browsersteps_interface = steppable_browser_interface(start_url=url)
|
||||
browsersteps_interface.page = self.page
|
||||
|
||||
response = browsersteps_interface.action_goto_url(value=url)
|
||||
self.headers = response.all_headers()
|
||||
response = await browsersteps_interface.action_goto_url(value=url)
|
||||
|
||||
if response is None:
|
||||
context.close()
|
||||
browser.close()
|
||||
await context.close()
|
||||
await browser.close()
|
||||
logger.debug("Content Fetcher > Response object from the browser communication was none")
|
||||
raise EmptyReply(url=url, status_code=None)
|
||||
|
||||
# In async_playwright, all_headers() returns a coroutine
|
||||
try:
|
||||
self.headers = await response.all_headers()
|
||||
except TypeError:
|
||||
# Fallback for sync version
|
||||
self.headers = response.all_headers()
|
||||
|
||||
try:
|
||||
if self.webdriver_js_execute_code is not None and len(self.webdriver_js_execute_code):
|
||||
browsersteps_interface.action_execute_js(value=self.webdriver_js_execute_code, selector=None)
|
||||
await browsersteps_interface.action_execute_js(value=self.webdriver_js_execute_code, selector=None)
|
||||
except playwright._impl._errors.TimeoutError as e:
|
||||
context.close()
|
||||
browser.close()
|
||||
await context.close()
|
||||
await browser.close()
|
||||
# This can be ok, we will try to grab what we could retrieve
|
||||
pass
|
||||
except Exception as e:
|
||||
logger.debug(f"Content Fetcher > Other exception when executing custom JS code {str(e)}")
|
||||
context.close()
|
||||
browser.close()
|
||||
await context.close()
|
||||
await browser.close()
|
||||
raise PageUnloadable(url=url, status_code=None, message=str(e))
|
||||
|
||||
extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay
|
||||
self.page.wait_for_timeout(extra_wait * 1000)
|
||||
await self.page.wait_for_timeout(extra_wait * 1000)
|
||||
|
||||
try:
|
||||
self.status_code = response.status
|
||||
@@ -225,50 +232,58 @@ class fetcher(Fetcher):
|
||||
# https://github.com/dgtlmoon/changedetection.io/discussions/2122#discussioncomment-8241962
|
||||
logger.critical(f"Response from the browser/Playwright did not have a status_code! Response follows.")
|
||||
logger.critical(response)
|
||||
context.close()
|
||||
browser.close()
|
||||
await context.close()
|
||||
await browser.close()
|
||||
raise PageUnloadable(url=url, status_code=None, message=str(e))
|
||||
|
||||
if fetch_favicon:
|
||||
try:
|
||||
self.favicon_blob = await self.page.evaluate(FAVICON_FETCHER_JS)
|
||||
await self.page.request_gc()
|
||||
except Exception as e:
|
||||
logger.error(f"Error fetching FavIcon info {str(e)}, continuing.")
|
||||
|
||||
if self.status_code != 200 and not ignore_status_codes:
|
||||
screenshot = capture_full_page(self.page)
|
||||
screenshot = await capture_full_page_async(self.page)
|
||||
raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, screenshot=screenshot)
|
||||
|
||||
if not empty_pages_are_a_change and len(self.page.content().strip()) == 0:
|
||||
if not empty_pages_are_a_change and len((await self.page.content()).strip()) == 0:
|
||||
logger.debug("Content Fetcher > Content was empty, empty_pages_are_a_change = False")
|
||||
context.close()
|
||||
browser.close()
|
||||
await context.close()
|
||||
await browser.close()
|
||||
raise EmptyReply(url=url, status_code=response.status)
|
||||
|
||||
# Run Browser Steps here
|
||||
if self.browser_steps_get_valid_steps():
|
||||
self.iterate_browser_steps(start_url=url)
|
||||
await self.iterate_browser_steps(start_url=url)
|
||||
|
||||
self.page.wait_for_timeout(extra_wait * 1000)
|
||||
await self.page.wait_for_timeout(extra_wait * 1000)
|
||||
|
||||
now = time.time()
|
||||
# So we can find an element on the page where its selector was entered manually (maybe not xPath etc)
|
||||
if current_include_filters is not None:
|
||||
self.page.evaluate("var include_filters={}".format(json.dumps(current_include_filters)))
|
||||
await self.page.evaluate("var include_filters={}".format(json.dumps(current_include_filters)))
|
||||
else:
|
||||
self.page.evaluate("var include_filters=''")
|
||||
self.page.request_gc()
|
||||
await self.page.evaluate("var include_filters=''")
|
||||
await self.page.request_gc()
|
||||
|
||||
# request_gc before and after evaluate to free up memory
|
||||
# @todo browsersteps etc
|
||||
MAX_TOTAL_HEIGHT = int(os.getenv("SCREENSHOT_MAX_HEIGHT", SCREENSHOT_MAX_HEIGHT_DEFAULT))
|
||||
self.xpath_data = self.page.evaluate(XPATH_ELEMENT_JS, {
|
||||
self.xpath_data = await self.page.evaluate(XPATH_ELEMENT_JS, {
|
||||
"visualselector_xpath_selectors": visualselector_xpath_selectors,
|
||||
"max_height": MAX_TOTAL_HEIGHT
|
||||
})
|
||||
self.page.request_gc()
|
||||
await self.page.request_gc()
|
||||
|
||||
self.instock_data = self.page.evaluate(INSTOCK_DATA_JS)
|
||||
self.page.request_gc()
|
||||
self.instock_data = await self.page.evaluate(INSTOCK_DATA_JS)
|
||||
await self.page.request_gc()
|
||||
|
||||
self.content = self.page.content()
|
||||
self.page.request_gc()
|
||||
self.content = await self.page.content()
|
||||
await self.page.request_gc()
|
||||
logger.debug(f"Scrape xPath element data in browser done in {time.time() - now:.2f}s")
|
||||
|
||||
|
||||
# Bug 3 in Playwright screenshot handling
|
||||
# Some bug where it gives the wrong screenshot size, but making a request with the clip set first seems to solve it
|
||||
# JPEG is better here because the screenshots can be very very large
|
||||
@@ -278,7 +293,7 @@ class fetcher(Fetcher):
|
||||
# acceptable screenshot quality here
|
||||
try:
|
||||
# The actual screenshot - this always base64 and needs decoding! horrible! huge CPU usage
|
||||
self.screenshot = capture_full_page(page=self.page)
|
||||
self.screenshot = await capture_full_page_async(page=self.page)
|
||||
|
||||
except Exception as e:
|
||||
# It's likely the screenshot was too long/big and something crashed
|
||||
@@ -286,30 +301,30 @@ class fetcher(Fetcher):
|
||||
finally:
|
||||
# Request garbage collection one more time before closing
|
||||
try:
|
||||
self.page.request_gc()
|
||||
await self.page.request_gc()
|
||||
except:
|
||||
pass
|
||||
|
||||
# Clean up resources properly
|
||||
try:
|
||||
self.page.request_gc()
|
||||
await self.page.request_gc()
|
||||
except:
|
||||
pass
|
||||
|
||||
try:
|
||||
self.page.close()
|
||||
await self.page.close()
|
||||
except:
|
||||
pass
|
||||
self.page = None
|
||||
|
||||
try:
|
||||
context.close()
|
||||
await context.close()
|
||||
except:
|
||||
pass
|
||||
context = None
|
||||
|
||||
try:
|
||||
browser.close()
|
||||
await browser.close()
|
||||
except:
|
||||
pass
|
||||
browser = None
|
||||
|
||||
@@ -8,7 +8,7 @@ from loguru import logger
|
||||
|
||||
from changedetectionio.content_fetchers import SCREENSHOT_MAX_HEIGHT_DEFAULT, visualselector_xpath_selectors, \
|
||||
SCREENSHOT_SIZE_STITCH_THRESHOLD, SCREENSHOT_DEFAULT_QUALITY, XPATH_ELEMENT_JS, INSTOCK_DATA_JS, \
|
||||
SCREENSHOT_MAX_TOTAL_HEIGHT
|
||||
SCREENSHOT_MAX_TOTAL_HEIGHT, FAVICON_FETCHER_JS
|
||||
from changedetectionio.content_fetchers.base import Fetcher, manage_user_agent
|
||||
from changedetectionio.content_fetchers.exceptions import PageUnloadable, Non200ErrorCodeReceived, EmptyReply, BrowserFetchTimedOut, \
|
||||
BrowserConnectError
|
||||
@@ -51,7 +51,15 @@ async def capture_full_page(page):
|
||||
await page.setViewport({'width': page.viewport['width'], 'height': step_size})
|
||||
|
||||
while y < min(page_height, SCREENSHOT_MAX_TOTAL_HEIGHT):
|
||||
await page.evaluate(f"window.scrollTo(0, {y})")
|
||||
# better than scrollTo incase they override it in the page
|
||||
await page.evaluate(
|
||||
"""(y) => {
|
||||
document.documentElement.scrollTop = y;
|
||||
document.body.scrollTop = y;
|
||||
}""",
|
||||
y
|
||||
)
|
||||
|
||||
screenshot_chunks.append(await page.screenshot(type_='jpeg',
|
||||
fullPage=False,
|
||||
quality=int(os.getenv("SCREENSHOT_QUALITY", 72))))
|
||||
@@ -137,19 +145,24 @@ class fetcher(Fetcher):
|
||||
# f.write(content)
|
||||
|
||||
async def fetch_page(self,
|
||||
url,
|
||||
timeout,
|
||||
request_headers,
|
||||
request_body,
|
||||
request_method,
|
||||
ignore_status_codes,
|
||||
current_include_filters,
|
||||
empty_pages_are_a_change,
|
||||
fetch_favicon,
|
||||
ignore_status_codes,
|
||||
is_binary,
|
||||
empty_pages_are_a_change
|
||||
request_body,
|
||||
request_headers,
|
||||
request_method,
|
||||
timeout,
|
||||
url,
|
||||
):
|
||||
import re
|
||||
self.delete_browser_steps_screenshots()
|
||||
extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay
|
||||
|
||||
n = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay
|
||||
extra_wait = min(n, 15)
|
||||
|
||||
logger.debug(f"Extra wait set to {extra_wait}s, requested was {n}s.")
|
||||
|
||||
from pyppeteer import Pyppeteer
|
||||
pyppeteer_instance = Pyppeteer()
|
||||
@@ -165,12 +178,13 @@ class fetcher(Fetcher):
|
||||
except websockets.exceptions.InvalidURI:
|
||||
raise BrowserConnectError(msg=f"Error connecting to the browser, check your browser connection address (should be ws:// or wss://")
|
||||
except Exception as e:
|
||||
raise BrowserConnectError(msg=f"Error connecting to the browser {str(e)}")
|
||||
raise BrowserConnectError(msg=f"Error connecting to the browser - Exception '{str(e)}'")
|
||||
|
||||
# Better is to launch chrome with the URL as arg
|
||||
# non-headless - newPage() will launch an extra tab/window, .browser should already contain 1 page/tab
|
||||
# headless - ask a new page
|
||||
self.page = (pages := await browser.pages) and len(pages) or await browser.newPage()
|
||||
# more reliable is to just request a new page
|
||||
self.page = await browser.newPage()
|
||||
|
||||
# Add console handler to capture console.log from favicon fetcher
|
||||
#self.page.on('console', lambda msg: logger.debug(f"Browser console [{msg.type}]: {msg.text}"))
|
||||
|
||||
if '--window-size' in self.browser_connection_url:
|
||||
# Be sure the viewport is always the window-size, this is often not the same thing
|
||||
@@ -227,13 +241,35 @@ class fetcher(Fetcher):
|
||||
# browsersteps_interface = steppable_browser_interface()
|
||||
# browsersteps_interface.page = self.page
|
||||
|
||||
response = await self.page.goto(url, waitUntil="load")
|
||||
async def handle_frame_navigation(event):
|
||||
logger.debug(f"Frame navigated: {event}")
|
||||
w = extra_wait - 2 if extra_wait > 4 else 2
|
||||
logger.debug(f"Waiting {w} seconds before calling Page.stopLoading...")
|
||||
await asyncio.sleep(w)
|
||||
logger.debug("Issuing stopLoading command...")
|
||||
await self.page._client.send('Page.stopLoading')
|
||||
logger.debug("stopLoading command sent!")
|
||||
|
||||
if response is None:
|
||||
await self.page.close()
|
||||
await browser.close()
|
||||
logger.warning("Content Fetcher > Response object was none (as in, the response from the browser was empty, not just the content)")
|
||||
raise EmptyReply(url=url, status_code=None)
|
||||
self.page._client.on('Page.frameStartedNavigating', lambda event: asyncio.create_task(handle_frame_navigation(event)))
|
||||
self.page._client.on('Page.frameStartedLoading', lambda event: asyncio.create_task(handle_frame_navigation(event)))
|
||||
self.page._client.on('Page.frameStoppedLoading', lambda event: logger.debug(f"Frame stopped loading: {event}"))
|
||||
|
||||
response = None
|
||||
attempt=0
|
||||
while not response:
|
||||
logger.debug(f"Attempting page fetch {url} attempt {attempt}")
|
||||
response = await self.page.goto(url)
|
||||
await asyncio.sleep(1 + extra_wait)
|
||||
if response:
|
||||
break
|
||||
if not response:
|
||||
logger.warning("Page did not fetch! trying again!")
|
||||
if response is None and attempt>=2:
|
||||
await self.page.close()
|
||||
await browser.close()
|
||||
logger.warning(f"Content Fetcher > Response object was none (as in, the response from the browser was empty, not just the content) exiting attmpt {attempt}")
|
||||
raise EmptyReply(url=url, status_code=None)
|
||||
attempt+=1
|
||||
|
||||
self.headers = response.headers
|
||||
|
||||
@@ -258,6 +294,12 @@ class fetcher(Fetcher):
|
||||
await browser.close()
|
||||
raise PageUnloadable(url=url, status_code=None, message=str(e))
|
||||
|
||||
if fetch_favicon:
|
||||
try:
|
||||
self.favicon_blob = await self.page.evaluate(FAVICON_FETCHER_JS)
|
||||
except Exception as e:
|
||||
logger.error(f"Error fetching FavIcon info {str(e)}, continuing.")
|
||||
|
||||
if self.status_code != 200 and not ignore_status_codes:
|
||||
screenshot = await capture_full_page(page=self.page)
|
||||
|
||||
@@ -276,7 +318,6 @@ class fetcher(Fetcher):
|
||||
# if self.browser_steps_get_valid_steps():
|
||||
# self.iterate_browser_steps()
|
||||
|
||||
await asyncio.sleep(1 + extra_wait)
|
||||
|
||||
# So we can find an element on the page where its selector was entered manually (maybe not xPath etc)
|
||||
# Setup the xPath/VisualSelector scraper
|
||||
@@ -310,25 +351,36 @@ class fetcher(Fetcher):
|
||||
async def main(self, **kwargs):
|
||||
await self.fetch_page(**kwargs)
|
||||
|
||||
def run(self, url, timeout, request_headers, request_body, request_method, ignore_status_codes=False,
|
||||
current_include_filters=None, is_binary=False, empty_pages_are_a_change=False):
|
||||
async def run(self,
|
||||
fetch_favicon=True,
|
||||
current_include_filters=None,
|
||||
empty_pages_are_a_change=False,
|
||||
ignore_status_codes=False,
|
||||
is_binary=False,
|
||||
request_body=None,
|
||||
request_headers=None,
|
||||
request_method=None,
|
||||
timeout=None,
|
||||
url=None,
|
||||
):
|
||||
|
||||
#@todo make update_worker async which could run any of these content_fetchers within memory and time constraints
|
||||
max_time = os.getenv('PUPPETEER_MAX_PROCESSING_TIMEOUT_SECONDS', 180)
|
||||
max_time = int(os.getenv('PUPPETEER_MAX_PROCESSING_TIMEOUT_SECONDS', 180))
|
||||
|
||||
# This will work in 3.10 but not >= 3.11 because 3.11 wants tasks only
|
||||
# Now we run this properly in async context since we're called from async worker
|
||||
try:
|
||||
asyncio.run(asyncio.wait_for(self.main(
|
||||
url=url,
|
||||
timeout=timeout,
|
||||
request_headers=request_headers,
|
||||
request_body=request_body,
|
||||
request_method=request_method,
|
||||
ignore_status_codes=ignore_status_codes,
|
||||
await asyncio.wait_for(self.main(
|
||||
current_include_filters=current_include_filters,
|
||||
empty_pages_are_a_change=empty_pages_are_a_change,
|
||||
fetch_favicon=fetch_favicon,
|
||||
ignore_status_codes=ignore_status_codes,
|
||||
is_binary=is_binary,
|
||||
empty_pages_are_a_change=empty_pages_are_a_change
|
||||
), timeout=max_time))
|
||||
request_body=request_body,
|
||||
request_headers=request_headers,
|
||||
request_method=request_method,
|
||||
timeout=timeout,
|
||||
url=url,
|
||||
), timeout=max_time
|
||||
)
|
||||
except asyncio.TimeoutError:
|
||||
raise(BrowserFetchTimedOut(msg=f"Browser connected but was unable to process the page in {max_time} seconds."))
|
||||
|
||||
raise (BrowserFetchTimedOut(msg=f"Browser connected but was unable to process the page in {max_time} seconds."))
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
from loguru import logger
|
||||
import hashlib
|
||||
import os
|
||||
import asyncio
|
||||
from changedetectionio import strtobool
|
||||
from changedetectionio.content_fetchers.exceptions import BrowserStepsInUnsupportedFetcher, EmptyReply, Non200ErrorCodeReceived
|
||||
from changedetectionio.content_fetchers.base import Fetcher
|
||||
@@ -15,7 +16,7 @@ class fetcher(Fetcher):
|
||||
self.proxy_override = proxy_override
|
||||
# browser_connection_url is none because its always 'launched locally'
|
||||
|
||||
def run(self,
|
||||
def _run_sync(self,
|
||||
url,
|
||||
timeout,
|
||||
request_headers,
|
||||
@@ -25,9 +26,11 @@ class fetcher(Fetcher):
|
||||
current_include_filters=None,
|
||||
is_binary=False,
|
||||
empty_pages_are_a_change=False):
|
||||
"""Synchronous version of run - the original requests implementation"""
|
||||
|
||||
import chardet
|
||||
import requests
|
||||
from requests.exceptions import ProxyError, ConnectionError, RequestException
|
||||
|
||||
if self.browser_steps_get_valid_steps():
|
||||
raise BrowserStepsInUnsupportedFetcher(url=url)
|
||||
@@ -35,7 +38,6 @@ class fetcher(Fetcher):
|
||||
proxies = {}
|
||||
|
||||
# Allows override the proxy on a per-request basis
|
||||
|
||||
# https://requests.readthedocs.io/en/latest/user/advanced/#socks
|
||||
# Should also work with `socks5://user:pass@host:port` type syntax.
|
||||
|
||||
@@ -49,17 +51,23 @@ class fetcher(Fetcher):
|
||||
|
||||
session = requests.Session()
|
||||
|
||||
|
||||
if strtobool(os.getenv('ALLOW_FILE_URI', 'false')) and url.startswith('file://'):
|
||||
from requests_file import FileAdapter
|
||||
session.mount('file://', FileAdapter())
|
||||
|
||||
r = session.request(method=request_method,
|
||||
data=request_body.encode('utf-8') if type(request_body) is str else request_body,
|
||||
url=url,
|
||||
headers=request_headers,
|
||||
timeout=timeout,
|
||||
proxies=proxies,
|
||||
verify=False)
|
||||
try:
|
||||
r = session.request(method=request_method,
|
||||
data=request_body.encode('utf-8') if type(request_body) is str else request_body,
|
||||
url=url,
|
||||
headers=request_headers,
|
||||
timeout=timeout,
|
||||
proxies=proxies,
|
||||
verify=False)
|
||||
except Exception as e:
|
||||
msg = str(e)
|
||||
if proxies and 'SOCKSHTTPSConnectionPool' in msg:
|
||||
msg = f"Proxy connection failed? {msg}"
|
||||
raise Exception(msg) from e
|
||||
|
||||
# If the response did not tell us what encoding format to expect, Then use chardet to override what `requests` thinks.
|
||||
# For example - some sites don't tell us it's utf-8, but return utf-8 content
|
||||
@@ -94,9 +102,40 @@ class fetcher(Fetcher):
|
||||
else:
|
||||
self.content = r.text
|
||||
|
||||
|
||||
self.raw_content = r.content
|
||||
|
||||
async def run(self,
|
||||
fetch_favicon=True,
|
||||
current_include_filters=None,
|
||||
empty_pages_are_a_change=False,
|
||||
ignore_status_codes=False,
|
||||
is_binary=False,
|
||||
request_body=None,
|
||||
request_headers=None,
|
||||
request_method=None,
|
||||
timeout=None,
|
||||
url=None,
|
||||
):
|
||||
"""Async wrapper that runs the synchronous requests code in a thread pool"""
|
||||
|
||||
loop = asyncio.get_event_loop()
|
||||
|
||||
# Run the synchronous _run_sync in a thread pool to avoid blocking the event loop
|
||||
await loop.run_in_executor(
|
||||
None, # Use default ThreadPoolExecutor
|
||||
lambda: self._run_sync(
|
||||
url=url,
|
||||
timeout=timeout,
|
||||
request_headers=request_headers,
|
||||
request_body=request_body,
|
||||
request_method=request_method,
|
||||
ignore_status_codes=ignore_status_codes,
|
||||
current_include_filters=current_include_filters,
|
||||
is_binary=is_binary,
|
||||
empty_pages_are_a_change=empty_pages_are_a_change
|
||||
)
|
||||
)
|
||||
|
||||
def quit(self, watch=None):
|
||||
|
||||
# In case they switched to `requests` fetcher from something else
|
||||
|
||||
101
changedetectionio/content_fetchers/res/favicon-fetcher.js
Normal file
101
changedetectionio/content_fetchers/res/favicon-fetcher.js
Normal file
@@ -0,0 +1,101 @@
|
||||
(async () => {
|
||||
// Define the function inside the IIFE for console testing
|
||||
window.getFaviconAsBlob = async function() {
|
||||
const links = Array.from(document.querySelectorAll(
|
||||
'link[rel~="apple-touch-icon"], link[rel~="icon"]'
|
||||
));
|
||||
|
||||
const icons = links.map(link => {
|
||||
const sizesStr = link.getAttribute('sizes');
|
||||
let size = 0;
|
||||
if (sizesStr) {
|
||||
const [w] = sizesStr.split('x').map(Number);
|
||||
if (!isNaN(w)) size = w;
|
||||
} else {
|
||||
size = 16;
|
||||
}
|
||||
return {
|
||||
size,
|
||||
rel: link.getAttribute('rel'),
|
||||
href: link.href,
|
||||
hasSizes: !!sizesStr
|
||||
};
|
||||
});
|
||||
|
||||
// If no icons found, add fallback favicon.ico
|
||||
if (icons.length === 0) {
|
||||
icons.push({
|
||||
size: 16,
|
||||
rel: 'icon',
|
||||
href: '/favicon.ico',
|
||||
hasSizes: false
|
||||
});
|
||||
}
|
||||
|
||||
// sort preference: highest resolution first, then apple-touch-icon, then regular icons
|
||||
icons.sort((a, b) => {
|
||||
// First priority: actual size (highest first)
|
||||
if (a.size !== b.size) {
|
||||
return b.size - a.size;
|
||||
}
|
||||
|
||||
// Second priority: apple-touch-icon over regular icon
|
||||
const isAppleA = /apple-touch-icon/.test(a.rel);
|
||||
const isAppleB = /apple-touch-icon/.test(b.rel);
|
||||
if (isAppleA && !isAppleB) return -1;
|
||||
if (!isAppleA && isAppleB) return 1;
|
||||
|
||||
// Third priority: icons with no size attribute (fallback icons) last
|
||||
const hasNoSizeA = !a.hasSizes;
|
||||
const hasNoSizeB = !b.hasSizes;
|
||||
if (hasNoSizeA && !hasNoSizeB) return 1;
|
||||
if (!hasNoSizeA && hasNoSizeB) return -1;
|
||||
|
||||
return 0;
|
||||
});
|
||||
|
||||
const timeoutMs = 2000;
|
||||
|
||||
for (const icon of icons) {
|
||||
try {
|
||||
const controller = new AbortController();
|
||||
const timeout = setTimeout(() => controller.abort(), timeoutMs);
|
||||
|
||||
const resp = await fetch(icon.href, {
|
||||
signal: controller.signal,
|
||||
redirect: 'follow'
|
||||
});
|
||||
|
||||
clearTimeout(timeout);
|
||||
|
||||
if (!resp.ok) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const blob = await resp.blob();
|
||||
|
||||
// Convert blob to base64
|
||||
const reader = new FileReader();
|
||||
return await new Promise(resolve => {
|
||||
reader.onloadend = () => {
|
||||
resolve({
|
||||
url: icon.href,
|
||||
base64: reader.result.split(",")[1]
|
||||
});
|
||||
};
|
||||
reader.readAsDataURL(blob);
|
||||
});
|
||||
|
||||
} catch (e) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
// nothing found
|
||||
return null;
|
||||
};
|
||||
|
||||
// Auto-execute and return result for page.evaluate()
|
||||
return await window.getFaviconAsBlob();
|
||||
})();
|
||||
|
||||
@@ -17,7 +17,9 @@ async () => {
|
||||
'back in stock soon',
|
||||
'back-order or out of stock',
|
||||
'backordered',
|
||||
'backorder',
|
||||
'benachrichtigt mich', // notify me
|
||||
'binnenkort leverbaar', // coming soon
|
||||
'brak na stanie',
|
||||
'brak w magazynie',
|
||||
'coming soon',
|
||||
@@ -38,12 +40,14 @@ async () => {
|
||||
'mail me when available',
|
||||
'message if back in stock',
|
||||
'mevcut değil',
|
||||
'more on order',
|
||||
'nachricht bei',
|
||||
'nicht auf lager',
|
||||
'nicht lagernd',
|
||||
'nicht lieferbar',
|
||||
'nicht verfügbar',
|
||||
'nicht vorrätig',
|
||||
'nicht mehr lieferbar',
|
||||
'nicht zur verfügung',
|
||||
'nie znaleziono produktów',
|
||||
'niet beschikbaar',
|
||||
@@ -51,6 +55,7 @@ async () => {
|
||||
'niet op voorraad',
|
||||
'no disponible',
|
||||
'no featured offers available',
|
||||
'no longer available',
|
||||
'no longer in stock',
|
||||
'no tickets available',
|
||||
'non disponibile',
|
||||
@@ -84,6 +89,7 @@ async () => {
|
||||
'tidak tersedia',
|
||||
'tijdelijk uitverkocht',
|
||||
'tiket tidak tersedia',
|
||||
'to subscribe to back in stock',
|
||||
'tükendi',
|
||||
'unavailable nearby',
|
||||
'unavailable tickets',
|
||||
@@ -118,8 +124,7 @@ async () => {
|
||||
return text.toLowerCase().trim();
|
||||
}
|
||||
|
||||
const negateOutOfStockRegex = new RegExp('^([0-9] in stock|add to cart|in stock)', 'ig');
|
||||
|
||||
const negateOutOfStockRegex = new RegExp('^([0-9] in stock|add to cart|in stock|arrives approximately)', 'ig');
|
||||
// The out-of-stock or in-stock-text is generally always above-the-fold
|
||||
// and often below-the-fold is a list of related products that may or may not contain trigger text
|
||||
// so it's good to filter to just the 'above the fold' elements
|
||||
|
||||
@@ -4,22 +4,20 @@ import time
|
||||
from loguru import logger
|
||||
from changedetectionio.content_fetchers.base import Fetcher
|
||||
|
||||
|
||||
class fetcher(Fetcher):
|
||||
if os.getenv("WEBDRIVER_URL"):
|
||||
fetcher_description = "WebDriver Chrome/Javascript via '{}'".format(os.getenv("WEBDRIVER_URL"))
|
||||
fetcher_description = f"WebDriver Chrome/Javascript via \"{os.getenv('WEBDRIVER_URL', '')}\""
|
||||
else:
|
||||
fetcher_description = "WebDriver Chrome/Javascript"
|
||||
|
||||
# Configs for Proxy setup
|
||||
# In the ENV vars, is prefixed with "webdriver_", so it is for example "webdriver_sslProxy"
|
||||
selenium_proxy_settings_mappings = ['proxyType', 'ftpProxy', 'httpProxy', 'noProxy',
|
||||
'proxyAutoconfigUrl', 'sslProxy', 'autodetect',
|
||||
'socksProxy', 'socksVersion', 'socksUsername', 'socksPassword']
|
||||
proxy = None
|
||||
proxy_url = None
|
||||
|
||||
def __init__(self, proxy_override=None, custom_browser_connection_url=None):
|
||||
super().__init__()
|
||||
from selenium.webdriver.common.proxy import Proxy as SeleniumProxy
|
||||
from urllib.parse import urlparse
|
||||
from selenium.webdriver.common.proxy import Proxy
|
||||
|
||||
# .strip('"') is going to save someone a lot of time when they accidently wrap the env value
|
||||
if not custom_browser_connection_url:
|
||||
@@ -28,107 +26,118 @@ class fetcher(Fetcher):
|
||||
self.browser_connection_is_custom = True
|
||||
self.browser_connection_url = custom_browser_connection_url
|
||||
|
||||
# If any proxy settings are enabled, then we should setup the proxy object
|
||||
proxy_args = {}
|
||||
for k in self.selenium_proxy_settings_mappings:
|
||||
v = os.getenv('webdriver_' + k, False)
|
||||
if v:
|
||||
proxy_args[k] = v.strip('"')
|
||||
##### PROXY SETUP #####
|
||||
|
||||
# Map back standard HTTP_ and HTTPS_PROXY to webDriver httpProxy/sslProxy
|
||||
if not proxy_args.get('webdriver_httpProxy') and self.system_http_proxy:
|
||||
proxy_args['httpProxy'] = self.system_http_proxy
|
||||
if not proxy_args.get('webdriver_sslProxy') and self.system_https_proxy:
|
||||
proxy_args['httpsProxy'] = self.system_https_proxy
|
||||
|
||||
# Allows override the proxy on a per-request basis
|
||||
if proxy_override is not None:
|
||||
proxy_args['httpProxy'] = proxy_override
|
||||
|
||||
if proxy_args:
|
||||
self.proxy = SeleniumProxy(raw=proxy_args)
|
||||
|
||||
def run(self,
|
||||
url,
|
||||
timeout,
|
||||
request_headers,
|
||||
request_body,
|
||||
request_method,
|
||||
ignore_status_codes=False,
|
||||
current_include_filters=None,
|
||||
is_binary=False,
|
||||
empty_pages_are_a_change=False):
|
||||
|
||||
from selenium import webdriver
|
||||
from selenium.webdriver.chrome.options import Options as ChromeOptions
|
||||
from selenium.common.exceptions import WebDriverException
|
||||
# request_body, request_method unused for now, until some magic in the future happens.
|
||||
|
||||
options = ChromeOptions()
|
||||
|
||||
# Load Chrome options from env
|
||||
CHROME_OPTIONS = [
|
||||
line.strip()
|
||||
for line in os.getenv("CHROME_OPTIONS", "").strip().splitlines()
|
||||
if line.strip()
|
||||
proxy_sources = [
|
||||
self.system_http_proxy,
|
||||
self.system_https_proxy,
|
||||
os.getenv('webdriver_proxySocks'),
|
||||
os.getenv('webdriver_socksProxy'),
|
||||
os.getenv('webdriver_proxyHttp'),
|
||||
os.getenv('webdriver_httpProxy'),
|
||||
os.getenv('webdriver_proxyHttps'),
|
||||
os.getenv('webdriver_httpsProxy'),
|
||||
os.getenv('webdriver_sslProxy'),
|
||||
proxy_override, # last one should override
|
||||
]
|
||||
# The built in selenium proxy handling is super unreliable!!! so we just grab which ever proxy setting we can find and throw it in --proxy-server=
|
||||
for k in filter(None, proxy_sources):
|
||||
if not k:
|
||||
continue
|
||||
self.proxy_url = k.strip()
|
||||
|
||||
for opt in CHROME_OPTIONS:
|
||||
options.add_argument(opt)
|
||||
async def run(self,
|
||||
fetch_favicon=True,
|
||||
current_include_filters=None,
|
||||
empty_pages_are_a_change=False,
|
||||
ignore_status_codes=False,
|
||||
is_binary=False,
|
||||
request_body=None,
|
||||
request_headers=None,
|
||||
request_method=None,
|
||||
timeout=None,
|
||||
url=None,
|
||||
):
|
||||
|
||||
if self.proxy:
|
||||
options.proxy = self.proxy
|
||||
import asyncio
|
||||
|
||||
self.driver = webdriver.Remote(
|
||||
command_executor=self.browser_connection_url,
|
||||
options=options)
|
||||
# Wrap the entire selenium operation in a thread executor
|
||||
def _run_sync():
|
||||
from selenium.webdriver.chrome.options import Options as ChromeOptions
|
||||
# request_body, request_method unused for now, until some magic in the future happens.
|
||||
|
||||
try:
|
||||
self.driver.get(url)
|
||||
except WebDriverException as e:
|
||||
# Be sure we close the session window
|
||||
self.quit()
|
||||
raise
|
||||
options = ChromeOptions()
|
||||
|
||||
if not "--window-size" in os.getenv("CHROME_OPTIONS", ""):
|
||||
self.driver.set_window_size(1280, 1024)
|
||||
# Load Chrome options from env
|
||||
CHROME_OPTIONS = [
|
||||
line.strip()
|
||||
for line in os.getenv("CHROME_OPTIONS", "").strip().splitlines()
|
||||
if line.strip()
|
||||
]
|
||||
|
||||
self.driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)))
|
||||
for opt in CHROME_OPTIONS:
|
||||
options.add_argument(opt)
|
||||
|
||||
if self.webdriver_js_execute_code is not None:
|
||||
self.driver.execute_script(self.webdriver_js_execute_code)
|
||||
# Selenium doesn't automatically wait for actions as good as Playwright, so wait again
|
||||
self.driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)))
|
||||
# 1. proxy_config /Proxy(proxy_config) selenium object is REALLY unreliable
|
||||
# 2. selenium-wire cant be used because the websocket version conflicts with pypeteer-ng
|
||||
# 3. selenium only allows ONE runner at a time by default!
|
||||
# 4. driver must use quit() or it will continue to block/hold the selenium process!!
|
||||
|
||||
if self.proxy_url:
|
||||
options.add_argument(f'--proxy-server={self.proxy_url}')
|
||||
|
||||
# @todo - how to check this? is it possible?
|
||||
self.status_code = 200
|
||||
# @todo somehow we should try to get this working for WebDriver
|
||||
# raise EmptyReply(url=url, status_code=r.status_code)
|
||||
|
||||
# @todo - dom wait loaded?
|
||||
time.sleep(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay)
|
||||
self.content = self.driver.page_source
|
||||
self.headers = {}
|
||||
|
||||
self.screenshot = self.driver.get_screenshot_as_png()
|
||||
|
||||
# Does the connection to the webdriver work? run a test connection.
|
||||
def is_ready(self):
|
||||
from selenium import webdriver
|
||||
from selenium.webdriver.chrome.options import Options as ChromeOptions
|
||||
|
||||
self.driver = webdriver.Remote(
|
||||
command_executor=self.command_executor,
|
||||
options=ChromeOptions())
|
||||
|
||||
# driver.quit() seems to cause better exceptions
|
||||
self.quit()
|
||||
return True
|
||||
|
||||
def quit(self, watch=None):
|
||||
if self.driver:
|
||||
from selenium.webdriver.remote.remote_connection import RemoteConnection
|
||||
from selenium.webdriver.remote.webdriver import WebDriver as RemoteWebDriver
|
||||
driver = None
|
||||
try:
|
||||
self.driver.quit()
|
||||
# Create the RemoteConnection and set timeout (e.g., 30 seconds)
|
||||
remote_connection = RemoteConnection(
|
||||
self.browser_connection_url,
|
||||
)
|
||||
remote_connection.set_timeout(30) # seconds
|
||||
|
||||
# Now create the driver with the RemoteConnection
|
||||
driver = RemoteWebDriver(
|
||||
command_executor=remote_connection,
|
||||
options=options
|
||||
)
|
||||
|
||||
driver.set_page_load_timeout(int(os.getenv("WEBDRIVER_PAGELOAD_TIMEOUT", 45)))
|
||||
except Exception as e:
|
||||
logger.debug(f"Content Fetcher > Exception in chrome shutdown/quit {str(e)}")
|
||||
if driver:
|
||||
driver.quit()
|
||||
raise e
|
||||
|
||||
try:
|
||||
driver.get(url)
|
||||
|
||||
if not "--window-size" in os.getenv("CHROME_OPTIONS", ""):
|
||||
driver.set_window_size(1280, 1024)
|
||||
|
||||
driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)))
|
||||
|
||||
if self.webdriver_js_execute_code is not None:
|
||||
driver.execute_script(self.webdriver_js_execute_code)
|
||||
# Selenium doesn't automatically wait for actions as good as Playwright, so wait again
|
||||
driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)))
|
||||
|
||||
# @todo - how to check this? is it possible?
|
||||
self.status_code = 200
|
||||
# @todo somehow we should try to get this working for WebDriver
|
||||
# raise EmptyReply(url=url, status_code=r.status_code)
|
||||
|
||||
# @todo - dom wait loaded?
|
||||
import time
|
||||
time.sleep(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay)
|
||||
self.content = driver.page_source
|
||||
self.headers = {}
|
||||
self.screenshot = driver.get_screenshot_as_png()
|
||||
except Exception as e:
|
||||
driver.quit()
|
||||
raise e
|
||||
|
||||
driver.quit()
|
||||
|
||||
# Run the selenium operations in a thread pool to avoid blocking the event loop
|
||||
loop = asyncio.get_event_loop()
|
||||
await loop.run_in_executor(None, _run_sync)
|
||||
|
||||
535
changedetectionio/custom_queue.py
Normal file
535
changedetectionio/custom_queue.py
Normal file
@@ -0,0 +1,535 @@
|
||||
import queue
|
||||
import asyncio
|
||||
from blinker import signal
|
||||
from loguru import logger
|
||||
|
||||
|
||||
class NotificationQueue(queue.Queue):
|
||||
"""
|
||||
Extended Queue that sends a 'notification_event' signal when notifications are added.
|
||||
|
||||
This class extends the standard Queue and adds a signal emission after a notification
|
||||
is put into the queue. The signal includes the watch UUID if available.
|
||||
"""
|
||||
|
||||
def __init__(self, maxsize=0):
|
||||
super().__init__(maxsize)
|
||||
try:
|
||||
self.notification_event_signal = signal('notification_event')
|
||||
except Exception as e:
|
||||
logger.critical(f"Exception creating notification_event signal: {e}")
|
||||
|
||||
def put(self, item, block=True, timeout=None):
|
||||
# Call the parent's put method first
|
||||
super().put(item, block, timeout)
|
||||
|
||||
# After putting the notification in the queue, emit signal with watch UUID
|
||||
try:
|
||||
if self.notification_event_signal and isinstance(item, dict):
|
||||
watch_uuid = item.get('uuid')
|
||||
if watch_uuid:
|
||||
# Send the notification_event signal with the watch UUID
|
||||
self.notification_event_signal.send(watch_uuid=watch_uuid)
|
||||
logger.trace(f"NotificationQueue: Emitted notification_event signal for watch UUID {watch_uuid}")
|
||||
else:
|
||||
# Send signal without UUID for system notifications
|
||||
self.notification_event_signal.send()
|
||||
logger.trace("NotificationQueue: Emitted notification_event signal for system notification")
|
||||
except Exception as e:
|
||||
logger.error(f"Exception emitting notification_event signal: {e}")
|
||||
|
||||
class SignalPriorityQueue(queue.PriorityQueue):
|
||||
"""
|
||||
Extended PriorityQueue that sends a signal when items with a UUID are added.
|
||||
|
||||
This class extends the standard PriorityQueue and adds a signal emission
|
||||
after an item is put into the queue. If the item contains a UUID, the signal
|
||||
is sent with that UUID as a parameter.
|
||||
"""
|
||||
|
||||
def __init__(self, maxsize=0):
|
||||
super().__init__(maxsize)
|
||||
try:
|
||||
self.queue_length_signal = signal('queue_length')
|
||||
except Exception as e:
|
||||
logger.critical(f"Exception: {e}")
|
||||
|
||||
def put(self, item, block=True, timeout=None):
|
||||
# Call the parent's put method first
|
||||
super().put(item, block, timeout)
|
||||
|
||||
# After putting the item in the queue, check if it has a UUID and emit signal
|
||||
if hasattr(item, 'item') and isinstance(item.item, dict) and 'uuid' in item.item:
|
||||
uuid = item.item['uuid']
|
||||
# Get the signal and send it if it exists
|
||||
watch_check_update = signal('watch_check_update')
|
||||
if watch_check_update:
|
||||
# Send the watch_uuid parameter
|
||||
watch_check_update.send(watch_uuid=uuid)
|
||||
|
||||
# Send queue_length signal with current queue size
|
||||
try:
|
||||
|
||||
if self.queue_length_signal:
|
||||
self.queue_length_signal.send(length=self.qsize())
|
||||
except Exception as e:
|
||||
logger.critical(f"Exception: {e}")
|
||||
|
||||
def get(self, block=True, timeout=None):
|
||||
# Call the parent's get method first
|
||||
item = super().get(block, timeout)
|
||||
|
||||
# Send queue_length signal with current queue size
|
||||
try:
|
||||
if self.queue_length_signal:
|
||||
self.queue_length_signal.send(length=self.qsize())
|
||||
except Exception as e:
|
||||
logger.critical(f"Exception: {e}")
|
||||
return item
|
||||
|
||||
def get_uuid_position(self, target_uuid):
|
||||
"""
|
||||
Find the position of a watch UUID in the priority queue.
|
||||
Optimized for large queues - O(n) complexity instead of O(n log n).
|
||||
|
||||
Args:
|
||||
target_uuid: The UUID to search for
|
||||
|
||||
Returns:
|
||||
dict: Contains position info or None if not found
|
||||
- position: 0-based position in queue (0 = next to be processed)
|
||||
- total_items: total number of items in queue
|
||||
- priority: the priority value of the found item
|
||||
"""
|
||||
with self.mutex:
|
||||
queue_list = list(self.queue)
|
||||
total_items = len(queue_list)
|
||||
|
||||
if total_items == 0:
|
||||
return {
|
||||
'position': None,
|
||||
'total_items': 0,
|
||||
'priority': None,
|
||||
'found': False
|
||||
}
|
||||
|
||||
# Find the target item and its priority first - O(n)
|
||||
target_item = None
|
||||
target_priority = None
|
||||
|
||||
for item in queue_list:
|
||||
if (hasattr(item, 'item') and
|
||||
isinstance(item.item, dict) and
|
||||
item.item.get('uuid') == target_uuid):
|
||||
target_item = item
|
||||
target_priority = item.priority
|
||||
break
|
||||
|
||||
if target_item is None:
|
||||
return {
|
||||
'position': None,
|
||||
'total_items': total_items,
|
||||
'priority': None,
|
||||
'found': False
|
||||
}
|
||||
|
||||
# Count how many items have higher priority (lower numbers) - O(n)
|
||||
position = 0
|
||||
for item in queue_list:
|
||||
# Items with lower priority numbers are processed first
|
||||
if item.priority < target_priority:
|
||||
position += 1
|
||||
elif item.priority == target_priority and item != target_item:
|
||||
# For same priority, count items that come before this one
|
||||
# (Note: this is approximate since heap order isn't guaranteed for equal priorities)
|
||||
position += 1
|
||||
|
||||
return {
|
||||
'position': position,
|
||||
'total_items': total_items,
|
||||
'priority': target_priority,
|
||||
'found': True
|
||||
}
|
||||
|
||||
def get_all_queued_uuids(self, limit=None, offset=0):
|
||||
"""
|
||||
Get UUIDs currently in the queue with their positions.
|
||||
For large queues, use limit/offset for pagination.
|
||||
|
||||
Args:
|
||||
limit: Maximum number of items to return (None = all)
|
||||
offset: Number of items to skip (for pagination)
|
||||
|
||||
Returns:
|
||||
dict: Contains items and metadata
|
||||
- items: List of dicts with uuid, position, and priority
|
||||
- total_items: Total number of items in queue
|
||||
- returned_items: Number of items returned
|
||||
- has_more: Whether there are more items after this page
|
||||
"""
|
||||
with self.mutex:
|
||||
queue_list = list(self.queue)
|
||||
total_items = len(queue_list)
|
||||
|
||||
if total_items == 0:
|
||||
return {
|
||||
'items': [],
|
||||
'total_items': 0,
|
||||
'returned_items': 0,
|
||||
'has_more': False
|
||||
}
|
||||
|
||||
# For very large queues, warn about performance
|
||||
if total_items > 1000 and limit is None:
|
||||
logger.warning(f"Getting all {total_items} queued items without limit - this may be slow")
|
||||
|
||||
# Sort only if we need exact positions (expensive for large queues)
|
||||
if limit is not None and limit <= 100:
|
||||
# For small requests, we can afford to sort
|
||||
queue_items = sorted(queue_list)
|
||||
end_idx = min(offset + limit, len(queue_items)) if limit else len(queue_items)
|
||||
items_to_process = queue_items[offset:end_idx]
|
||||
|
||||
result = []
|
||||
for position, item in enumerate(items_to_process, start=offset):
|
||||
if (hasattr(item, 'item') and
|
||||
isinstance(item.item, dict) and
|
||||
'uuid' in item.item):
|
||||
|
||||
result.append({
|
||||
'uuid': item.item['uuid'],
|
||||
'position': position,
|
||||
'priority': item.priority
|
||||
})
|
||||
|
||||
return {
|
||||
'items': result,
|
||||
'total_items': total_items,
|
||||
'returned_items': len(result),
|
||||
'has_more': (offset + len(result)) < total_items
|
||||
}
|
||||
else:
|
||||
# For large requests, return items with approximate positions
|
||||
# This is much faster O(n) instead of O(n log n)
|
||||
result = []
|
||||
processed = 0
|
||||
skipped = 0
|
||||
|
||||
for item in queue_list:
|
||||
if (hasattr(item, 'item') and
|
||||
isinstance(item.item, dict) and
|
||||
'uuid' in item.item):
|
||||
|
||||
if skipped < offset:
|
||||
skipped += 1
|
||||
continue
|
||||
|
||||
if limit and processed >= limit:
|
||||
break
|
||||
|
||||
# Approximate position based on priority comparison
|
||||
approx_position = sum(1 for other in queue_list if other.priority < item.priority)
|
||||
|
||||
result.append({
|
||||
'uuid': item.item['uuid'],
|
||||
'position': approx_position, # Approximate
|
||||
'priority': item.priority
|
||||
})
|
||||
processed += 1
|
||||
|
||||
return {
|
||||
'items': result,
|
||||
'total_items': total_items,
|
||||
'returned_items': len(result),
|
||||
'has_more': (offset + len(result)) < total_items,
|
||||
'note': 'Positions are approximate for performance with large queues'
|
||||
}
|
||||
|
||||
def get_queue_summary(self):
|
||||
"""
|
||||
Get a quick summary of queue state without expensive operations.
|
||||
O(n) complexity - fast even for large queues.
|
||||
|
||||
Returns:
|
||||
dict: Queue summary statistics
|
||||
"""
|
||||
with self.mutex:
|
||||
queue_list = list(self.queue)
|
||||
total_items = len(queue_list)
|
||||
|
||||
if total_items == 0:
|
||||
return {
|
||||
'total_items': 0,
|
||||
'priority_breakdown': {},
|
||||
'immediate_items': 0,
|
||||
'clone_items': 0,
|
||||
'scheduled_items': 0
|
||||
}
|
||||
|
||||
# Count items by priority type - O(n)
|
||||
immediate_items = 0 # priority 1
|
||||
clone_items = 0 # priority 5
|
||||
scheduled_items = 0 # priority > 100 (timestamps)
|
||||
priority_counts = {}
|
||||
|
||||
for item in queue_list:
|
||||
priority = item.priority
|
||||
priority_counts[priority] = priority_counts.get(priority, 0) + 1
|
||||
|
||||
if priority == 1:
|
||||
immediate_items += 1
|
||||
elif priority == 5:
|
||||
clone_items += 1
|
||||
elif priority > 100:
|
||||
scheduled_items += 1
|
||||
|
||||
return {
|
||||
'total_items': total_items,
|
||||
'priority_breakdown': priority_counts,
|
||||
'immediate_items': immediate_items,
|
||||
'clone_items': clone_items,
|
||||
'scheduled_items': scheduled_items,
|
||||
'min_priority': min(priority_counts.keys()) if priority_counts else None,
|
||||
'max_priority': max(priority_counts.keys()) if priority_counts else None
|
||||
}
|
||||
|
||||
|
||||
class AsyncSignalPriorityQueue(asyncio.PriorityQueue):
|
||||
"""
|
||||
Async version of SignalPriorityQueue that sends signals when items are added/removed.
|
||||
|
||||
This class extends asyncio.PriorityQueue and maintains the same signal behavior
|
||||
as the synchronous version for real-time UI updates.
|
||||
"""
|
||||
|
||||
def __init__(self, maxsize=0):
|
||||
super().__init__(maxsize)
|
||||
try:
|
||||
self.queue_length_signal = signal('queue_length')
|
||||
except Exception as e:
|
||||
logger.critical(f"Exception: {e}")
|
||||
|
||||
async def put(self, item):
|
||||
# Call the parent's put method first
|
||||
await super().put(item)
|
||||
|
||||
# After putting the item in the queue, check if it has a UUID and emit signal
|
||||
if hasattr(item, 'item') and isinstance(item.item, dict) and 'uuid' in item.item:
|
||||
uuid = item.item['uuid']
|
||||
# Get the signal and send it if it exists
|
||||
watch_check_update = signal('watch_check_update')
|
||||
if watch_check_update:
|
||||
# Send the watch_uuid parameter
|
||||
watch_check_update.send(watch_uuid=uuid)
|
||||
|
||||
# Send queue_length signal with current queue size
|
||||
try:
|
||||
if self.queue_length_signal:
|
||||
self.queue_length_signal.send(length=self.qsize())
|
||||
except Exception as e:
|
||||
logger.critical(f"Exception: {e}")
|
||||
|
||||
async def get(self):
|
||||
# Call the parent's get method first
|
||||
item = await super().get()
|
||||
|
||||
# Send queue_length signal with current queue size
|
||||
try:
|
||||
if self.queue_length_signal:
|
||||
self.queue_length_signal.send(length=self.qsize())
|
||||
except Exception as e:
|
||||
logger.critical(f"Exception: {e}")
|
||||
return item
|
||||
|
||||
@property
|
||||
def queue(self):
|
||||
"""
|
||||
Provide compatibility with sync PriorityQueue.queue access
|
||||
Returns the internal queue for template access
|
||||
"""
|
||||
return self._queue if hasattr(self, '_queue') else []
|
||||
|
||||
def get_uuid_position(self, target_uuid):
|
||||
"""
|
||||
Find the position of a watch UUID in the async priority queue.
|
||||
Optimized for large queues - O(n) complexity instead of O(n log n).
|
||||
|
||||
Args:
|
||||
target_uuid: The UUID to search for
|
||||
|
||||
Returns:
|
||||
dict: Contains position info or None if not found
|
||||
- position: 0-based position in queue (0 = next to be processed)
|
||||
- total_items: total number of items in queue
|
||||
- priority: the priority value of the found item
|
||||
"""
|
||||
queue_list = list(self._queue)
|
||||
total_items = len(queue_list)
|
||||
|
||||
if total_items == 0:
|
||||
return {
|
||||
'position': None,
|
||||
'total_items': 0,
|
||||
'priority': None,
|
||||
'found': False
|
||||
}
|
||||
|
||||
# Find the target item and its priority first - O(n)
|
||||
target_item = None
|
||||
target_priority = None
|
||||
|
||||
for item in queue_list:
|
||||
if (hasattr(item, 'item') and
|
||||
isinstance(item.item, dict) and
|
||||
item.item.get('uuid') == target_uuid):
|
||||
target_item = item
|
||||
target_priority = item.priority
|
||||
break
|
||||
|
||||
if target_item is None:
|
||||
return {
|
||||
'position': None,
|
||||
'total_items': total_items,
|
||||
'priority': None,
|
||||
'found': False
|
||||
}
|
||||
|
||||
# Count how many items have higher priority (lower numbers) - O(n)
|
||||
position = 0
|
||||
for item in queue_list:
|
||||
if item.priority < target_priority:
|
||||
position += 1
|
||||
elif item.priority == target_priority and item != target_item:
|
||||
position += 1
|
||||
|
||||
return {
|
||||
'position': position,
|
||||
'total_items': total_items,
|
||||
'priority': target_priority,
|
||||
'found': True
|
||||
}
|
||||
|
||||
def get_all_queued_uuids(self, limit=None, offset=0):
|
||||
"""
|
||||
Get UUIDs currently in the async queue with their positions.
|
||||
For large queues, use limit/offset for pagination.
|
||||
|
||||
Args:
|
||||
limit: Maximum number of items to return (None = all)
|
||||
offset: Number of items to skip (for pagination)
|
||||
|
||||
Returns:
|
||||
dict: Contains items and metadata (same structure as sync version)
|
||||
"""
|
||||
queue_list = list(self._queue)
|
||||
total_items = len(queue_list)
|
||||
|
||||
if total_items == 0:
|
||||
return {
|
||||
'items': [],
|
||||
'total_items': 0,
|
||||
'returned_items': 0,
|
||||
'has_more': False
|
||||
}
|
||||
|
||||
# Same logic as sync version but without mutex
|
||||
if limit is not None and limit <= 100:
|
||||
queue_items = sorted(queue_list)
|
||||
end_idx = min(offset + limit, len(queue_items)) if limit else len(queue_items)
|
||||
items_to_process = queue_items[offset:end_idx]
|
||||
|
||||
result = []
|
||||
for position, item in enumerate(items_to_process, start=offset):
|
||||
if (hasattr(item, 'item') and
|
||||
isinstance(item.item, dict) and
|
||||
'uuid' in item.item):
|
||||
|
||||
result.append({
|
||||
'uuid': item.item['uuid'],
|
||||
'position': position,
|
||||
'priority': item.priority
|
||||
})
|
||||
|
||||
return {
|
||||
'items': result,
|
||||
'total_items': total_items,
|
||||
'returned_items': len(result),
|
||||
'has_more': (offset + len(result)) < total_items
|
||||
}
|
||||
else:
|
||||
# Fast approximate positions for large queues
|
||||
result = []
|
||||
processed = 0
|
||||
skipped = 0
|
||||
|
||||
for item in queue_list:
|
||||
if (hasattr(item, 'item') and
|
||||
isinstance(item.item, dict) and
|
||||
'uuid' in item.item):
|
||||
|
||||
if skipped < offset:
|
||||
skipped += 1
|
||||
continue
|
||||
|
||||
if limit and processed >= limit:
|
||||
break
|
||||
|
||||
approx_position = sum(1 for other in queue_list if other.priority < item.priority)
|
||||
|
||||
result.append({
|
||||
'uuid': item.item['uuid'],
|
||||
'position': approx_position,
|
||||
'priority': item.priority
|
||||
})
|
||||
processed += 1
|
||||
|
||||
return {
|
||||
'items': result,
|
||||
'total_items': total_items,
|
||||
'returned_items': len(result),
|
||||
'has_more': (offset + len(result)) < total_items,
|
||||
'note': 'Positions are approximate for performance with large queues'
|
||||
}
|
||||
|
||||
def get_queue_summary(self):
|
||||
"""
|
||||
Get a quick summary of async queue state.
|
||||
O(n) complexity - fast even for large queues.
|
||||
"""
|
||||
queue_list = list(self._queue)
|
||||
total_items = len(queue_list)
|
||||
|
||||
if total_items == 0:
|
||||
return {
|
||||
'total_items': 0,
|
||||
'priority_breakdown': {},
|
||||
'immediate_items': 0,
|
||||
'clone_items': 0,
|
||||
'scheduled_items': 0
|
||||
}
|
||||
|
||||
immediate_items = 0
|
||||
clone_items = 0
|
||||
scheduled_items = 0
|
||||
priority_counts = {}
|
||||
|
||||
for item in queue_list:
|
||||
priority = item.priority
|
||||
priority_counts[priority] = priority_counts.get(priority, 0) + 1
|
||||
|
||||
if priority == 1:
|
||||
immediate_items += 1
|
||||
elif priority == 5:
|
||||
clone_items += 1
|
||||
elif priority > 100:
|
||||
scheduled_items += 1
|
||||
|
||||
return {
|
||||
'total_items': total_items,
|
||||
'priority_breakdown': priority_counts,
|
||||
'immediate_items': immediate_items,
|
||||
'clone_items': clone_items,
|
||||
'scheduled_items': scheduled_items,
|
||||
'min_priority': min(priority_counts.keys()) if priority_counts else None,
|
||||
'max_priority': max(priority_counts.keys()) if priority_counts else None
|
||||
}
|
||||
@@ -1,113 +0,0 @@
|
||||
import difflib
|
||||
from typing import List, Iterator, Union
|
||||
|
||||
REMOVED_STYLE = "background-color: #fadad7; color: #b30000;"
|
||||
ADDED_STYLE = "background-color: #eaf2c2; color: #406619;"
|
||||
|
||||
def same_slicer(lst: List[str], start: int, end: int) -> List[str]:
|
||||
"""Return a slice of the list, or a single element if start == end."""
|
||||
return lst[start:end] if start != end else [lst[start]]
|
||||
|
||||
def customSequenceMatcher(
|
||||
before: List[str],
|
||||
after: List[str],
|
||||
include_equal: bool = False,
|
||||
include_removed: bool = True,
|
||||
include_added: bool = True,
|
||||
include_replaced: bool = True,
|
||||
include_change_type_prefix: bool = True,
|
||||
html_colour: bool = False
|
||||
) -> Iterator[List[str]]:
|
||||
"""
|
||||
Compare two sequences and yield differences based on specified parameters.
|
||||
|
||||
Args:
|
||||
before (List[str]): Original sequence
|
||||
after (List[str]): Modified sequence
|
||||
include_equal (bool): Include unchanged parts
|
||||
include_removed (bool): Include removed parts
|
||||
include_added (bool): Include added parts
|
||||
include_replaced (bool): Include replaced parts
|
||||
include_change_type_prefix (bool): Add prefixes to indicate change types
|
||||
html_colour (bool): Use HTML background colors for differences
|
||||
|
||||
Yields:
|
||||
List[str]: Differences between sequences
|
||||
"""
|
||||
cruncher = difflib.SequenceMatcher(isjunk=lambda x: x in " \t", a=before, b=after)
|
||||
|
||||
|
||||
|
||||
for tag, alo, ahi, blo, bhi in cruncher.get_opcodes():
|
||||
if include_equal and tag == 'equal':
|
||||
yield before[alo:ahi]
|
||||
elif include_removed and tag == 'delete':
|
||||
if html_colour:
|
||||
yield [f'<span style="{REMOVED_STYLE}">{line}</span>' for line in same_slicer(before, alo, ahi)]
|
||||
else:
|
||||
yield [f"(removed) {line}" for line in same_slicer(before, alo, ahi)] if include_change_type_prefix else same_slicer(before, alo, ahi)
|
||||
elif include_replaced and tag == 'replace':
|
||||
if html_colour:
|
||||
yield [f'<span style="{REMOVED_STYLE}">{line}</span>' for line in same_slicer(before, alo, ahi)] + \
|
||||
[f'<span style="{ADDED_STYLE}">{line}</span>' for line in same_slicer(after, blo, bhi)]
|
||||
else:
|
||||
yield [f"(changed) {line}" for line in same_slicer(before, alo, ahi)] + \
|
||||
[f"(into) {line}" for line in same_slicer(after, blo, bhi)] if include_change_type_prefix else same_slicer(before, alo, ahi) + same_slicer(after, blo, bhi)
|
||||
elif include_added and tag == 'insert':
|
||||
if html_colour:
|
||||
yield [f'<span style="{ADDED_STYLE}">{line}</span>' for line in same_slicer(after, blo, bhi)]
|
||||
else:
|
||||
yield [f"(added) {line}" for line in same_slicer(after, blo, bhi)] if include_change_type_prefix else same_slicer(after, blo, bhi)
|
||||
|
||||
def render_diff(
|
||||
previous_version_file_contents: str,
|
||||
newest_version_file_contents: str,
|
||||
include_equal: bool = False,
|
||||
include_removed: bool = True,
|
||||
include_added: bool = True,
|
||||
include_replaced: bool = True,
|
||||
line_feed_sep: str = "\n",
|
||||
include_change_type_prefix: bool = True,
|
||||
patch_format: bool = False,
|
||||
html_colour: bool = False
|
||||
) -> str:
|
||||
"""
|
||||
Render the difference between two file contents.
|
||||
|
||||
Args:
|
||||
previous_version_file_contents (str): Original file contents
|
||||
newest_version_file_contents (str): Modified file contents
|
||||
include_equal (bool): Include unchanged parts
|
||||
include_removed (bool): Include removed parts
|
||||
include_added (bool): Include added parts
|
||||
include_replaced (bool): Include replaced parts
|
||||
line_feed_sep (str): Separator for lines in output
|
||||
include_change_type_prefix (bool): Add prefixes to indicate change types
|
||||
patch_format (bool): Use patch format for output
|
||||
html_colour (bool): Use HTML background colors for differences
|
||||
|
||||
Returns:
|
||||
str: Rendered difference
|
||||
"""
|
||||
newest_lines = [line.rstrip() for line in newest_version_file_contents.splitlines()]
|
||||
previous_lines = [line.rstrip() for line in previous_version_file_contents.splitlines()] if previous_version_file_contents else []
|
||||
|
||||
if patch_format:
|
||||
patch = difflib.unified_diff(previous_lines, newest_lines)
|
||||
return line_feed_sep.join(patch)
|
||||
|
||||
rendered_diff = customSequenceMatcher(
|
||||
before=previous_lines,
|
||||
after=newest_lines,
|
||||
include_equal=include_equal,
|
||||
include_removed=include_removed,
|
||||
include_added=include_added,
|
||||
include_replaced=include_replaced,
|
||||
include_change_type_prefix=include_change_type_prefix,
|
||||
html_colour=html_colour
|
||||
)
|
||||
|
||||
def flatten(lst: List[Union[str, List[str]]]) -> str:
|
||||
return line_feed_sep.join(flatten(x) if isinstance(x, list) else x for x in lst)
|
||||
|
||||
return flatten(rendered_diff)
|
||||
462
changedetectionio/diff/__init__.py
Normal file
462
changedetectionio/diff/__init__.py
Normal file
@@ -0,0 +1,462 @@
|
||||
"""
|
||||
Diff rendering module for change detection.
|
||||
|
||||
This module provides functions for rendering differences between text content,
|
||||
with support for various output formats and tokenization strategies.
|
||||
"""
|
||||
|
||||
import difflib
|
||||
from typing import List, Iterator, Union
|
||||
import diff_match_patch as dmp_module
|
||||
import re
|
||||
|
||||
from .tokenizers import TOKENIZERS, tokenize_words_and_html
|
||||
from ..notification_service import CUSTOM_LINEBREAK_PLACEHOLDER
|
||||
|
||||
# Remember! gmail, outlook etc dont support <style> must be inline.
|
||||
# Gmail: strips <ins> and <del> tags entirely.
|
||||
# This is for the WHOLE line background style
|
||||
REMOVED_STYLE = "background-color: #fadad7; color: #b30000;"
|
||||
ADDED_STYLE = "background-color: #eaf2c2; color: #406619;"
|
||||
HTML_REMOVED_STYLE = REMOVED_STYLE # Export alias for handler.py
|
||||
HTML_ADDED_STYLE = ADDED_STYLE # Export alias for handler.py
|
||||
|
||||
# Darker backgrounds for nested highlighting (changed parts within lines)
|
||||
REMOVED_INNER_STYLE = "background-color: #ff867a; color: #111;"
|
||||
ADDED_INNER_STYLE = "background-color: #b2e841; color: #444;"
|
||||
HTML_CHANGED_STYLE = REMOVED_STYLE
|
||||
HTML_CHANGED_INTO_STYLE = ADDED_STYLE
|
||||
|
||||
# Placemarker constants - these get replaced by apply_service_tweaks() in handler.py
|
||||
# Something that cant get escaped to HTML by accident
|
||||
REMOVED_PLACEMARKER_OPEN = '@removed_PLACEMARKER_OPEN'
|
||||
REMOVED_PLACEMARKER_CLOSED = '@removed_PLACEMARKER_CLOSED'
|
||||
|
||||
ADDED_PLACEMARKER_OPEN = '@added_PLACEMARKER_OPEN'
|
||||
ADDED_PLACEMARKER_CLOSED = '@added_PLACEMARKER_CLOSED'
|
||||
|
||||
CHANGED_PLACEMARKER_OPEN = '@changed_PLACEMARKER_OPEN'
|
||||
CHANGED_PLACEMARKER_CLOSED = '@changed_PLACEMARKER_CLOSED'
|
||||
|
||||
CHANGED_INTO_PLACEMARKER_OPEN = '@changed_into_PLACEMARKER_OPEN'
|
||||
CHANGED_INTO_PLACEMARKER_CLOSED = '@changed_into_PLACEMARKER_CLOSED'
|
||||
|
||||
# Compiled regex patterns for performance
|
||||
WHITESPACE_NORMALIZE_RE = re.compile(r'\s+')
|
||||
|
||||
|
||||
def render_inline_word_diff(before_line: str, after_line: str, ignore_junk: bool = False, markdown_style: str = None, tokenizer: str = 'words_and_html') -> tuple[str, bool]:
|
||||
"""
|
||||
Render word-level differences between two lines inline using diff-match-patch library.
|
||||
|
||||
Args:
|
||||
before_line: Original line text
|
||||
after_line: Modified line text
|
||||
ignore_junk: Ignore whitespace-only changes
|
||||
markdown_style: Unused (kept for backwards compatibility)
|
||||
tokenizer: Name of tokenizer to use from TOKENIZERS registry (default: 'words_and_html')
|
||||
|
||||
Returns:
|
||||
tuple[str, bool]: (diff output with inline word-level highlighting, has_changes flag)
|
||||
"""
|
||||
# Normalize whitespace if ignore_junk is enabled
|
||||
if ignore_junk:
|
||||
# Normalize whitespace: replace multiple spaces/tabs with single space
|
||||
before_normalized = WHITESPACE_NORMALIZE_RE.sub(' ', before_line)
|
||||
after_normalized = WHITESPACE_NORMALIZE_RE.sub(' ', after_line)
|
||||
else:
|
||||
before_normalized = before_line
|
||||
after_normalized = after_line
|
||||
|
||||
# Use diff-match-patch with word-level tokenization
|
||||
# Strategy: Use linesToChars to treat words as atomic units
|
||||
dmp = dmp_module.diff_match_patch()
|
||||
|
||||
# Get the tokenizer function from the registry
|
||||
tokenizer_func = TOKENIZERS.get(tokenizer, tokenize_words_and_html)
|
||||
|
||||
# Tokenize both lines using the selected tokenizer
|
||||
before_tokens = tokenizer_func(before_normalized)
|
||||
after_tokens = tokenizer_func(after_normalized or ' ')
|
||||
|
||||
# Create mappings for linesToChars (using it for word-mode)
|
||||
# Join tokens with newline so each "line" is a token
|
||||
before_text = '\n'.join(before_tokens)
|
||||
after_text = '\n'.join(after_tokens)
|
||||
|
||||
# Use linesToChars for word-mode diffing
|
||||
lines_result = dmp.diff_linesToChars(before_text, after_text)
|
||||
line_before, line_after, line_array = lines_result
|
||||
|
||||
# Perform diff on the encoded strings
|
||||
diffs = dmp.diff_main(line_before, line_after, False)
|
||||
|
||||
# Convert back to original text
|
||||
dmp.diff_charsToLines(diffs, line_array)
|
||||
|
||||
# Remove the newlines we added for tokenization
|
||||
diffs = [(op, text.replace('\n', '')) for op, text in diffs]
|
||||
|
||||
# DON'T apply semantic cleanup here - it would break token boundaries
|
||||
# (e.g., "63" -> "66" would become "6" + "3" vs "6" + "6")
|
||||
# We want to preserve the tokenizer's word boundaries
|
||||
|
||||
# Check if there are any changes
|
||||
has_changes = any(op != 0 for op, _ in diffs)
|
||||
|
||||
if ignore_junk and not has_changes:
|
||||
return after_line, False
|
||||
|
||||
# Check if the whole line is replaced (no unchanged content)
|
||||
whole_line_replaced = not any(op == 0 and text.strip() for op, text in diffs)
|
||||
|
||||
# Build the output using placemarkers
|
||||
# When whole line is replaced, wrap entire removed content once and entire added content once
|
||||
if whole_line_replaced:
|
||||
removed_tokens = []
|
||||
added_tokens = []
|
||||
|
||||
for op, text in diffs:
|
||||
if op == 0: # Equal (e.g., whitespace tokens in common positions)
|
||||
# Include in both removed and added to preserve spacing
|
||||
removed_tokens.append(text)
|
||||
added_tokens.append(text)
|
||||
elif op == -1: # Deletion
|
||||
removed_tokens.append(text)
|
||||
elif op == 1: # Insertion
|
||||
added_tokens.append(text)
|
||||
|
||||
# Join all tokens and wrap the entire string once for removed, once for added
|
||||
result_parts = []
|
||||
|
||||
if removed_tokens:
|
||||
removed_full = ''.join(removed_tokens).rstrip()
|
||||
trailing_removed = ''.join(removed_tokens)[len(removed_full):] if len(''.join(removed_tokens)) > len(removed_full) else ''
|
||||
result_parts.append(f'{CHANGED_PLACEMARKER_OPEN}{removed_full}{CHANGED_PLACEMARKER_CLOSED}{trailing_removed}')
|
||||
|
||||
if added_tokens:
|
||||
if result_parts: # Add newline between removed and added
|
||||
result_parts.append('\n')
|
||||
added_full = ''.join(added_tokens).rstrip()
|
||||
trailing_added = ''.join(added_tokens)[len(added_full):] if len(''.join(added_tokens)) > len(added_full) else ''
|
||||
result_parts.append(f'{CHANGED_INTO_PLACEMARKER_OPEN}{added_full}{CHANGED_INTO_PLACEMARKER_CLOSED}{trailing_added}')
|
||||
|
||||
return ''.join(result_parts), has_changes
|
||||
else:
|
||||
# Inline changes within the line
|
||||
result_parts = []
|
||||
for op, text in diffs:
|
||||
if op == 0: # Equal
|
||||
result_parts.append(text)
|
||||
elif op == 1: # Insertion
|
||||
# Don't wrap empty content (e.g., whitespace-only tokens after rstrip)
|
||||
content = text.rstrip()
|
||||
trailing = text[len(content):] if len(text) > len(content) else ''
|
||||
if content:
|
||||
result_parts.append(f'{ADDED_PLACEMARKER_OPEN}{content}{ADDED_PLACEMARKER_CLOSED}{trailing}')
|
||||
else:
|
||||
result_parts.append(trailing)
|
||||
elif op == -1: # Deletion
|
||||
# Don't wrap empty content (e.g., whitespace-only tokens after rstrip)
|
||||
content = text.rstrip()
|
||||
trailing = text[len(content):] if len(text) > len(content) else ''
|
||||
if content:
|
||||
result_parts.append(f'{REMOVED_PLACEMARKER_OPEN}{content}{REMOVED_PLACEMARKER_CLOSED}{trailing}')
|
||||
else:
|
||||
result_parts.append(trailing)
|
||||
|
||||
return ''.join(result_parts), has_changes
|
||||
|
||||
|
||||
def render_nested_line_diff(before_line: str, after_line: str, ignore_junk: bool = False, tokenizer: str = 'words_and_html') -> tuple[str, str, bool]:
|
||||
"""
|
||||
Render line-level differences with nested highlighting for changed parts.
|
||||
|
||||
Returns two separate lines:
|
||||
- Before line: light red background with dark red on removed parts
|
||||
- After line: light green background with dark green on added parts
|
||||
|
||||
Args:
|
||||
before_line: Original line text
|
||||
after_line: Modified line text
|
||||
ignore_junk: Ignore whitespace-only changes
|
||||
tokenizer: Name of tokenizer to use from TOKENIZERS registry
|
||||
|
||||
Returns:
|
||||
tuple[str, str, bool]: (before_with_highlights, after_with_highlights, has_changes)
|
||||
"""
|
||||
# Normalize whitespace if ignore_junk is enabled
|
||||
if ignore_junk:
|
||||
before_normalized = WHITESPACE_NORMALIZE_RE.sub(' ', before_line)
|
||||
after_normalized = WHITESPACE_NORMALIZE_RE.sub(' ', after_line)
|
||||
else:
|
||||
before_normalized = before_line
|
||||
after_normalized = after_line
|
||||
|
||||
# Use diff-match-patch with word-level tokenization
|
||||
dmp = dmp_module.diff_match_patch()
|
||||
|
||||
# Get the tokenizer function from the registry
|
||||
tokenizer_func = TOKENIZERS.get(tokenizer, tokenize_words_and_html)
|
||||
|
||||
# Tokenize both lines
|
||||
before_tokens = tokenizer_func(before_normalized)
|
||||
after_tokens = tokenizer_func(after_normalized or ' ')
|
||||
|
||||
# Create mappings for linesToChars
|
||||
before_text = '\n'.join(before_tokens)
|
||||
after_text = '\n'.join(after_tokens)
|
||||
|
||||
# Use linesToChars for word-mode diffing
|
||||
lines_result = dmp.diff_linesToChars(before_text, after_text)
|
||||
line_before, line_after, line_array = lines_result
|
||||
|
||||
# Perform diff on the encoded strings
|
||||
diffs = dmp.diff_main(line_before, line_after, False)
|
||||
|
||||
# Convert back to original text
|
||||
dmp.diff_charsToLines(diffs, line_array)
|
||||
|
||||
# Remove the newlines we added for tokenization
|
||||
diffs = [(op, text.replace('\n', '')) for op, text in diffs]
|
||||
|
||||
# DON'T apply semantic cleanup here - it would break token boundaries
|
||||
# (e.g., "63" -> "66" would become "6" + "3" vs "6" + "6")
|
||||
# We want to preserve the tokenizer's word boundaries
|
||||
|
||||
# Check if there are any changes
|
||||
has_changes = any(op != 0 for op, _ in diffs)
|
||||
|
||||
if ignore_junk and not has_changes:
|
||||
return before_line, after_line, False
|
||||
|
||||
# Build the before line (with nested highlighting for removed parts)
|
||||
before_parts = []
|
||||
for op, text in diffs:
|
||||
if op == 0: # Equal
|
||||
before_parts.append(text)
|
||||
elif op == -1: # Deletion (in before)
|
||||
before_parts.append(f'<span style="{REMOVED_INNER_STYLE}">{text}</span>')
|
||||
# Skip insertions (op == 1) for the before line
|
||||
|
||||
before_content = ''.join(before_parts)
|
||||
|
||||
# Build the after line (with nested highlighting for added parts)
|
||||
after_parts = []
|
||||
for op, text in diffs:
|
||||
if op == 0: # Equal
|
||||
after_parts.append(text)
|
||||
elif op == 1: # Insertion (in after)
|
||||
after_parts.append(f'<span style="{ADDED_INNER_STYLE}">{text}</span>')
|
||||
# Skip deletions (op == -1) for the after line
|
||||
|
||||
after_content = ''.join(after_parts)
|
||||
|
||||
# Wrap content with placemarkers (inner HTML highlighting is preserved)
|
||||
before_html = f'{CHANGED_PLACEMARKER_OPEN}{before_content}{CHANGED_PLACEMARKER_CLOSED}'
|
||||
after_html = f'{CHANGED_INTO_PLACEMARKER_OPEN}{after_content}{CHANGED_INTO_PLACEMARKER_CLOSED}'
|
||||
|
||||
return before_html, after_html, has_changes
|
||||
|
||||
|
||||
def same_slicer(lst: List[str], start: int, end: int) -> List[str]:
|
||||
"""Return a slice of the list, or a single element if start == end."""
|
||||
return lst[start:end] if start != end else [lst[start]]
|
||||
|
||||
def customSequenceMatcher(
|
||||
before: List[str],
|
||||
after: List[str],
|
||||
include_equal: bool = False,
|
||||
include_removed: bool = True,
|
||||
include_added: bool = True,
|
||||
include_replaced: bool = True,
|
||||
include_change_type_prefix: bool = True,
|
||||
word_diff: bool = False,
|
||||
context_lines: int = 0,
|
||||
case_insensitive: bool = False,
|
||||
ignore_junk: bool = False,
|
||||
tokenizer: str = 'words_and_html'
|
||||
) -> Iterator[List[str]]:
|
||||
"""
|
||||
Compare two sequences and yield differences based on specified parameters.
|
||||
|
||||
Args:
|
||||
before (List[str]): Original sequence
|
||||
after (List[str]): Modified sequence
|
||||
include_equal (bool): Include unchanged parts
|
||||
include_removed (bool): Include removed parts
|
||||
include_added (bool): Include added parts
|
||||
include_replaced (bool): Include replaced parts
|
||||
include_change_type_prefix (bool): Add prefixes to indicate change types
|
||||
word_diff (bool): Use word-level diffing for replaced lines (controls inline rendering)
|
||||
context_lines (int): Number of unchanged lines to show around changes (like grep -C)
|
||||
case_insensitive (bool): Perform case-insensitive comparison
|
||||
ignore_junk (bool): Ignore whitespace-only changes
|
||||
tokenizer (str): Name of tokenizer to use from TOKENIZERS registry (default: 'words_and_html')
|
||||
|
||||
Yields:
|
||||
List[str]: Differences between sequences
|
||||
"""
|
||||
# Prepare sequences for comparison (lowercase if case-insensitive, normalize whitespace if ignore_junk)
|
||||
def prepare_line(line):
|
||||
if case_insensitive:
|
||||
line = line.lower()
|
||||
if ignore_junk:
|
||||
# Normalize whitespace: replace multiple spaces/tabs with single space
|
||||
line = WHITESPACE_NORMALIZE_RE.sub(' ', line)
|
||||
return line
|
||||
|
||||
compare_before = [prepare_line(line) for line in before]
|
||||
compare_after = [prepare_line(line) for line in after]
|
||||
|
||||
cruncher = difflib.SequenceMatcher(isjunk=lambda x: x in " \t", a=compare_before, b=compare_after)
|
||||
|
||||
# When context_lines is set and include_equal is False, we need to track which equal lines to include
|
||||
if context_lines > 0 and not include_equal:
|
||||
opcodes = list(cruncher.get_opcodes())
|
||||
# Mark equal ranges that should be included based on context
|
||||
included_equal_ranges = set()
|
||||
|
||||
for i, (tag, alo, ahi, blo, bhi) in enumerate(opcodes):
|
||||
if tag != 'equal':
|
||||
# Include context lines before this change
|
||||
for j in range(max(0, i - 1), i):
|
||||
if opcodes[j][0] == 'equal':
|
||||
prev_alo, prev_ahi = opcodes[j][1], opcodes[j][2]
|
||||
# Include last N lines of the previous equal block
|
||||
context_start = max(prev_alo, prev_ahi - context_lines)
|
||||
for line_num in range(context_start, prev_ahi):
|
||||
included_equal_ranges.add(line_num)
|
||||
|
||||
# Include context lines after this change
|
||||
for j in range(i + 1, min(len(opcodes), i + 2)):
|
||||
if opcodes[j][0] == 'equal':
|
||||
next_alo, next_ahi = opcodes[j][1], opcodes[j][2]
|
||||
# Include first N lines of the next equal block
|
||||
context_end = min(next_ahi, next_alo + context_lines)
|
||||
for line_num in range(next_alo, context_end):
|
||||
included_equal_ranges.add(line_num)
|
||||
|
||||
# Remember! gmail, outlook etc dont support <style> must be inline.
|
||||
# Gmail: strips <ins> and <del> tags entirely.
|
||||
for tag, alo, ahi, blo, bhi in cruncher.get_opcodes():
|
||||
if tag == 'equal':
|
||||
if include_equal:
|
||||
yield before[alo:ahi]
|
||||
elif context_lines > 0:
|
||||
# Only include equal lines that are in the context range
|
||||
context_lines_to_include = [before[i] for i in range(alo, ahi) if i in included_equal_ranges]
|
||||
if context_lines_to_include:
|
||||
yield context_lines_to_include
|
||||
elif include_removed and tag == 'delete':
|
||||
if include_change_type_prefix:
|
||||
yield [f'{REMOVED_PLACEMARKER_OPEN}{line}{REMOVED_PLACEMARKER_CLOSED}' for line in same_slicer(before, alo, ahi)]
|
||||
else:
|
||||
yield same_slicer(before, alo, ahi)
|
||||
elif include_replaced and tag == 'replace':
|
||||
before_lines = same_slicer(before, alo, ahi)
|
||||
after_lines = same_slicer(after, blo, bhi)
|
||||
|
||||
# Use inline word-level diff for single line replacements when word_diff is enabled
|
||||
if word_diff and len(before_lines) == 1 and len(after_lines) == 1:
|
||||
inline_diff, has_changes = render_inline_word_diff(before_lines[0], after_lines[0], ignore_junk=ignore_junk, tokenizer=tokenizer)
|
||||
# Check if there are any actual changes (not just whitespace when ignore_junk is enabled)
|
||||
if ignore_junk and not has_changes:
|
||||
# No real changes, skip this line
|
||||
continue
|
||||
yield [inline_diff]
|
||||
else:
|
||||
# Fall back to line-level diff for multi-line changes
|
||||
if include_change_type_prefix:
|
||||
yield [f'{CHANGED_PLACEMARKER_OPEN}{line}{CHANGED_PLACEMARKER_CLOSED}' for line in before_lines] + \
|
||||
[f'{CHANGED_INTO_PLACEMARKER_OPEN}{line}{CHANGED_INTO_PLACEMARKER_CLOSED}' for line in after_lines]
|
||||
else:
|
||||
yield before_lines + after_lines
|
||||
elif include_added and tag == 'insert':
|
||||
if include_change_type_prefix:
|
||||
yield [f'{ADDED_PLACEMARKER_OPEN}{line}{ADDED_PLACEMARKER_CLOSED}' for line in same_slicer(after, blo, bhi)]
|
||||
else:
|
||||
yield same_slicer(after, blo, bhi)
|
||||
|
||||
def render_diff(
|
||||
previous_version_file_contents: str,
|
||||
newest_version_file_contents: str,
|
||||
include_equal: bool = False,
|
||||
include_removed: bool = True,
|
||||
include_added: bool = True,
|
||||
include_replaced: bool = True,
|
||||
include_change_type_prefix: bool = True,
|
||||
patch_format: bool = False,
|
||||
word_diff: bool = True,
|
||||
context_lines: int = 0,
|
||||
case_insensitive: bool = False,
|
||||
ignore_junk: bool = False,
|
||||
tokenizer: str = 'words_and_html'
|
||||
) -> str:
|
||||
"""
|
||||
Render the difference between two file contents.
|
||||
|
||||
Args:
|
||||
previous_version_file_contents (str): Original file contents
|
||||
newest_version_file_contents (str): Modified file contents
|
||||
include_equal (bool): Include unchanged parts
|
||||
include_removed (bool): Include removed parts
|
||||
include_added (bool): Include added parts
|
||||
include_replaced (bool): Include replaced parts
|
||||
include_change_type_prefix (bool): Add prefixes to indicate change types
|
||||
patch_format (bool): Use patch format for output
|
||||
word_diff (bool): Use word-level diffing for replaced lines (controls inline rendering)
|
||||
context_lines (int): Number of unchanged lines to show around changes (like grep -C)
|
||||
case_insensitive (bool): Perform case-insensitive comparison, By default the test_json_diff/process.py is case sensitive, so this follows same logic
|
||||
ignore_junk (bool): Ignore whitespace-only changes
|
||||
tokenizer (str): Name of tokenizer to use from TOKENIZERS registry (default: 'words_and_html')
|
||||
|
||||
Returns:
|
||||
str: Rendered difference
|
||||
"""
|
||||
newest_lines = [line.rstrip() for line in newest_version_file_contents.splitlines()]
|
||||
previous_lines = [line.rstrip() for line in previous_version_file_contents.splitlines()] if previous_version_file_contents else []
|
||||
|
||||
if patch_format:
|
||||
patch = difflib.unified_diff(previous_lines, newest_lines)
|
||||
return CUSTOM_LINEBREAK_PLACEHOLDER.join(patch)
|
||||
|
||||
rendered_diff = customSequenceMatcher(
|
||||
before=previous_lines,
|
||||
after=newest_lines,
|
||||
include_equal=include_equal,
|
||||
include_removed=include_removed,
|
||||
include_added=include_added,
|
||||
include_replaced=include_replaced,
|
||||
include_change_type_prefix=include_change_type_prefix,
|
||||
word_diff=word_diff,
|
||||
context_lines=context_lines,
|
||||
case_insensitive=case_insensitive,
|
||||
ignore_junk=ignore_junk,
|
||||
tokenizer=tokenizer
|
||||
)
|
||||
|
||||
def flatten(lst: List[Union[str, List[str]]]) -> str:
|
||||
result = []
|
||||
for x in lst:
|
||||
if isinstance(x, list):
|
||||
result.extend(x)
|
||||
else:
|
||||
result.append(x)
|
||||
return CUSTOM_LINEBREAK_PLACEHOLDER.join(result)
|
||||
|
||||
return flatten(rendered_diff)
|
||||
|
||||
|
||||
# Export main public API
|
||||
__all__ = [
|
||||
'render_diff',
|
||||
'customSequenceMatcher',
|
||||
'render_inline_word_diff',
|
||||
'render_nested_line_diff',
|
||||
'TOKENIZERS',
|
||||
'REMOVED_STYLE',
|
||||
'ADDED_STYLE',
|
||||
'REMOVED_INNER_STYLE',
|
||||
'ADDED_INNER_STYLE',
|
||||
]
|
||||
23
changedetectionio/diff/tokenizers/__init__.py
Normal file
23
changedetectionio/diff/tokenizers/__init__.py
Normal file
@@ -0,0 +1,23 @@
|
||||
"""
|
||||
Tokenizers for diff operations.
|
||||
|
||||
This module provides various tokenization strategies for use with the diff system.
|
||||
New tokenizers can be easily added by:
|
||||
1. Creating a new module in this directory
|
||||
2. Importing and registering it in the TOKENIZERS dictionary below
|
||||
"""
|
||||
|
||||
from .natural_text import tokenize_words
|
||||
from .words_and_html import tokenize_words_and_html
|
||||
|
||||
# Tokenizer registry - maps tokenizer names to functions
|
||||
TOKENIZERS = {
|
||||
'words': tokenize_words,
|
||||
'words_and_html': tokenize_words_and_html,
|
||||
}
|
||||
|
||||
__all__ = [
|
||||
'tokenize_words',
|
||||
'tokenize_words_and_html',
|
||||
'TOKENIZERS',
|
||||
]
|
||||
44
changedetectionio/diff/tokenizers/natural_text.py
Normal file
44
changedetectionio/diff/tokenizers/natural_text.py
Normal file
@@ -0,0 +1,44 @@
|
||||
"""
|
||||
Simple word tokenizer using whitespace boundaries.
|
||||
|
||||
This is a simpler tokenizer that treats all whitespace as token boundaries
|
||||
without special handling for HTML tags or other markup.
|
||||
"""
|
||||
|
||||
from typing import List
|
||||
|
||||
|
||||
def tokenize_words(text: str) -> List[str]:
|
||||
"""
|
||||
Split text into words using simple whitespace boundaries.
|
||||
|
||||
This is a simpler tokenizer that treats all whitespace as token boundaries
|
||||
without special handling for HTML tags.
|
||||
|
||||
Args:
|
||||
text: Input text to tokenize
|
||||
|
||||
Returns:
|
||||
List of tokens (words and whitespace)
|
||||
|
||||
Examples:
|
||||
>>> tokenize_words("Hello world")
|
||||
['Hello', ' ', 'world']
|
||||
>>> tokenize_words("one two")
|
||||
['one', ' ', ' ', 'two']
|
||||
"""
|
||||
tokens = []
|
||||
current = ''
|
||||
|
||||
for char in text:
|
||||
if char.isspace():
|
||||
if current:
|
||||
tokens.append(current)
|
||||
current = ''
|
||||
tokens.append(char)
|
||||
else:
|
||||
current += char
|
||||
|
||||
if current:
|
||||
tokens.append(current)
|
||||
return tokens
|
||||
61
changedetectionio/diff/tokenizers/words_and_html.py
Normal file
61
changedetectionio/diff/tokenizers/words_and_html.py
Normal file
@@ -0,0 +1,61 @@
|
||||
"""
|
||||
Tokenizer that preserves HTML tags as atomic units while splitting on whitespace.
|
||||
|
||||
This tokenizer is specifically designed for HTML content where:
|
||||
- HTML tags should remain intact (e.g., '<p>', '<a href="...">')
|
||||
- Whitespace tokens are preserved for accurate diff reconstruction
|
||||
- Words are split on whitespace boundaries
|
||||
"""
|
||||
|
||||
from typing import List
|
||||
|
||||
|
||||
def tokenize_words_and_html(text: str) -> List[str]:
|
||||
"""
|
||||
Split text into words and boundaries (spaces, HTML tags).
|
||||
|
||||
This tokenizer preserves HTML tags as atomic units while splitting on whitespace.
|
||||
Useful for content that contains HTML markup.
|
||||
|
||||
Args:
|
||||
text: Input text to tokenize
|
||||
|
||||
Returns:
|
||||
List of tokens (words, spaces, HTML tags)
|
||||
|
||||
Examples:
|
||||
>>> tokenize_words_and_html("<p>Hello world</p>")
|
||||
['<p>', 'Hello', ' ', 'world', '</p>']
|
||||
>>> tokenize_words_and_html("<a href='test.com'>link</a>")
|
||||
['<a href=\\'test.com\\'>', 'link', '</a>']
|
||||
"""
|
||||
tokens = []
|
||||
current = ''
|
||||
in_tag = False
|
||||
|
||||
for char in text:
|
||||
if char == '<':
|
||||
# Start of HTML tag
|
||||
if current:
|
||||
tokens.append(current)
|
||||
current = ''
|
||||
current = '<'
|
||||
in_tag = True
|
||||
elif char == '>' and in_tag:
|
||||
# End of HTML tag
|
||||
current += '>'
|
||||
tokens.append(current)
|
||||
current = ''
|
||||
in_tag = False
|
||||
elif char.isspace() and not in_tag:
|
||||
# Space outside of tag
|
||||
if current:
|
||||
tokens.append(current)
|
||||
current = ''
|
||||
tokens.append(char)
|
||||
else:
|
||||
current += char
|
||||
|
||||
if current:
|
||||
tokens.append(current)
|
||||
return tokens
|
||||
@@ -4,49 +4,53 @@ import flask_login
|
||||
import locale
|
||||
import os
|
||||
import queue
|
||||
import sys
|
||||
import threading
|
||||
import time
|
||||
import timeago
|
||||
from blinker import signal
|
||||
|
||||
from changedetectionio.strtobool import strtobool
|
||||
from threading import Event
|
||||
from changedetectionio.queue_handlers import RecheckPriorityQueue, NotificationQueue
|
||||
from changedetectionio import worker_handler
|
||||
|
||||
from flask import (
|
||||
Flask,
|
||||
abort,
|
||||
flash,
|
||||
make_response,
|
||||
redirect,
|
||||
render_template,
|
||||
request,
|
||||
send_from_directory,
|
||||
session,
|
||||
url_for,
|
||||
)
|
||||
from flask_compress import Compress as FlaskCompress
|
||||
from flask_login import current_user
|
||||
from flask_paginate import Pagination, get_page_parameter
|
||||
from flask_restful import abort, Api
|
||||
from flask_cors import CORS
|
||||
|
||||
# Create specific signals for application events
|
||||
# Make this a global singleton to avoid multiple signal objects
|
||||
watch_check_update = signal('watch_check_update', doc='Signal sent when a watch check is completed')
|
||||
from flask_wtf import CSRFProtect
|
||||
from loguru import logger
|
||||
|
||||
from changedetectionio import __version__
|
||||
from changedetectionio import queuedWatchMetaData
|
||||
from changedetectionio.api import Watch, WatchHistory, WatchSingleHistory, CreateWatch, Import, SystemInfo, Tag, Tags, Notifications
|
||||
from changedetectionio.api import Watch, WatchHistory, WatchSingleHistory, WatchHistoryDiff, CreateWatch, Import, SystemInfo, Tag, Tags, Notifications, WatchFavicon
|
||||
from changedetectionio.api.Search import Search
|
||||
from .time_handler import is_within_schedule
|
||||
|
||||
datastore = None
|
||||
|
||||
# Local
|
||||
running_update_threads = []
|
||||
ticker_thread = None
|
||||
|
||||
extra_stylesheets = []
|
||||
|
||||
update_q = queue.PriorityQueue()
|
||||
notification_q = queue.Queue()
|
||||
# Use bulletproof janus-based queues for sync/async reliability
|
||||
update_q = RecheckPriorityQueue()
|
||||
notification_q = NotificationQueue()
|
||||
MAX_QUEUE_SIZE = 2000
|
||||
|
||||
app = Flask(__name__,
|
||||
@@ -54,6 +58,9 @@ app = Flask(__name__,
|
||||
static_folder="static",
|
||||
template_folder="templates")
|
||||
|
||||
# Will be initialized in changedetection_app
|
||||
socketio_server = None
|
||||
|
||||
# Enable CORS, especially useful for the Chrome extension to operate from anywhere
|
||||
CORS(app)
|
||||
|
||||
@@ -91,7 +98,7 @@ watch_api = Api(app, decorators=[csrf.exempt])
|
||||
def init_app_secret(datastore_path):
|
||||
secret = ""
|
||||
|
||||
path = "{}/secret.txt".format(datastore_path)
|
||||
path = os.path.join(datastore_path, "secret.txt")
|
||||
|
||||
try:
|
||||
with open(path, "r") as f:
|
||||
@@ -115,6 +122,23 @@ def get_darkmode_state():
|
||||
def get_css_version():
|
||||
return __version__
|
||||
|
||||
@app.template_global()
|
||||
def get_socketio_path():
|
||||
"""Generate the correct Socket.IO path prefix for the client"""
|
||||
# If behind a proxy with a sub-path, we need to respect that path
|
||||
prefix = ""
|
||||
if os.getenv('USE_X_SETTINGS') and 'X-Forwarded-Prefix' in request.headers:
|
||||
prefix = request.headers['X-Forwarded-Prefix']
|
||||
|
||||
# Socket.IO will be available at {prefix}/socket.io/
|
||||
return prefix
|
||||
|
||||
@app.template_global('is_safe_valid_url')
|
||||
def _is_safe_valid_url(test_url):
|
||||
from .validate_url import is_safe_valid_url
|
||||
return is_safe_valid_url(test_url)
|
||||
|
||||
|
||||
@app.template_filter('format_number_locale')
|
||||
def _jinja2_filter_format_number_locale(value: float) -> str:
|
||||
"Formats for example 4000.10 to the local locale default of 4,000.10"
|
||||
@@ -125,10 +149,32 @@ def _jinja2_filter_format_number_locale(value: float) -> str:
|
||||
|
||||
@app.template_global('is_checking_now')
|
||||
def _watch_is_checking_now(watch_obj, format="%Y-%m-%d %H:%M:%S"):
|
||||
# Worker thread tells us which UUID it is currently processing.
|
||||
for t in running_update_threads:
|
||||
if t.current_uuid == watch_obj['uuid']:
|
||||
return True
|
||||
return worker_handler.is_watch_running(watch_obj['uuid'])
|
||||
|
||||
@app.template_global('get_watch_queue_position')
|
||||
def _get_watch_queue_position(watch_obj):
|
||||
"""Get the position of a watch in the queue"""
|
||||
uuid = watch_obj['uuid']
|
||||
return update_q.get_uuid_position(uuid)
|
||||
|
||||
@app.template_global('get_current_worker_count')
|
||||
def _get_current_worker_count():
|
||||
"""Get the current number of operational workers"""
|
||||
return worker_handler.get_worker_count()
|
||||
|
||||
@app.template_global('get_worker_status_info')
|
||||
def _get_worker_status_info():
|
||||
"""Get detailed worker status information for display"""
|
||||
status = worker_handler.get_worker_status()
|
||||
running_uuids = worker_handler.get_running_uuids()
|
||||
|
||||
return {
|
||||
'count': status['worker_count'],
|
||||
'type': status['worker_type'],
|
||||
'active_workers': len(running_uuids),
|
||||
'processing_watches': running_uuids,
|
||||
'loop_running': status.get('async_loop_running', None)
|
||||
}
|
||||
|
||||
|
||||
# We use the whole watch object from the store/JSON so we can see if there's some related status in terms of a thread
|
||||
@@ -215,12 +261,15 @@ class User(flask_login.UserMixin):
|
||||
def changedetection_app(config=None, datastore_o=None):
|
||||
logger.trace("TRACE log is enabled")
|
||||
|
||||
global datastore
|
||||
global datastore, socketio_server
|
||||
datastore = datastore_o
|
||||
|
||||
# so far just for read-only via tests, but this will be moved eventually to be the main source
|
||||
# (instead of the global var)
|
||||
app.config['DATASTORE'] = datastore_o
|
||||
|
||||
# Store the signal in the app config to ensure it's accessible everywhere
|
||||
app.config['watch_check_update_SIGNAL'] = watch_check_update
|
||||
|
||||
login_manager = flask_login.LoginManager(app)
|
||||
login_manager.login_view = 'login'
|
||||
@@ -248,6 +297,9 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
# RSS access with token is allowed
|
||||
elif request.endpoint and 'rss.feed' in request.endpoint:
|
||||
return None
|
||||
# Socket.IO routes - need separate handling
|
||||
elif request.path.startswith('/socket.io/'):
|
||||
return None
|
||||
# API routes - use their own auth mechanism (@auth.check_token)
|
||||
elif request.path.startswith('/api/'):
|
||||
return None
|
||||
@@ -255,10 +307,15 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
return login_manager.unauthorized()
|
||||
|
||||
|
||||
watch_api.add_resource(WatchHistoryDiff,
|
||||
'/api/v1/watch/<string:uuid>/difference/<string:from_timestamp>/<string:to_timestamp>',
|
||||
resource_class_kwargs={'datastore': datastore})
|
||||
watch_api.add_resource(WatchSingleHistory,
|
||||
'/api/v1/watch/<string:uuid>/history/<string:timestamp>',
|
||||
resource_class_kwargs={'datastore': datastore, 'update_q': update_q})
|
||||
|
||||
watch_api.add_resource(WatchFavicon,
|
||||
'/api/v1/watch/<string:uuid>/favicon',
|
||||
resource_class_kwargs={'datastore': datastore})
|
||||
watch_api.add_resource(WatchHistory,
|
||||
'/api/v1/watch/<string:uuid>/history',
|
||||
resource_class_kwargs={'datastore': datastore})
|
||||
@@ -280,7 +337,7 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
resource_class_kwargs={'datastore': datastore})
|
||||
|
||||
watch_api.add_resource(Tag, '/api/v1/tag', '/api/v1/tag/<string:uuid>',
|
||||
resource_class_kwargs={'datastore': datastore})
|
||||
resource_class_kwargs={'datastore': datastore, 'update_q': update_q})
|
||||
|
||||
watch_api.add_resource(Search, '/api/v1/search',
|
||||
resource_class_kwargs={'datastore': datastore})
|
||||
@@ -333,7 +390,7 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
# We would sometimes get login loop errors on sites hosted in sub-paths
|
||||
|
||||
# note for the future:
|
||||
# if not is_safe_url(next):
|
||||
# if not is_safe_valid_url(next):
|
||||
# return flask.abort(400)
|
||||
return redirect(url_for('watchlist.index'))
|
||||
|
||||
@@ -378,6 +435,32 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
except FileNotFoundError:
|
||||
abort(404)
|
||||
|
||||
if group == 'favicon':
|
||||
# Could be sensitive, follow password requirements
|
||||
if datastore.data['settings']['application']['password'] and not flask_login.current_user.is_authenticated:
|
||||
abort(403)
|
||||
# Get the watch object
|
||||
watch = datastore.data['watching'].get(filename)
|
||||
if not watch:
|
||||
abort(404)
|
||||
|
||||
favicon_filename = watch.get_favicon_filename()
|
||||
if favicon_filename:
|
||||
try:
|
||||
import magic
|
||||
mime = magic.from_file(
|
||||
os.path.join(watch.watch_data_dir, favicon_filename),
|
||||
mime=True
|
||||
)
|
||||
except ImportError:
|
||||
# Fallback, no python-magic
|
||||
import mimetypes
|
||||
mime, encoding = mimetypes.guess_type(favicon_filename)
|
||||
|
||||
response = make_response(send_from_directory(watch.watch_data_dir, favicon_filename))
|
||||
response.headers['Content-type'] = mime
|
||||
response.headers['Cache-Control'] = 'max-age=300, must-revalidate' # Cache for 5 minutes, then revalidate
|
||||
return response
|
||||
|
||||
if group == 'visual_selector_data':
|
||||
# Could be sensitive, follow password requirements
|
||||
@@ -444,11 +527,22 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
|
||||
# watchlist UI buttons etc
|
||||
import changedetectionio.blueprint.ui as ui
|
||||
app.register_blueprint(ui.construct_blueprint(datastore, update_q, running_update_threads, queuedWatchMetaData))
|
||||
app.register_blueprint(ui.construct_blueprint(datastore, update_q, worker_handler, queuedWatchMetaData, watch_check_update))
|
||||
|
||||
import changedetectionio.blueprint.watchlist as watchlist
|
||||
app.register_blueprint(watchlist.construct_blueprint(datastore=datastore, update_q=update_q, queuedWatchMetaData=queuedWatchMetaData), url_prefix='')
|
||||
|
||||
|
||||
# Initialize Socket.IO server conditionally based on settings
|
||||
socket_io_enabled = datastore.data['settings']['application']['ui'].get('socket_io_enabled', True)
|
||||
if socket_io_enabled:
|
||||
from changedetectionio.realtime.socket_server import init_socketio
|
||||
global socketio_server
|
||||
socketio_server = init_socketio(app, datastore)
|
||||
logger.info("Socket.IO server initialized")
|
||||
else:
|
||||
logger.info("Socket.IO server disabled via settings")
|
||||
socketio_server = None
|
||||
|
||||
# Memory cleanup endpoint
|
||||
@app.route('/gc-cleanup', methods=['GET'])
|
||||
@login_optionally_required
|
||||
@@ -459,14 +553,95 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
result = memory_cleanup(app)
|
||||
return jsonify({"status": "success", "message": "Memory cleanup completed", "result": result})
|
||||
|
||||
# Worker health check endpoint
|
||||
@app.route('/worker-health', methods=['GET'])
|
||||
@login_optionally_required
|
||||
def worker_health():
|
||||
from flask import jsonify
|
||||
|
||||
expected_workers = int(os.getenv("FETCH_WORKERS", datastore.data['settings']['requests']['workers']))
|
||||
|
||||
# Get basic status
|
||||
status = worker_handler.get_worker_status()
|
||||
|
||||
# Perform health check
|
||||
health_result = worker_handler.check_worker_health(
|
||||
expected_count=expected_workers,
|
||||
update_q=update_q,
|
||||
notification_q=notification_q,
|
||||
app=app,
|
||||
datastore=datastore
|
||||
)
|
||||
|
||||
return jsonify({
|
||||
"status": "success",
|
||||
"worker_status": status,
|
||||
"health_check": health_result,
|
||||
"expected_workers": expected_workers
|
||||
})
|
||||
|
||||
# Queue status endpoint
|
||||
@app.route('/queue-status', methods=['GET'])
|
||||
@login_optionally_required
|
||||
def queue_status():
|
||||
from flask import jsonify, request
|
||||
|
||||
# Get specific UUID position if requested
|
||||
target_uuid = request.args.get('uuid')
|
||||
|
||||
if target_uuid:
|
||||
position_info = update_q.get_uuid_position(target_uuid)
|
||||
return jsonify({
|
||||
"status": "success",
|
||||
"uuid": target_uuid,
|
||||
"queue_position": position_info
|
||||
})
|
||||
else:
|
||||
# Get pagination parameters
|
||||
limit = request.args.get('limit', type=int)
|
||||
offset = request.args.get('offset', type=int, default=0)
|
||||
summary_only = request.args.get('summary', type=bool, default=False)
|
||||
|
||||
if summary_only:
|
||||
# Fast summary for large queues
|
||||
summary = update_q.get_queue_summary()
|
||||
return jsonify({
|
||||
"status": "success",
|
||||
"queue_summary": summary
|
||||
})
|
||||
else:
|
||||
# Get queued items with pagination support
|
||||
if limit is None:
|
||||
# Default limit for large queues to prevent performance issues
|
||||
queue_size = update_q.qsize()
|
||||
if queue_size > 100:
|
||||
limit = 50
|
||||
logger.warning(f"Large queue ({queue_size} items) detected, limiting to {limit} items. Use ?limit=N for more.")
|
||||
|
||||
all_queued = update_q.get_all_queued_uuids(limit=limit, offset=offset)
|
||||
return jsonify({
|
||||
"status": "success",
|
||||
"queue_size": update_q.qsize(),
|
||||
"queued_data": all_queued
|
||||
})
|
||||
|
||||
# Start the async workers during app initialization
|
||||
# Can be overridden by ENV or use the default settings
|
||||
n_workers = int(os.getenv("FETCH_WORKERS", datastore.data['settings']['requests']['workers']))
|
||||
logger.info(f"Starting {n_workers} workers during app initialization")
|
||||
worker_handler.start_workers(n_workers, update_q, notification_q, app, datastore)
|
||||
|
||||
# @todo handle ctrl break
|
||||
ticker_thread = threading.Thread(target=ticker_thread_check_time_launch_checks).start()
|
||||
threading.Thread(target=notification_runner).start()
|
||||
|
||||
in_pytest = "pytest" in sys.modules or "PYTEST_CURRENT_TEST" in os.environ
|
||||
# Check for new release version, but not when running in test/build or pytest
|
||||
if not os.getenv("GITHUB_REF", False) and not strtobool(os.getenv('DISABLE_VERSION_CHECK', 'no')):
|
||||
if not os.getenv("GITHUB_REF", False) and not strtobool(os.getenv('DISABLE_VERSION_CHECK', 'no')) and not in_pytest:
|
||||
threading.Thread(target=check_for_new_version).start()
|
||||
|
||||
# Return the Flask app - the Socket.IO will be attached to it but initialized separately
|
||||
# This avoids circular dependencies
|
||||
return app
|
||||
|
||||
|
||||
@@ -502,73 +677,87 @@ def notification_runner():
|
||||
global notification_debug_log
|
||||
from datetime import datetime
|
||||
import json
|
||||
while not app.config.exit.is_set():
|
||||
try:
|
||||
# At the moment only one thread runs (single runner)
|
||||
n_object = notification_q.get(block=False)
|
||||
except queue.Empty:
|
||||
time.sleep(1)
|
||||
|
||||
else:
|
||||
|
||||
now = datetime.now()
|
||||
sent_obj = None
|
||||
|
||||
with app.app_context():
|
||||
while not app.config.exit.is_set():
|
||||
try:
|
||||
from changedetectionio.notification.handler import process_notification
|
||||
# At the moment only one thread runs (single runner)
|
||||
n_object = notification_q.get(block=False)
|
||||
except queue.Empty:
|
||||
time.sleep(1)
|
||||
|
||||
# Fallback to system config if not set
|
||||
if not n_object.get('notification_body') and datastore.data['settings']['application'].get('notification_body'):
|
||||
n_object['notification_body'] = datastore.data['settings']['application'].get('notification_body')
|
||||
else:
|
||||
|
||||
if not n_object.get('notification_title') and datastore.data['settings']['application'].get('notification_title'):
|
||||
n_object['notification_title'] = datastore.data['settings']['application'].get('notification_title')
|
||||
now = datetime.now()
|
||||
sent_obj = None
|
||||
|
||||
if not n_object.get('notification_format') and datastore.data['settings']['application'].get('notification_format'):
|
||||
n_object['notification_format'] = datastore.data['settings']['application'].get('notification_format')
|
||||
if n_object.get('notification_urls', {}):
|
||||
sent_obj = process_notification(n_object, datastore)
|
||||
try:
|
||||
from changedetectionio.notification.handler import process_notification
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Watch URL: {n_object['watch_url']} Error {str(e)}")
|
||||
# Fallback to system config if not set
|
||||
if not n_object.get('notification_body') and datastore.data['settings']['application'].get('notification_body'):
|
||||
n_object['notification_body'] = datastore.data['settings']['application'].get('notification_body')
|
||||
|
||||
# UUID wont be present when we submit a 'test' from the global settings
|
||||
if 'uuid' in n_object:
|
||||
datastore.update_watch(uuid=n_object['uuid'],
|
||||
update_obj={'last_notification_error': "Notification error detected, goto notification log."})
|
||||
if not n_object.get('notification_title') and datastore.data['settings']['application'].get('notification_title'):
|
||||
n_object['notification_title'] = datastore.data['settings']['application'].get('notification_title')
|
||||
|
||||
if not n_object.get('notification_format') and datastore.data['settings']['application'].get('notification_format'):
|
||||
n_object['notification_format'] = datastore.data['settings']['application'].get('notification_format')
|
||||
if n_object.get('notification_urls', {}):
|
||||
sent_obj = process_notification(n_object, datastore)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Watch URL: {n_object['watch_url']} Error {str(e)}")
|
||||
|
||||
# UUID wont be present when we submit a 'test' from the global settings
|
||||
if 'uuid' in n_object:
|
||||
datastore.update_watch(uuid=n_object['uuid'],
|
||||
update_obj={'last_notification_error': "Notification error detected, goto notification log."})
|
||||
|
||||
log_lines = str(e).splitlines()
|
||||
notification_debug_log += log_lines
|
||||
|
||||
with app.app_context():
|
||||
app.config['watch_check_update_SIGNAL'].send(app_context=app, watch_uuid=n_object.get('uuid'))
|
||||
|
||||
# Process notifications
|
||||
notification_debug_log+= ["{} - SENDING - {}".format(now.strftime("%Y/%m/%d %H:%M:%S,000"), json.dumps(sent_obj))]
|
||||
# Trim the log length
|
||||
notification_debug_log = notification_debug_log[-100:]
|
||||
|
||||
log_lines = str(e).splitlines()
|
||||
notification_debug_log += log_lines
|
||||
|
||||
# Process notifications
|
||||
notification_debug_log+= ["{} - SENDING - {}".format(now.strftime("%Y/%m/%d %H:%M:%S,000"), json.dumps(sent_obj))]
|
||||
# Trim the log length
|
||||
notification_debug_log = notification_debug_log[-100:]
|
||||
|
||||
# Threaded runner, look for new watches to feed into the Queue.
|
||||
def ticker_thread_check_time_launch_checks():
|
||||
import random
|
||||
from changedetectionio import update_worker
|
||||
proxy_last_called_time = {}
|
||||
last_health_check = 0
|
||||
|
||||
recheck_time_minimum_seconds = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 3))
|
||||
logger.debug(f"System env MINIMUM_SECONDS_RECHECK_TIME {recheck_time_minimum_seconds}")
|
||||
|
||||
# Spin up Workers that do the fetching
|
||||
# Can be overriden by ENV or use the default settings
|
||||
n_workers = int(os.getenv("FETCH_WORKERS", datastore.data['settings']['requests']['workers']))
|
||||
for _ in range(n_workers):
|
||||
new_worker = update_worker.update_worker(update_q, notification_q, app, datastore)
|
||||
running_update_threads.append(new_worker)
|
||||
new_worker.start()
|
||||
# Workers are now started during app initialization, not here
|
||||
|
||||
while not app.config.exit.is_set():
|
||||
|
||||
# Periodic worker health check (every 60 seconds)
|
||||
now = time.time()
|
||||
if now - last_health_check > 60:
|
||||
expected_workers = int(os.getenv("FETCH_WORKERS", datastore.data['settings']['requests']['workers']))
|
||||
health_result = worker_handler.check_worker_health(
|
||||
expected_count=expected_workers,
|
||||
update_q=update_q,
|
||||
notification_q=notification_q,
|
||||
app=app,
|
||||
datastore=datastore
|
||||
)
|
||||
|
||||
if health_result['status'] != 'healthy':
|
||||
logger.warning(f"Worker health check: {health_result['message']}")
|
||||
|
||||
last_health_check = now
|
||||
|
||||
# Get a list of watches by UUID that are currently fetching data
|
||||
running_uuids = []
|
||||
for t in running_update_threads:
|
||||
if t.current_uuid:
|
||||
running_uuids.append(t.current_uuid)
|
||||
running_uuids = worker_handler.get_running_uuids()
|
||||
|
||||
# Re #232 - Deepcopy the data incase it changes while we're iterating through it all
|
||||
watch_uuid_list = []
|
||||
@@ -614,7 +803,7 @@ def ticker_thread_check_time_launch_checks():
|
||||
else:
|
||||
time_schedule_limit = watch.get('time_schedule_limit')
|
||||
logger.trace(f"{uuid} Time scheduler - Using watch settings (not global settings)")
|
||||
tz_name = datastore.data['settings']['application'].get('timezone', 'UTC')
|
||||
tz_name = datastore.data['settings']['application'].get('scheduler_timezone_default', os.getenv('TZ', 'UTC').strip())
|
||||
|
||||
if time_schedule_limit and time_schedule_limit.get('enabled'):
|
||||
try:
|
||||
@@ -663,16 +852,22 @@ def ticker_thread_check_time_launch_checks():
|
||||
|
||||
# Use Epoch time as priority, so we get a "sorted" PriorityQueue, but we can still push a priority 1 into it.
|
||||
priority = int(time.time())
|
||||
logger.debug(
|
||||
f"> Queued watch UUID {uuid} "
|
||||
f"last checked at {watch['last_checked']} "
|
||||
f"queued at {now:0.2f} priority {priority} "
|
||||
f"jitter {watch.jitter_seconds:0.2f}s, "
|
||||
f"{now - watch['last_checked']:0.2f}s since last checked")
|
||||
|
||||
# Into the queue with you
|
||||
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=priority, item={'uuid': uuid}))
|
||||
|
||||
queued_successfully = worker_handler.queue_item_async_safe(update_q,
|
||||
queuedWatchMetaData.PrioritizedItem(priority=priority,
|
||||
item={'uuid': uuid})
|
||||
)
|
||||
if queued_successfully:
|
||||
logger.debug(
|
||||
f"> Queued watch UUID {uuid} "
|
||||
f"last checked at {watch['last_checked']} "
|
||||
f"queued at {now:0.2f} priority {priority} "
|
||||
f"jitter {watch.jitter_seconds:0.2f}s, "
|
||||
f"{now - watch['last_checked']:0.2f}s since last checked")
|
||||
else:
|
||||
logger.critical(f"CRITICAL: Failed to queue watch UUID {uuid} in ticker thread!")
|
||||
|
||||
# Reset for next time
|
||||
watch.jitter_seconds = 0
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@ from wtforms.widgets.core import TimeInput
|
||||
|
||||
from changedetectionio.blueprint.rss import RSS_FORMAT_TYPES
|
||||
from changedetectionio.conditions.form import ConditionFormRow
|
||||
from changedetectionio.notification_service import NotificationContextData
|
||||
from changedetectionio.strtobool import strtobool
|
||||
|
||||
from wtforms import (
|
||||
@@ -23,11 +24,11 @@ from wtforms import (
|
||||
)
|
||||
from flask_wtf.file import FileField, FileAllowed
|
||||
from wtforms.fields import FieldList
|
||||
from wtforms.utils import unset_value
|
||||
|
||||
from wtforms.validators import ValidationError
|
||||
|
||||
from validators.url import url as url_validator
|
||||
|
||||
from changedetectionio.widgets import TernaryNoneBooleanField
|
||||
|
||||
# default
|
||||
# each select <option data-enabled="enabled-0-0"
|
||||
@@ -54,6 +55,8 @@ valid_method = {
|
||||
|
||||
default_method = 'GET'
|
||||
allow_simplehost = not strtobool(os.getenv('BLOCK_SIMPLEHOSTS', 'False'))
|
||||
REQUIRE_ATLEAST_ONE_TIME_PART_MESSAGE_DEFAULT='At least one time interval (weeks, days, hours, minutes, or seconds) must be specified.'
|
||||
REQUIRE_ATLEAST_ONE_TIME_PART_WHEN_NOT_GLOBAL_DEFAULT='At least one time interval (weeks, days, hours, minutes, or seconds) must be specified when not using global settings.'
|
||||
|
||||
class StringListField(StringField):
|
||||
widget = widgets.TextArea()
|
||||
@@ -210,6 +213,35 @@ class ScheduleLimitForm(Form):
|
||||
self.sunday.form.enabled.label.text = "Sunday"
|
||||
|
||||
|
||||
def validate_time_between_check_has_values(form):
|
||||
"""
|
||||
Custom validation function for TimeBetweenCheckForm.
|
||||
Returns True if at least one time interval field has a value > 0.
|
||||
"""
|
||||
res = any([
|
||||
form.weeks.data and int(form.weeks.data) > 0,
|
||||
form.days.data and int(form.days.data) > 0,
|
||||
form.hours.data and int(form.hours.data) > 0,
|
||||
form.minutes.data and int(form.minutes.data) > 0,
|
||||
form.seconds.data and int(form.seconds.data) > 0
|
||||
])
|
||||
|
||||
return res
|
||||
|
||||
|
||||
class RequiredTimeInterval(object):
|
||||
"""
|
||||
WTForms validator that ensures at least one time interval field has a value > 0.
|
||||
Use this with FormField(TimeBetweenCheckForm, validators=[RequiredTimeInterval()]).
|
||||
"""
|
||||
def __init__(self, message=None):
|
||||
self.message = message or 'At least one time interval (weeks, days, hours, minutes, or seconds) must be specified.'
|
||||
|
||||
def __call__(self, form, field):
|
||||
if not validate_time_between_check_has_values(field.form):
|
||||
raise ValidationError(self.message)
|
||||
|
||||
|
||||
class TimeBetweenCheckForm(Form):
|
||||
weeks = IntegerField('Weeks', validators=[validators.Optional(), validators.NumberRange(min=0, message="Should contain zero or more seconds")])
|
||||
days = IntegerField('Days', validators=[validators.Optional(), validators.NumberRange(min=0, message="Should contain zero or more seconds")])
|
||||
@@ -218,33 +250,160 @@ class TimeBetweenCheckForm(Form):
|
||||
seconds = IntegerField('Seconds', validators=[validators.Optional(), validators.NumberRange(min=0, message="Should contain zero or more seconds")])
|
||||
# @todo add total seconds minimum validatior = minimum_seconds_recheck_time
|
||||
|
||||
def __init__(self, formdata=None, obj=None, prefix="", data=None, meta=None, **kwargs):
|
||||
super().__init__(formdata, obj, prefix, data, meta, **kwargs)
|
||||
self.require_at_least_one = kwargs.get('require_at_least_one', False)
|
||||
self.require_at_least_one_message = kwargs.get('require_at_least_one_message', REQUIRE_ATLEAST_ONE_TIME_PART_MESSAGE_DEFAULT)
|
||||
|
||||
def validate(self, **kwargs):
|
||||
"""Custom validation that can optionally require at least one time interval."""
|
||||
# Run normal field validation first
|
||||
if not super().validate(**kwargs):
|
||||
return False
|
||||
|
||||
# Apply optional "at least one" validation
|
||||
if self.require_at_least_one:
|
||||
if not validate_time_between_check_has_values(self):
|
||||
# Add error to the form's general errors (not field-specific)
|
||||
if not hasattr(self, '_formdata_errors'):
|
||||
self._formdata_errors = []
|
||||
self._formdata_errors.append(self.require_at_least_one_message)
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
|
||||
class EnhancedFormField(FormField):
|
||||
"""
|
||||
An enhanced FormField that supports conditional validation with top-level error messages.
|
||||
Adds a 'top_errors' property for validation errors at the FormField level.
|
||||
"""
|
||||
|
||||
def __init__(self, form_class, label=None, validators=None, separator="-",
|
||||
conditional_field=None, conditional_message=None, conditional_test_function=None, **kwargs):
|
||||
"""
|
||||
Initialize EnhancedFormField with optional conditional validation.
|
||||
|
||||
:param conditional_field: Name of the field this FormField depends on (e.g. 'time_between_check_use_default')
|
||||
:param conditional_message: Error message to show when validation fails
|
||||
:param conditional_test_function: Custom function to test if FormField has valid values.
|
||||
Should take self.form as parameter and return True if valid.
|
||||
"""
|
||||
super().__init__(form_class, label, validators, separator, **kwargs)
|
||||
self.top_errors = []
|
||||
self.conditional_field = conditional_field
|
||||
self.conditional_message = conditional_message or "At least one field must have a value when not using defaults."
|
||||
self.conditional_test_function = conditional_test_function
|
||||
|
||||
def validate(self, form, extra_validators=()):
|
||||
"""
|
||||
Custom validation that supports conditional logic and stores top-level errors.
|
||||
"""
|
||||
self.top_errors = []
|
||||
|
||||
# First run the normal FormField validation
|
||||
base_valid = super().validate(form, extra_validators)
|
||||
|
||||
# Apply conditional validation if configured
|
||||
if self.conditional_field and hasattr(form, self.conditional_field):
|
||||
conditional_field_obj = getattr(form, self.conditional_field)
|
||||
|
||||
# If the conditional field is False/unchecked, check if this FormField has any values
|
||||
if not conditional_field_obj.data:
|
||||
# Use custom test function if provided, otherwise use generic fallback
|
||||
if self.conditional_test_function:
|
||||
has_any_value = self.conditional_test_function(self.form)
|
||||
else:
|
||||
# Generic fallback - check if any field has truthy data
|
||||
has_any_value = any(field.data for field in self.form if hasattr(field, 'data') and field.data)
|
||||
|
||||
if not has_any_value:
|
||||
self.top_errors.append(self.conditional_message)
|
||||
base_valid = False
|
||||
|
||||
return base_valid
|
||||
|
||||
|
||||
class RequiredFormField(FormField):
|
||||
"""
|
||||
A FormField that passes require_at_least_one=True to TimeBetweenCheckForm.
|
||||
Use this when you want the sub-form to always require at least one value.
|
||||
"""
|
||||
|
||||
def __init__(self, form_class, label=None, validators=None, separator="-", **kwargs):
|
||||
super().__init__(form_class, label, validators, separator, **kwargs)
|
||||
|
||||
def process(self, formdata, data=unset_value, extra_filters=None):
|
||||
if extra_filters:
|
||||
raise TypeError(
|
||||
"FormField cannot take filters, as the encapsulated"
|
||||
"data is not mutable."
|
||||
)
|
||||
|
||||
if data is unset_value:
|
||||
try:
|
||||
data = self.default()
|
||||
except TypeError:
|
||||
data = self.default
|
||||
self._obj = data
|
||||
|
||||
self.object_data = data
|
||||
|
||||
prefix = self.name + self.separator
|
||||
# Pass require_at_least_one=True to the sub-form
|
||||
if isinstance(data, dict):
|
||||
self.form = self.form_class(formdata=formdata, prefix=prefix, require_at_least_one=True, **data)
|
||||
else:
|
||||
self.form = self.form_class(formdata=formdata, obj=data, prefix=prefix, require_at_least_one=True)
|
||||
|
||||
@property
|
||||
def errors(self):
|
||||
"""Include sub-form validation errors"""
|
||||
form_errors = self.form.errors
|
||||
# Add any general form errors to a special 'form' key
|
||||
if hasattr(self.form, '_formdata_errors') and self.form._formdata_errors:
|
||||
form_errors = dict(form_errors) # Make a copy
|
||||
form_errors['form'] = self.form._formdata_errors
|
||||
return form_errors
|
||||
|
||||
|
||||
# Separated by key:value
|
||||
class StringDictKeyValue(StringField):
|
||||
widget = widgets.TextArea()
|
||||
|
||||
def _value(self):
|
||||
if self.data:
|
||||
output = u''
|
||||
for k in self.data.keys():
|
||||
output += "{}: {}\r\n".format(k, self.data[k])
|
||||
|
||||
output = ''
|
||||
for k, v in self.data.items():
|
||||
output += f"{k}: {v}\r\n"
|
||||
return output
|
||||
else:
|
||||
return u''
|
||||
return ''
|
||||
|
||||
# incoming
|
||||
# incoming data processing + validation
|
||||
def process_formdata(self, valuelist):
|
||||
self.data = {}
|
||||
errors = []
|
||||
if valuelist:
|
||||
self.data = {}
|
||||
# Remove empty strings
|
||||
cleaned = list(filter(None, valuelist[0].split("\n")))
|
||||
for s in cleaned:
|
||||
parts = s.strip().split(':', 1)
|
||||
if len(parts) == 2:
|
||||
self.data.update({parts[0].strip(): parts[1].strip()})
|
||||
# Remove empty strings (blank lines)
|
||||
cleaned = [line.strip() for line in valuelist[0].split("\n") if line.strip()]
|
||||
for idx, s in enumerate(cleaned, start=1):
|
||||
if ':' not in s:
|
||||
errors.append(f"Line {idx} is missing a ':' separator.")
|
||||
continue
|
||||
parts = s.split(':', 1)
|
||||
key = parts[0].strip()
|
||||
value = parts[1].strip()
|
||||
|
||||
else:
|
||||
self.data = {}
|
||||
if not key:
|
||||
errors.append(f"Line {idx} has an empty key.")
|
||||
if not value:
|
||||
errors.append(f"Line {idx} has an empty value.")
|
||||
|
||||
self.data[key] = value
|
||||
|
||||
if errors:
|
||||
raise ValidationError("Invalid input:\n" + "\n".join(errors))
|
||||
|
||||
class ValidateContentFetcherIsReady(object):
|
||||
"""
|
||||
@@ -308,11 +467,16 @@ class ValidateAppRiseServers(object):
|
||||
import apprise
|
||||
from .notification.apprise_plugin.assets import apprise_asset
|
||||
from .notification.apprise_plugin.custom_handlers import apprise_http_custom_handler # noqa: F401
|
||||
from changedetectionio.jinja2_custom import render as jinja_render
|
||||
|
||||
apobj = apprise.Apprise(asset=apprise_asset)
|
||||
|
||||
for server_url in field.data:
|
||||
url = server_url.strip()
|
||||
generic_notification_context_data = NotificationContextData()
|
||||
# Make sure something is atleast in all those regular token fields
|
||||
generic_notification_context_data.set_random_for_validation()
|
||||
|
||||
url = jinja_render(template_str=server_url.strip(), **generic_notification_context_data).strip()
|
||||
if url.startswith("#"):
|
||||
continue
|
||||
|
||||
@@ -326,9 +490,8 @@ class ValidateJinja2Template(object):
|
||||
"""
|
||||
def __call__(self, form, field):
|
||||
from changedetectionio import notification
|
||||
|
||||
from changedetectionio.jinja2_custom import create_jinja_env
|
||||
from jinja2 import BaseLoader, TemplateSyntaxError, UndefinedError
|
||||
from jinja2.sandbox import ImmutableSandboxedEnvironment
|
||||
from jinja2.meta import find_undeclared_variables
|
||||
import jinja2.exceptions
|
||||
|
||||
@@ -336,9 +499,11 @@ class ValidateJinja2Template(object):
|
||||
joined_data = ' '.join(map(str, field.data)) if isinstance(field.data, list) else f"{field.data}"
|
||||
|
||||
try:
|
||||
jinja2_env = ImmutableSandboxedEnvironment(loader=BaseLoader)
|
||||
jinja2_env.globals.update(notification.valid_tokens)
|
||||
# Extra validation tokens provided on the form_class(... extra_tokens={}) setup
|
||||
# Use the shared helper to create a properly configured environment
|
||||
jinja2_env = create_jinja_env(loader=BaseLoader)
|
||||
|
||||
# Add notification tokens for validation
|
||||
jinja2_env.globals.update(NotificationContextData())
|
||||
if hasattr(field, 'extra_notification_tokens'):
|
||||
jinja2_env.globals.update(field.extra_notification_tokens)
|
||||
|
||||
@@ -350,6 +515,7 @@ class ValidateJinja2Template(object):
|
||||
except jinja2.exceptions.SecurityError as e:
|
||||
raise ValidationError(f"This is not a valid Jinja2 template: {e}") from e
|
||||
|
||||
# Check for undeclared variables
|
||||
ast = jinja2_env.parse(joined_data)
|
||||
undefined = ", ".join(find_undeclared_variables(ast))
|
||||
if undefined:
|
||||
@@ -372,19 +538,23 @@ class validateURL(object):
|
||||
|
||||
|
||||
def validate_url(test_url):
|
||||
# If hosts that only contain alphanumerics are allowed ("localhost" for example)
|
||||
try:
|
||||
url_validator(test_url, simple_host=allow_simplehost)
|
||||
except validators.ValidationError:
|
||||
#@todo check for xss
|
||||
message = f"'{test_url}' is not a valid URL."
|
||||
from changedetectionio.validate_url import is_safe_valid_url
|
||||
if not is_safe_valid_url(test_url):
|
||||
# This should be wtforms.validators.
|
||||
raise ValidationError(message)
|
||||
raise ValidationError('Watch protocol is not permitted or invalid URL format')
|
||||
|
||||
|
||||
class ValidateSinglePythonRegexString(object):
|
||||
def __init__(self, message=None):
|
||||
self.message = message
|
||||
|
||||
def __call__(self, form, field):
|
||||
try:
|
||||
re.compile(field.data)
|
||||
except re.error:
|
||||
message = field.gettext('RegEx \'%s\' is not a valid regular expression.')
|
||||
raise ValidationError(message % (field.data))
|
||||
|
||||
from .model.Watch import is_safe_url
|
||||
if not is_safe_url(test_url):
|
||||
# This should be wtforms.validators.
|
||||
raise ValidationError('Watch protocol is not permitted by SAFE_PROTOCOL_REGEX or incorrect URL format')
|
||||
|
||||
class ValidateListRegex(object):
|
||||
"""
|
||||
@@ -404,6 +574,7 @@ class ValidateListRegex(object):
|
||||
message = field.gettext('RegEx \'%s\' is not a valid regular expression.')
|
||||
raise ValidationError(message % (line))
|
||||
|
||||
|
||||
class ValidateCSSJSONXPATHInput(object):
|
||||
"""
|
||||
Filter validation
|
||||
@@ -503,6 +674,51 @@ class ValidateCSSJSONXPATHInput(object):
|
||||
except:
|
||||
raise ValidationError("A system-error occurred when validating your jq expression")
|
||||
|
||||
class ValidateSimpleURL:
|
||||
"""Validate that the value can be parsed by urllib.parse.urlparse() and has a scheme/netloc."""
|
||||
def __init__(self, message=None):
|
||||
self.message = message or "Invalid URL."
|
||||
|
||||
def __call__(self, form, field):
|
||||
data = (field.data or "").strip()
|
||||
if not data:
|
||||
return # empty is OK — pair with validators.Optional()
|
||||
from urllib.parse import urlparse
|
||||
|
||||
parsed = urlparse(data)
|
||||
if not parsed.scheme or not parsed.netloc:
|
||||
raise ValidationError(self.message)
|
||||
|
||||
class ValidateStartsWithRegex(object):
|
||||
def __init__(self, regex, *, flags=0, message=None, allow_empty=True, split_lines=True):
|
||||
# compile with given flags (we’ll pass re.IGNORECASE below)
|
||||
self.pattern = re.compile(regex, flags) if isinstance(regex, str) else regex
|
||||
self.message = message
|
||||
self.allow_empty = allow_empty
|
||||
self.split_lines = split_lines
|
||||
|
||||
def __call__(self, form, field):
|
||||
data = field.data
|
||||
if not data:
|
||||
return
|
||||
|
||||
# normalize into list of lines
|
||||
if isinstance(data, str) and self.split_lines:
|
||||
lines = data.splitlines()
|
||||
elif isinstance(data, (list, tuple)):
|
||||
lines = data
|
||||
else:
|
||||
lines = [data]
|
||||
|
||||
for line in lines:
|
||||
stripped = line.strip()
|
||||
if not stripped:
|
||||
if self.allow_empty:
|
||||
continue
|
||||
raise ValidationError(self.message or "Empty value not allowed.")
|
||||
if not self.pattern.match(stripped):
|
||||
raise ValidationError(self.message or "Invalid value.")
|
||||
|
||||
class quickWatchForm(Form):
|
||||
from . import processors
|
||||
|
||||
@@ -513,7 +729,6 @@ class quickWatchForm(Form):
|
||||
edit_and_watch_submit_button = SubmitField('Edit > Watch', render_kw={"class": "pure-button pure-button-primary"})
|
||||
|
||||
|
||||
|
||||
# Common to a single watch and the global settings
|
||||
class commonSettingsForm(Form):
|
||||
from . import processors
|
||||
@@ -524,16 +739,23 @@ class commonSettingsForm(Form):
|
||||
self.notification_title.extra_notification_tokens = kwargs.get('extra_notification_tokens', {})
|
||||
self.notification_urls.extra_notification_tokens = kwargs.get('extra_notification_tokens', {})
|
||||
|
||||
extract_title_as_title = BooleanField('Extract <title> from document and use as watch title', default=False)
|
||||
fetch_backend = RadioField(u'Fetch Method', choices=content_fetchers.available_fetchers(), validators=[ValidateContentFetcherIsReady()])
|
||||
notification_body = TextAreaField('Notification Body', default='{{ watch_url }} had a change.', validators=[validators.Optional(), ValidateJinja2Template()])
|
||||
notification_format = SelectField('Notification format', choices=valid_notification_formats.keys())
|
||||
notification_format = SelectField('Notification format', choices=list(valid_notification_formats.items()))
|
||||
notification_title = StringField('Notification Title', default='ChangeDetection.io Notification - {{ watch_url }}', validators=[validators.Optional(), ValidateJinja2Template()])
|
||||
notification_urls = StringListField('Notification URL List', validators=[validators.Optional(), ValidateAppRiseServers(), ValidateJinja2Template()])
|
||||
processor = RadioField( label=u"Processor - What do you want to achieve?", choices=processors.available_processors(), default="text_json_diff")
|
||||
timezone = StringField("Timezone for watch schedule", render_kw={"list": "timezones"}, validators=[validateTimeZoneName()])
|
||||
scheduler_timezone_default = StringField("Default timezone for watch check scheduler", render_kw={"list": "timezones"}, validators=[validateTimeZoneName()])
|
||||
webdriver_delay = IntegerField('Wait seconds before extracting text', validators=[validators.Optional(), validators.NumberRange(min=1, message="Should contain one or more seconds")])
|
||||
|
||||
# Not true anymore but keep the validate_ hook for future use, we convert color tags
|
||||
# def validate_notification_urls(self, field):
|
||||
# """Validate that HTML Color format is not used with Telegram"""
|
||||
# if self.notification_format.data == 'HTML Color' and field.data:
|
||||
# for url in field.data:
|
||||
# if url and ('tgram://' in url or 'discord://' in url or 'discord.com/api/webhooks' in url):
|
||||
# raise ValidationError('HTML Color format is not supported by Telegram and Discord. Please choose another Notification Format (Plain Text, HTML, or Markdown to HTML).')
|
||||
|
||||
|
||||
class importForm(Form):
|
||||
from . import processors
|
||||
@@ -558,11 +780,16 @@ class processor_text_json_diff_form(commonSettingsForm):
|
||||
url = fields.URLField('URL', validators=[validateURL()])
|
||||
tags = StringTagUUID('Group tag', [validators.Optional()], default='')
|
||||
|
||||
time_between_check = FormField(TimeBetweenCheckForm)
|
||||
time_between_check = EnhancedFormField(
|
||||
TimeBetweenCheckForm,
|
||||
conditional_field='time_between_check_use_default',
|
||||
conditional_message=REQUIRE_ATLEAST_ONE_TIME_PART_WHEN_NOT_GLOBAL_DEFAULT,
|
||||
conditional_test_function=validate_time_between_check_has_values
|
||||
)
|
||||
|
||||
time_schedule_limit = FormField(ScheduleLimitForm)
|
||||
|
||||
time_between_check_use_default = BooleanField('Use global settings for time between check', default=False)
|
||||
time_between_check_use_default = BooleanField('Use global settings for time between check and scheduler.', default=False)
|
||||
|
||||
include_filters = StringListField('CSS/JSONPath/JQ/XPath Filters', [ValidateCSSJSONXPATHInput()], default='')
|
||||
|
||||
@@ -580,6 +807,7 @@ class processor_text_json_diff_form(commonSettingsForm):
|
||||
check_unique_lines = BooleanField('Only trigger when unique lines appear in all history', default=False)
|
||||
remove_duplicate_lines = BooleanField('Remove duplicate lines of text', default=False)
|
||||
sort_text_alphabetically = BooleanField('Sort text alphabetically', default=False)
|
||||
strip_ignored_lines = TernaryNoneBooleanField('Strip ignored lines', default=None)
|
||||
trim_text_whitespace = BooleanField('Trim whitespace before and after text', default=False)
|
||||
|
||||
filter_text_added = BooleanField('Added lines', default=True)
|
||||
@@ -592,18 +820,18 @@ class processor_text_json_diff_form(commonSettingsForm):
|
||||
text_should_not_be_present = StringListField('Block change-detection while text matches', [validators.Optional(), ValidateListRegex()])
|
||||
webdriver_js_execute_code = TextAreaField('Execute JavaScript before change detection', render_kw={"rows": "5"}, validators=[validators.Optional()])
|
||||
|
||||
save_button = SubmitField('Save', render_kw={"class": "pure-button button-small pure-button-primary"})
|
||||
save_button = SubmitField('Save', render_kw={"class": "pure-button pure-button-primary"})
|
||||
|
||||
proxy = RadioField('Proxy')
|
||||
# filter_failure_notification_send @todo make ternary
|
||||
filter_failure_notification_send = BooleanField(
|
||||
'Send a notification when the filter can no longer be found on the page', default=False)
|
||||
|
||||
notification_muted = BooleanField('Notifications Muted / Off', default=False)
|
||||
notification_muted = TernaryNoneBooleanField('Notifications', default=None, yes_text="Muted", no_text="On")
|
||||
notification_screenshot = BooleanField('Attach screenshot to notification (where possible)', default=False)
|
||||
|
||||
conditions_match_logic = RadioField(u'Match', choices=[('ALL', 'Match all of the following'),('ANY', 'Match any of the following')], default='ALL')
|
||||
conditions = FieldList(FormField(ConditionFormRow), min_entries=1) # Add rule logic here
|
||||
|
||||
use_page_title_in_list = TernaryNoneBooleanField('Use page <title> in list', default=None)
|
||||
|
||||
def extra_tab_content(self):
|
||||
return None
|
||||
@@ -615,7 +843,7 @@ class processor_text_json_diff_form(commonSettingsForm):
|
||||
if not super().validate():
|
||||
return False
|
||||
|
||||
from changedetectionio.safe_jinja import render as jinja_render
|
||||
from changedetectionio.jinja2_custom import render as jinja_render
|
||||
result = True
|
||||
|
||||
# Fail form validation when a body is set for a GET
|
||||
@@ -678,23 +906,36 @@ class processor_text_json_diff_form(commonSettingsForm):
|
||||
):
|
||||
super().__init__(formdata, obj, prefix, data, meta, **kwargs)
|
||||
if kwargs and kwargs.get('default_system_settings'):
|
||||
default_tz = kwargs.get('default_system_settings').get('application', {}).get('timezone')
|
||||
default_tz = kwargs.get('default_system_settings').get('application', {}).get('scheduler_timezone_default')
|
||||
if default_tz:
|
||||
self.time_schedule_limit.form.timezone.render_kw['placeholder'] = default_tz
|
||||
|
||||
|
||||
|
||||
class SingleExtraProxy(Form):
|
||||
|
||||
# maybe better to set some <script>var..
|
||||
proxy_name = StringField('Name', [validators.Optional()], render_kw={"placeholder": "Name"})
|
||||
proxy_url = StringField('Proxy URL', [validators.Optional()], render_kw={"placeholder": "socks5:// or regular proxy http://user:pass@...:3128", "size":50})
|
||||
# @todo do the validation here instead
|
||||
proxy_url = StringField('Proxy URL', [
|
||||
validators.Optional(),
|
||||
ValidateStartsWithRegex(
|
||||
regex=r'^(https?|socks5)://', # ✅ main pattern
|
||||
flags=re.IGNORECASE, # ✅ makes it case-insensitive
|
||||
message='Proxy URLs must start with http://, https:// or socks5://',
|
||||
),
|
||||
ValidateSimpleURL()
|
||||
], render_kw={"placeholder": "socks5:// or regular proxy http://user:pass@...:3128", "size":50})
|
||||
|
||||
class SingleExtraBrowser(Form):
|
||||
browser_name = StringField('Name', [validators.Optional()], render_kw={"placeholder": "Name"})
|
||||
browser_connection_url = StringField('Browser connection URL', [validators.Optional()], render_kw={"placeholder": "wss://brightdata... wss://oxylabs etc", "size":50})
|
||||
# @todo do the validation here instead
|
||||
browser_connection_url = StringField('Browser connection URL', [
|
||||
validators.Optional(),
|
||||
ValidateStartsWithRegex(
|
||||
regex=r'^(wss?|ws)://',
|
||||
flags=re.IGNORECASE,
|
||||
message='Browser URLs must start with wss:// or ws://'
|
||||
),
|
||||
ValidateSimpleURL()
|
||||
], render_kw={"placeholder": "wss://brightdata... wss://oxylabs etc", "size":50})
|
||||
|
||||
class DefaultUAInputForm(Form):
|
||||
html_requests = StringField('Plaintext requests', validators=[validators.Optional()], render_kw={"placeholder": "<default>"})
|
||||
@@ -703,12 +944,23 @@ class DefaultUAInputForm(Form):
|
||||
|
||||
# datastore.data['settings']['requests']..
|
||||
class globalSettingsRequestForm(Form):
|
||||
time_between_check = FormField(TimeBetweenCheckForm)
|
||||
time_between_check = RequiredFormField(TimeBetweenCheckForm)
|
||||
time_schedule_limit = FormField(ScheduleLimitForm)
|
||||
proxy = RadioField('Proxy')
|
||||
proxy = RadioField('Default proxy')
|
||||
jitter_seconds = IntegerField('Random jitter seconds ± check',
|
||||
render_kw={"style": "width: 5em;"},
|
||||
validators=[validators.NumberRange(min=0, message="Should contain zero or more seconds")])
|
||||
|
||||
workers = IntegerField('Number of fetch workers',
|
||||
render_kw={"style": "width: 5em;"},
|
||||
validators=[validators.NumberRange(min=1, max=50,
|
||||
message="Should be between 1 and 50")])
|
||||
|
||||
timeout = IntegerField('Requests timeout in seconds',
|
||||
render_kw={"style": "width: 5em;"},
|
||||
validators=[validators.NumberRange(min=1, max=999,
|
||||
message="Should be between 1 and 999")])
|
||||
|
||||
extra_proxies = FieldList(FormField(SingleExtraProxy), min_entries=5)
|
||||
extra_browsers = FieldList(FormField(SingleExtraBrowser), min_entries=5)
|
||||
|
||||
@@ -722,7 +974,10 @@ class globalSettingsRequestForm(Form):
|
||||
return False
|
||||
|
||||
class globalSettingsApplicationUIForm(Form):
|
||||
open_diff_in_new_tab = BooleanField('Open diff page in a new tab', default=True, validators=[validators.Optional()])
|
||||
open_diff_in_new_tab = BooleanField("Open 'History' page in a new tab", default=True, validators=[validators.Optional()])
|
||||
socket_io_enabled = BooleanField('Realtime UI Updates Enabled', default=True, validators=[validators.Optional()])
|
||||
favicons_enabled = BooleanField('Favicons Enabled', default=True, validators=[validators.Optional()])
|
||||
use_page_title_in_list = BooleanField('Use page <title> in watch overview list') #BooleanField=True
|
||||
|
||||
# datastore.data['settings']['application']..
|
||||
class globalSettingsApplicationForm(commonSettingsForm):
|
||||
@@ -747,9 +1002,14 @@ class globalSettingsApplicationForm(commonSettingsForm):
|
||||
|
||||
removepassword_button = SubmitField('Remove password', render_kw={"class": "pure-button pure-button-primary"})
|
||||
render_anchor_tag_content = BooleanField('Render anchor tag content', default=False)
|
||||
shared_diff_access = BooleanField('Allow access to view diff page when password is enabled', default=False, validators=[validators.Optional()])
|
||||
shared_diff_access = BooleanField('Allow anonymous access to watch history page when password is enabled', default=False, validators=[validators.Optional()])
|
||||
strip_ignored_lines = BooleanField('Strip ignored lines')
|
||||
rss_hide_muted_watches = BooleanField('Hide muted watches from RSS feed', default=True,
|
||||
validators=[validators.Optional()])
|
||||
|
||||
rss_reader_mode = BooleanField('RSS reader mode ', default=False,
|
||||
validators=[validators.Optional()])
|
||||
|
||||
filter_failure_notification_threshold_attempts = IntegerField('Number of times the filter can be missing before sending a notification',
|
||||
render_kw={"style": "width: 5em;"},
|
||||
validators=[validators.NumberRange(min=0,
|
||||
@@ -769,9 +1029,9 @@ class globalSettingsForm(Form):
|
||||
|
||||
requests = FormField(globalSettingsRequestForm)
|
||||
application = FormField(globalSettingsApplicationForm)
|
||||
save_button = SubmitField('Save', render_kw={"class": "pure-button button-small pure-button-primary"})
|
||||
save_button = SubmitField('Save', render_kw={"class": "pure-button pure-button-primary"})
|
||||
|
||||
|
||||
class extractDataForm(Form):
|
||||
extract_regex = StringField('RegEx to extract', validators=[validators.Length(min=1, message="Needs a RegEx")])
|
||||
extract_regex = StringField('RegEx to extract', validators=[validators.DataRequired(), ValidateSinglePythonRegexString()])
|
||||
extract_submit_button = SubmitField('Extract as CSV', render_kw={"class": "pure-button pure-button-primary"})
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
from functools import lru_cache
|
||||
|
||||
from loguru import logger
|
||||
from lxml import etree
|
||||
from typing import List
|
||||
import html
|
||||
import json
|
||||
import re
|
||||
|
||||
@@ -9,6 +11,10 @@ TEXT_FILTER_LIST_LINE_SUFFIX = "<br>"
|
||||
TRANSLATE_WHITESPACE_TABLE = str.maketrans('', '', '\r\n\t ')
|
||||
PERL_STYLE_REGEX = r'^/(.*?)/([a-z]*)?$'
|
||||
|
||||
TITLE_RE = re.compile(r"<title[^>]*>(.*?)</title>", re.I | re.S)
|
||||
META_CS = re.compile(r'<meta[^>]+charset=["\']?\s*([a-z0-9_\-:+.]+)', re.I)
|
||||
META_CT = re.compile(r'<meta[^>]+http-equiv=["\']?content-type["\']?[^>]*content=["\'][^>]*charset=([a-z0-9_\-:+.]+)', re.I)
|
||||
|
||||
# 'price' , 'lowPrice', 'highPrice' are usually under here
|
||||
# All of those may or may not appear on different websites - I didnt find a way todo case-insensitive searching here
|
||||
LD_JSON_PRODUCT_OFFER_SELECTORS = ["json:$..offers", "json:$..Offers"]
|
||||
@@ -17,9 +23,9 @@ class JSONNotFound(ValueError):
|
||||
def __init__(self, msg):
|
||||
ValueError.__init__(self, msg)
|
||||
|
||||
|
||||
# Doesn't look like python supports forward slash auto enclosure in re.findall
|
||||
# So convert it to inline flag "(?i)foobar" type configuration
|
||||
@lru_cache(maxsize=100)
|
||||
def perl_style_slash_enclosed_regex_to_options(regex):
|
||||
|
||||
res = re.search(PERL_STYLE_REGEX, regex, re.IGNORECASE)
|
||||
@@ -52,13 +58,17 @@ def include_filters(include_filters, html_content, append_pretty_line_formatting
|
||||
|
||||
return html_block
|
||||
|
||||
def subtractive_css_selector(css_selector, html_content):
|
||||
def subtractive_css_selector(css_selector, content):
|
||||
from bs4 import BeautifulSoup
|
||||
soup = BeautifulSoup(html_content, "html.parser")
|
||||
soup = BeautifulSoup(content, "html.parser")
|
||||
|
||||
# So that the elements dont shift their index, build a list of elements here which will be pointers to their place in the DOM
|
||||
elements_to_remove = soup.select(css_selector)
|
||||
|
||||
if not elements_to_remove:
|
||||
# Better to return the original that rebuild with BeautifulSoup
|
||||
return content
|
||||
|
||||
# Then, remove them in a separate loop
|
||||
for item in elements_to_remove:
|
||||
item.decompose()
|
||||
@@ -66,6 +76,7 @@ def subtractive_css_selector(css_selector, html_content):
|
||||
return str(soup)
|
||||
|
||||
def subtractive_xpath_selector(selectors: List[str], html_content: str) -> str:
|
||||
from lxml import etree
|
||||
# Parse the HTML content using lxml
|
||||
html_tree = etree.HTML(html_content)
|
||||
|
||||
@@ -77,6 +88,10 @@ def subtractive_xpath_selector(selectors: List[str], html_content: str) -> str:
|
||||
# Collect elements for each selector
|
||||
elements_to_remove.extend(html_tree.xpath(selector))
|
||||
|
||||
# If no elements were found, return the original HTML content
|
||||
if not elements_to_remove:
|
||||
return html_content
|
||||
|
||||
# Then, remove them in a separate loop
|
||||
for element in elements_to_remove:
|
||||
if element.getparent() is not None: # Ensure the element has a parent before removing
|
||||
@@ -94,7 +109,7 @@ def element_removal(selectors: List[str], html_content):
|
||||
xpath_selectors = []
|
||||
|
||||
for selector in selectors:
|
||||
if selector.startswith(('xpath:', 'xpath1:', '//')):
|
||||
if selector.strip().startswith(('xpath:', 'xpath1:', '//')):
|
||||
# Handle XPath selectors separately
|
||||
xpath_selector = selector.removeprefix('xpath:').removeprefix('xpath1:')
|
||||
xpath_selectors.append(xpath_selector)
|
||||
@@ -171,8 +186,21 @@ def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False
|
||||
tree = html.fromstring(bytes(html_content, encoding='utf-8'), parser=parser)
|
||||
html_block = ""
|
||||
|
||||
r = elementpath.select(tree, xpath_filter.strip(), namespaces={'re': 'http://exslt.org/regular-expressions'}, parser=XPath3Parser)
|
||||
#@note: //title/text() wont work where <title>CDATA..
|
||||
# Build namespace map for XPath queries
|
||||
namespaces = {'re': 'http://exslt.org/regular-expressions'}
|
||||
|
||||
# Handle default namespace in documents (common in RSS/Atom feeds, but can occur in any XML)
|
||||
# XPath spec: unprefixed element names have no namespace, not the default namespace
|
||||
# Solution: Register the default namespace with empty string prefix in elementpath
|
||||
# This is primarily for RSS/Atom feeds but works for any XML with default namespace
|
||||
if hasattr(tree, 'nsmap') and tree.nsmap and None in tree.nsmap:
|
||||
# Register the default namespace with empty string prefix for elementpath
|
||||
# This allows //title to match elements in the default namespace
|
||||
namespaces[''] = tree.nsmap[None]
|
||||
|
||||
r = elementpath.select(tree, xpath_filter.strip(), namespaces=namespaces, parser=XPath3Parser)
|
||||
#@note: //title/text() now works with default namespaces (fixed by registering '' prefix)
|
||||
#@note: //title/text() wont work where <title>CDATA.. (use cdata_in_document_to_text first)
|
||||
|
||||
if type(r) != list:
|
||||
r = [r]
|
||||
@@ -207,8 +235,19 @@ def xpath1_filter(xpath_filter, html_content, append_pretty_line_formatting=Fals
|
||||
tree = html.fromstring(bytes(html_content, encoding='utf-8'), parser=parser)
|
||||
html_block = ""
|
||||
|
||||
r = tree.xpath(xpath_filter.strip(), namespaces={'re': 'http://exslt.org/regular-expressions'})
|
||||
#@note: //title/text() wont work where <title>CDATA..
|
||||
# Build namespace map for XPath queries
|
||||
namespaces = {'re': 'http://exslt.org/regular-expressions'}
|
||||
|
||||
# NOTE: lxml's native xpath() does NOT support empty string prefix for default namespace
|
||||
# For documents with default namespace (RSS/Atom feeds), users must use:
|
||||
# - local-name(): //*[local-name()='title']/text()
|
||||
# - Or use xpath_filter (not xpath1_filter) which supports default namespaces
|
||||
# XPath spec: unprefixed element names have no namespace, not the default namespace
|
||||
|
||||
r = tree.xpath(xpath_filter.strip(), namespaces=namespaces)
|
||||
#@note: xpath1 (lxml) does NOT automatically handle default namespaces
|
||||
#@note: Use //*[local-name()='element'] or switch to xpath_filter for default namespace support
|
||||
#@note: //title/text() wont work where <title>CDATA.. (use cdata_in_document_to_text first)
|
||||
|
||||
for element in r:
|
||||
# When there's more than 1 match, then add the suffix to separate each line
|
||||
@@ -289,70 +328,92 @@ def _get_stripped_text_from_json_match(match):
|
||||
|
||||
return stripped_text_from_html
|
||||
|
||||
def extract_json_blob_from_html(content, ensure_is_ldjson_info_type, json_filter):
|
||||
from bs4 import BeautifulSoup
|
||||
stripped_text_from_html = ''
|
||||
|
||||
# Foreach <script json></script> blob.. just return the first that matches json_filter
|
||||
# As a last resort, try to parse the whole <body>
|
||||
soup = BeautifulSoup(content, 'html.parser')
|
||||
|
||||
if ensure_is_ldjson_info_type:
|
||||
bs_result = soup.find_all('script', {"type": "application/ld+json"})
|
||||
else:
|
||||
bs_result = soup.find_all('script')
|
||||
bs_result += soup.find_all('body')
|
||||
|
||||
bs_jsons = []
|
||||
|
||||
for result in bs_result:
|
||||
# result.text is how bs4 magically strips JSON from the body
|
||||
content_start = result.text.lstrip("\ufeff").strip()[:100] if result.text else ''
|
||||
# Skip empty tags, and things that dont even look like JSON
|
||||
if not result.text or not (content_start[0] == '{' or content_start[0] == '['):
|
||||
continue
|
||||
try:
|
||||
json_data = json.loads(result.text)
|
||||
bs_jsons.append(json_data)
|
||||
except json.JSONDecodeError:
|
||||
# Skip objects which cannot be parsed
|
||||
continue
|
||||
|
||||
if not bs_jsons:
|
||||
raise JSONNotFound("No parsable JSON found in this document")
|
||||
|
||||
for json_data in bs_jsons:
|
||||
stripped_text_from_html = _parse_json(json_data, json_filter)
|
||||
|
||||
if ensure_is_ldjson_info_type:
|
||||
# Could sometimes be list, string or something else random
|
||||
if isinstance(json_data, dict):
|
||||
# If it has LD JSON 'key' @type, and @type is 'product', and something was found for the search
|
||||
# (Some sites have multiple of the same ld+json @type='product', but some have the review part, some have the 'price' part)
|
||||
# @type could also be a list although non-standard ("@type": ["Product", "SubType"],)
|
||||
# LD_JSON auto-extract also requires some content PLUS the ldjson to be present
|
||||
# 1833 - could be either str or dict, should not be anything else
|
||||
|
||||
t = json_data.get('@type')
|
||||
if t and stripped_text_from_html:
|
||||
|
||||
if isinstance(t, str) and t.lower() == ensure_is_ldjson_info_type.lower():
|
||||
break
|
||||
# The non-standard part, some have a list
|
||||
elif isinstance(t, list):
|
||||
if ensure_is_ldjson_info_type.lower() in [x.lower().strip() for x in t]:
|
||||
break
|
||||
|
||||
elif stripped_text_from_html:
|
||||
break
|
||||
|
||||
return stripped_text_from_html
|
||||
|
||||
# content - json
|
||||
# json_filter - ie json:$..price
|
||||
# ensure_is_ldjson_info_type - str "product", optional, "@type == product" (I dont know how to do that as a json selector)
|
||||
def extract_json_as_string(content, json_filter, ensure_is_ldjson_info_type=None):
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
stripped_text_from_html = False
|
||||
# https://github.com/dgtlmoon/changedetection.io/pull/2041#issuecomment-1848397161w
|
||||
# Try to parse/filter out the JSON, if we get some parser error, then maybe it's embedded within HTML tags
|
||||
try:
|
||||
# .lstrip("\ufeff") strings ByteOrderMark from UTF8 and still lets the UTF work
|
||||
stripped_text_from_html = _parse_json(json.loads(content.lstrip("\ufeff") ), json_filter)
|
||||
except json.JSONDecodeError as e:
|
||||
logger.warning(str(e))
|
||||
|
||||
# Foreach <script json></script> blob.. just return the first that matches json_filter
|
||||
# As a last resort, try to parse the whole <body>
|
||||
soup = BeautifulSoup(content, 'html.parser')
|
||||
# Looks like clean JSON, dont bother extracting from HTML
|
||||
|
||||
if ensure_is_ldjson_info_type:
|
||||
bs_result = soup.findAll('script', {"type": "application/ld+json"})
|
||||
else:
|
||||
bs_result = soup.findAll('script')
|
||||
bs_result += soup.findAll('body')
|
||||
content_start = content.lstrip("\ufeff").strip()[:100]
|
||||
|
||||
bs_jsons = []
|
||||
for result in bs_result:
|
||||
# Skip empty tags, and things that dont even look like JSON
|
||||
if not result.text or '{' not in result.text:
|
||||
continue
|
||||
try:
|
||||
json_data = json.loads(result.text)
|
||||
bs_jsons.append(json_data)
|
||||
except json.JSONDecodeError:
|
||||
# Skip objects which cannot be parsed
|
||||
continue
|
||||
|
||||
if not bs_jsons:
|
||||
raise JSONNotFound("No parsable JSON found in this document")
|
||||
|
||||
for json_data in bs_jsons:
|
||||
stripped_text_from_html = _parse_json(json_data, json_filter)
|
||||
|
||||
if ensure_is_ldjson_info_type:
|
||||
# Could sometimes be list, string or something else random
|
||||
if isinstance(json_data, dict):
|
||||
# If it has LD JSON 'key' @type, and @type is 'product', and something was found for the search
|
||||
# (Some sites have multiple of the same ld+json @type='product', but some have the review part, some have the 'price' part)
|
||||
# @type could also be a list although non-standard ("@type": ["Product", "SubType"],)
|
||||
# LD_JSON auto-extract also requires some content PLUS the ldjson to be present
|
||||
# 1833 - could be either str or dict, should not be anything else
|
||||
|
||||
t = json_data.get('@type')
|
||||
if t and stripped_text_from_html:
|
||||
|
||||
if isinstance(t, str) and t.lower() == ensure_is_ldjson_info_type.lower():
|
||||
break
|
||||
# The non-standard part, some have a list
|
||||
elif isinstance(t, list):
|
||||
if ensure_is_ldjson_info_type.lower() in [x.lower().strip() for x in t]:
|
||||
break
|
||||
|
||||
elif stripped_text_from_html:
|
||||
break
|
||||
if content_start[0] == '{' or content_start[0] == '[':
|
||||
try:
|
||||
# .lstrip("\ufeff") strings ByteOrderMark from UTF8 and still lets the UTF work
|
||||
stripped_text_from_html = _parse_json(json.loads(content.lstrip("\ufeff")), json_filter)
|
||||
except json.JSONDecodeError as e:
|
||||
logger.warning(f"Error processing JSON {content[:20]}...{str(e)})")
|
||||
else:
|
||||
# Probably something else, go fish inside for it
|
||||
try:
|
||||
stripped_text_from_html = extract_json_blob_from_html(content=content,
|
||||
ensure_is_ldjson_info_type=ensure_is_ldjson_info_type,
|
||||
json_filter=json_filter )
|
||||
except json.JSONDecodeError as e:
|
||||
logger.warning(f"Error processing JSON while extracting JSON from HTML blob {content[:20]}...{str(e)})")
|
||||
|
||||
if not stripped_text_from_html:
|
||||
# Re 265 - Just return an empty string when filter not found
|
||||
@@ -371,7 +432,13 @@ def strip_ignore_text(content, wordlist, mode="content"):
|
||||
ignore_regex_multiline = []
|
||||
ignored_lines = []
|
||||
|
||||
if not content:
|
||||
return ''
|
||||
|
||||
for k in wordlist:
|
||||
# Skip empty strings to avoid matching everything
|
||||
if not k or not k.strip():
|
||||
continue
|
||||
# Is it a regex?
|
||||
res = re.search(PERL_STYLE_REGEX, k, re.IGNORECASE)
|
||||
if res:
|
||||
@@ -436,55 +503,27 @@ def cdata_in_document_to_text(html_content: str, render_anchor_tag_content=False
|
||||
return re.sub(pattern, repl, html_content)
|
||||
|
||||
|
||||
def html_to_text_sub_worker(conn, html_content: str, render_anchor_tag_content=False, is_rss=False):
|
||||
# NOTE!! ANYTHING LIBXML, HTML5LIB ETC WILL CAUSE SOME SMALL MEMORY LEAK IN THE LOCAL "LIB" IMPLEMENTATION OUTSIDE PYTHON
|
||||
|
||||
|
||||
def html_to_text(html_content: str, render_anchor_tag_content=False, is_rss=False, timeout=10) -> str:
|
||||
from inscriptis import get_text
|
||||
from inscriptis.model.config import ParserConfig
|
||||
|
||||
"""Converts html string to a string with just the text. If ignoring
|
||||
rendering anchor tag content is enable, anchor tag content are also
|
||||
included in the text
|
||||
|
||||
:param html_content: string with html content
|
||||
:param render_anchor_tag_content: boolean flag indicating whether to extract
|
||||
hyperlinks (the anchor tag content) together with text. This refers to the
|
||||
'href' inside 'a' tags.
|
||||
Anchor tag content is rendered in the following manner:
|
||||
'[ text ](anchor tag content)'
|
||||
:return: extracted text from the HTML
|
||||
"""
|
||||
# if anchor tag content flag is set to True define a config for
|
||||
# extracting this content
|
||||
if render_anchor_tag_content:
|
||||
parser_config = ParserConfig(
|
||||
annotation_rules={"a": ["hyperlink"]},
|
||||
display_links=True
|
||||
)
|
||||
# otherwise set config to None/default
|
||||
else:
|
||||
parser_config = None
|
||||
|
||||
# RSS Mode - Inscriptis will treat `title` as something else.
|
||||
# Make it as a regular block display element (//item/title)
|
||||
# This is a bit of a hack - the real way it to use XSLT to convert it to HTML #1874
|
||||
if is_rss:
|
||||
html_content = re.sub(r'<title([\s>])', r'<h1\1', html_content)
|
||||
html_content = re.sub(r'</title>', r'</h1>', html_content)
|
||||
|
||||
text_content = get_text(html_content, config=parser_config)
|
||||
conn.send(text_content)
|
||||
conn.close()
|
||||
|
||||
# NOTE!! ANYTHING LIBXML, HTML5LIB ETC WILL CAUSE SOME SMALL MEMORY LEAK IN THE LOCAL "LIB" IMPLEMENTATION OUTSIDE PYTHON
|
||||
def html_to_text(html_content: str, render_anchor_tag_content=False, is_rss=False):
|
||||
from multiprocessing import Process, Pipe
|
||||
|
||||
parent_conn, child_conn = Pipe()
|
||||
p = Process(target=html_to_text_sub_worker, args=(child_conn, html_content, render_anchor_tag_content, is_rss))
|
||||
p.start()
|
||||
text = parent_conn.recv()
|
||||
p.join()
|
||||
return text
|
||||
return text_content
|
||||
|
||||
# Does LD+JSON exist with a @type=='product' and a .price set anywhere?
|
||||
def has_ldjson_product_info(content):
|
||||
@@ -538,3 +577,43 @@ def get_triggered_text(content, trigger_text):
|
||||
i += 1
|
||||
|
||||
return triggered_text
|
||||
|
||||
|
||||
def extract_title(data: bytes | str, sniff_bytes: int = 2048, scan_chars: int = 8192) -> str | None:
|
||||
try:
|
||||
# Only decode/process the prefix we need for title extraction
|
||||
match data:
|
||||
case bytes() if data.startswith((b"\xff\xfe", b"\xfe\xff")):
|
||||
prefix = data[:scan_chars * 2].decode("utf-16", errors="replace")
|
||||
case bytes() if data.startswith((b"\xff\xfe\x00\x00", b"\x00\x00\xfe\xff")):
|
||||
prefix = data[:scan_chars * 4].decode("utf-32", errors="replace")
|
||||
case bytes():
|
||||
try:
|
||||
prefix = data[:scan_chars].decode("utf-8")
|
||||
except UnicodeDecodeError:
|
||||
try:
|
||||
head = data[:sniff_bytes].decode("ascii", errors="ignore")
|
||||
if m := (META_CS.search(head) or META_CT.search(head)):
|
||||
enc = m.group(1).lower()
|
||||
else:
|
||||
enc = "cp1252"
|
||||
prefix = data[:scan_chars * 2].decode(enc, errors="replace")
|
||||
except Exception as e:
|
||||
logger.error(f"Title extraction encoding detection failed: {e}")
|
||||
return None
|
||||
case str():
|
||||
prefix = data[:scan_chars] if len(data) > scan_chars else data
|
||||
case _:
|
||||
logger.error(f"Title extraction received unsupported data type: {type(data)}")
|
||||
return None
|
||||
|
||||
# Search only in the prefix
|
||||
if m := TITLE_RE.search(prefix):
|
||||
title = html.unescape(" ".join(m.group(1).split())).strip()
|
||||
# Some safe limit
|
||||
return title[:2000]
|
||||
return None
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Title extraction failed: {e}")
|
||||
return None
|
||||
22
changedetectionio/jinja2_custom/__init__.py
Normal file
22
changedetectionio/jinja2_custom/__init__.py
Normal file
@@ -0,0 +1,22 @@
|
||||
"""
|
||||
Jinja2 custom extensions and safe rendering utilities.
|
||||
"""
|
||||
from .extensions.TimeExtension import TimeExtension
|
||||
from .safe_jinja import (
|
||||
render,
|
||||
render_fully_escaped,
|
||||
create_jinja_env,
|
||||
JINJA2_MAX_RETURN_PAYLOAD_SIZE,
|
||||
DEFAULT_JINJA2_EXTENSIONS,
|
||||
)
|
||||
from .plugins.regex import regex_replace
|
||||
|
||||
__all__ = [
|
||||
'TimeExtension',
|
||||
'render',
|
||||
'render_fully_escaped',
|
||||
'create_jinja_env',
|
||||
'JINJA2_MAX_RETURN_PAYLOAD_SIZE',
|
||||
'DEFAULT_JINJA2_EXTENSIONS',
|
||||
'regex_replace',
|
||||
]
|
||||
221
changedetectionio/jinja2_custom/extensions/TimeExtension.py
Normal file
221
changedetectionio/jinja2_custom/extensions/TimeExtension.py
Normal file
@@ -0,0 +1,221 @@
|
||||
"""
|
||||
Jinja2 TimeExtension - Custom date/time handling for templates.
|
||||
|
||||
This extension provides the {% now %} tag for Jinja2 templates, offering timezone-aware
|
||||
date/time formatting with support for time offsets.
|
||||
|
||||
Why This Extension Exists:
|
||||
The Arrow library has a now() function (arrow.now()), but Jinja2 templates cannot
|
||||
directly call Python functions - they need extensions or filters to expose functionality.
|
||||
|
||||
This TimeExtension serves as a Jinja2-to-Arrow bridge that:
|
||||
|
||||
1. Makes Arrow accessible in templates - Jinja2 requires registering functions/tags
|
||||
through extensions. You cannot use arrow.now() directly in a template.
|
||||
|
||||
2. Provides template-friendly syntax - Instead of complex Python code, you get clean tags:
|
||||
{% now 'UTC' %}
|
||||
{% now 'UTC' + 'hours=2' %}
|
||||
{% now 'Europe/London', '%Y-%m-%d' %}
|
||||
|
||||
3. Adds convenience features on top of Arrow:
|
||||
- Default timezone from environment variable (TZ) or config
|
||||
- Default datetime format configuration
|
||||
- Offset syntax parsing: 'hours=2,minutes=30' → shift(hours=2, minutes=30)
|
||||
- Empty string timezone support to use configured defaults
|
||||
|
||||
4. Maintains security - Works within Jinja2's sandboxed environment so users
|
||||
cannot access arbitrary Python code or objects.
|
||||
|
||||
Essentially, this is a Jinja2 wrapper around arrow.now() and arrow.shift() that
|
||||
provides user-friendly template syntax while maintaining security.
|
||||
|
||||
Basic Usage:
|
||||
{% now 'UTC' %}
|
||||
# Output: Wed, 09 Dec 2015 23:33:01
|
||||
|
||||
Custom Format:
|
||||
{% now 'UTC', '%Y-%m-%d %H:%M:%S' %}
|
||||
# Output: 2015-12-09 23:33:01
|
||||
|
||||
Timezone Support:
|
||||
{% now 'America/New_York' %}
|
||||
{% now 'Europe/London' %}
|
||||
{% now '' %} # Uses default timezone from environment.default_timezone
|
||||
|
||||
Time Offsets (Addition):
|
||||
{% now 'UTC' + 'hours=2' %}
|
||||
{% now 'UTC' + 'hours=2,minutes=30' %}
|
||||
{% now 'UTC' + 'days=1,hours=2,minutes=15,seconds=10' %}
|
||||
|
||||
Time Offsets (Subtraction):
|
||||
{% now 'UTC' - 'minutes=11' %}
|
||||
{% now 'UTC' - 'days=2,minutes=33,seconds=1' %}
|
||||
|
||||
Time Offsets with Custom Format:
|
||||
{% now 'UTC' + 'hours=2', '%Y-%m-%d %H:%M:%S' %}
|
||||
# Output: 2015-12-10 01:33:01
|
||||
|
||||
Weekday Support (for finding next/previous weekday):
|
||||
{% now 'UTC' + 'weekday=0' %} # Next Monday (0=Monday, 6=Sunday)
|
||||
{% now 'UTC' + 'weekday=4' %} # Next Friday
|
||||
|
||||
Configuration:
|
||||
- Default timezone: Set via TZ environment variable or override environment.default_timezone
|
||||
- Default format: '%a, %d %b %Y %H:%M:%S' (can be overridden via environment.datetime_format)
|
||||
|
||||
Environment Customization:
|
||||
from changedetectionio.jinja2_custom import create_jinja_env
|
||||
|
||||
jinja2_env = create_jinja_env()
|
||||
jinja2_env.default_timezone = 'America/New_York' # Override default timezone
|
||||
jinja2_env.datetime_format = '%Y-%m-%d %H:%M' # Override default format
|
||||
|
||||
Supported Offset Parameters:
|
||||
- years, months, weeks, days
|
||||
- hours, minutes, seconds, microseconds
|
||||
- weekday (0=Monday through 6=Sunday, must be integer)
|
||||
|
||||
Note:
|
||||
This extension uses the Arrow library for timezone-aware datetime handling.
|
||||
All timezone names should be valid IANA timezone identifiers (e.g., 'America/New_York').
|
||||
"""
|
||||
import arrow
|
||||
|
||||
from jinja2 import nodes
|
||||
from jinja2.ext import Extension
|
||||
import os
|
||||
|
||||
class TimeExtension(Extension):
|
||||
"""
|
||||
Jinja2 Extension providing the {% now %} tag for timezone-aware date/time rendering.
|
||||
|
||||
This extension adds two attributes to the Jinja2 environment:
|
||||
- datetime_format: Default strftime format string (default: '%a, %d %b %Y %H:%M:%S')
|
||||
- default_timezone: Default timezone for rendering (default: TZ env var or 'UTC')
|
||||
|
||||
Both can be overridden after environment creation by setting the attributes directly.
|
||||
"""
|
||||
|
||||
tags = {'now'}
|
||||
|
||||
def __init__(self, environment):
|
||||
"""Jinja2 Extension constructor."""
|
||||
super().__init__(environment)
|
||||
|
||||
environment.extend(
|
||||
datetime_format='%a, %d %b %Y %H:%M:%S',
|
||||
default_timezone=os.getenv('TZ', 'UTC').strip()
|
||||
)
|
||||
|
||||
def _datetime(self, timezone, operator, offset, datetime_format):
|
||||
"""
|
||||
Get current datetime with time offset applied.
|
||||
|
||||
Args:
|
||||
timezone: IANA timezone identifier (e.g., 'UTC', 'America/New_York') or empty string for default
|
||||
operator: '+' for addition or '-' for subtraction
|
||||
offset: Comma-separated offset parameters (e.g., 'hours=2,minutes=30')
|
||||
datetime_format: strftime format string or None to use environment default
|
||||
|
||||
Returns:
|
||||
Formatted datetime string with offset applied
|
||||
|
||||
Example:
|
||||
_datetime('UTC', '+', 'hours=2,minutes=30', '%Y-%m-%d %H:%M:%S')
|
||||
# Returns current time + 2.5 hours
|
||||
"""
|
||||
# Use default timezone if none specified
|
||||
if not timezone or timezone == '':
|
||||
timezone = self.environment.default_timezone
|
||||
|
||||
d = arrow.now(timezone)
|
||||
|
||||
# parse shift params from offset and include operator
|
||||
shift_params = {}
|
||||
for param in offset.split(','):
|
||||
interval, value = param.split('=')
|
||||
shift_params[interval.strip()] = float(operator + value.strip())
|
||||
|
||||
# Fix weekday parameter can not be float
|
||||
if 'weekday' in shift_params:
|
||||
shift_params['weekday'] = int(shift_params['weekday'])
|
||||
|
||||
d = d.shift(**shift_params)
|
||||
|
||||
if datetime_format is None:
|
||||
datetime_format = self.environment.datetime_format
|
||||
return d.strftime(datetime_format)
|
||||
|
||||
def _now(self, timezone, datetime_format):
|
||||
"""
|
||||
Get current datetime without any offset.
|
||||
|
||||
Args:
|
||||
timezone: IANA timezone identifier (e.g., 'UTC', 'America/New_York') or empty string for default
|
||||
datetime_format: strftime format string or None to use environment default
|
||||
|
||||
Returns:
|
||||
Formatted datetime string for current time
|
||||
|
||||
Example:
|
||||
_now('America/New_York', '%Y-%m-%d %H:%M:%S')
|
||||
# Returns current time in New York timezone
|
||||
"""
|
||||
# Use default timezone if none specified
|
||||
if not timezone or timezone == '':
|
||||
timezone = self.environment.default_timezone
|
||||
|
||||
if datetime_format is None:
|
||||
datetime_format = self.environment.datetime_format
|
||||
return arrow.now(timezone).strftime(datetime_format)
|
||||
|
||||
def parse(self, parser):
|
||||
"""
|
||||
Parse the {% now %} tag and generate appropriate AST nodes.
|
||||
|
||||
This method is called by Jinja2 when it encounters a {% now %} tag.
|
||||
It parses the tag syntax and determines whether to call _now() or _datetime()
|
||||
based on whether offset operations (+ or -) are present.
|
||||
|
||||
Supported syntax:
|
||||
{% now 'timezone' %} -> calls _now()
|
||||
{% now 'timezone', 'format' %} -> calls _now()
|
||||
{% now 'timezone' + 'offset' %} -> calls _datetime()
|
||||
{% now 'timezone' + 'offset', 'format' %} -> calls _datetime()
|
||||
{% now 'timezone' - 'offset', 'format' %} -> calls _datetime()
|
||||
|
||||
Args:
|
||||
parser: Jinja2 parser instance
|
||||
|
||||
Returns:
|
||||
nodes.Output: AST output node containing the formatted datetime string
|
||||
"""
|
||||
lineno = next(parser.stream).lineno
|
||||
|
||||
node = parser.parse_expression()
|
||||
|
||||
if parser.stream.skip_if('comma'):
|
||||
datetime_format = parser.parse_expression()
|
||||
else:
|
||||
datetime_format = nodes.Const(None)
|
||||
|
||||
if isinstance(node, nodes.Add):
|
||||
call_method = self.call_method(
|
||||
'_datetime',
|
||||
[node.left, nodes.Const('+'), node.right, datetime_format],
|
||||
lineno=lineno,
|
||||
)
|
||||
elif isinstance(node, nodes.Sub):
|
||||
call_method = self.call_method(
|
||||
'_datetime',
|
||||
[node.left, nodes.Const('-'), node.right, datetime_format],
|
||||
lineno=lineno,
|
||||
)
|
||||
else:
|
||||
call_method = self.call_method(
|
||||
'_now',
|
||||
[node, datetime_format],
|
||||
lineno=lineno,
|
||||
)
|
||||
return nodes.Output([call_method], lineno=lineno)
|
||||
6
changedetectionio/jinja2_custom/plugins/__init__.py
Normal file
6
changedetectionio/jinja2_custom/plugins/__init__.py
Normal file
@@ -0,0 +1,6 @@
|
||||
"""
|
||||
Jinja2 custom filter plugins for changedetection.io
|
||||
"""
|
||||
from .regex import regex_replace
|
||||
|
||||
__all__ = ['regex_replace']
|
||||
98
changedetectionio/jinja2_custom/plugins/regex.py
Normal file
98
changedetectionio/jinja2_custom/plugins/regex.py
Normal file
@@ -0,0 +1,98 @@
|
||||
"""
|
||||
Regex filter plugin for Jinja2 templates.
|
||||
|
||||
Provides regex_replace filter for pattern-based string replacements in templates.
|
||||
"""
|
||||
import re
|
||||
import signal
|
||||
from loguru import logger
|
||||
|
||||
|
||||
def regex_replace(value: str, pattern: str, replacement: str = '', count: int = 0) -> str:
|
||||
"""
|
||||
Replace occurrences of a regex pattern in a string.
|
||||
|
||||
Security: Protected against ReDoS (Regular Expression Denial of Service) attacks:
|
||||
- Limits input value size to prevent excessive processing
|
||||
- Uses timeout mechanism to prevent runaway regex operations
|
||||
- Validates pattern complexity to prevent catastrophic backtracking
|
||||
|
||||
Args:
|
||||
value: The input string to perform replacements on
|
||||
pattern: The regex pattern to search for
|
||||
replacement: The replacement string (default: '')
|
||||
count: Maximum number of replacements (0 = replace all, default: 0)
|
||||
|
||||
Returns:
|
||||
String with replacements applied, or original value on error
|
||||
|
||||
Example:
|
||||
{{ "hello world" | regex_replace("world", "universe") }}
|
||||
{{ diff | regex_replace("<td>([^<]+)</td><td>([^<]+)</td>", "Label1: \\1\\nLabel2: \\2") }}
|
||||
|
||||
Security limits:
|
||||
- Maximum input size: 10MB
|
||||
- Maximum pattern length: 500 characters
|
||||
- Operation timeout: 10 seconds
|
||||
- Dangerous nested quantifier patterns are rejected
|
||||
"""
|
||||
# Security limits
|
||||
MAX_INPUT_SIZE = 1024 * 1024 * 10 # 10MB max input size
|
||||
MAX_PATTERN_LENGTH = 500 # Maximum regex pattern length
|
||||
REGEX_TIMEOUT_SECONDS = 10 # Maximum time for regex operation
|
||||
|
||||
# Validate input sizes
|
||||
value_str = str(value)
|
||||
if len(value_str) > MAX_INPUT_SIZE:
|
||||
logger.warning(f"regex_replace: Input too large ({len(value_str)} bytes), truncating")
|
||||
value_str = value_str[:MAX_INPUT_SIZE]
|
||||
|
||||
if len(pattern) > MAX_PATTERN_LENGTH:
|
||||
logger.warning(f"regex_replace: Pattern too long ({len(pattern)} chars), rejecting")
|
||||
return value_str
|
||||
|
||||
# Check for potentially dangerous patterns (basic checks)
|
||||
# Nested quantifiers like (a+)+ can cause catastrophic backtracking
|
||||
dangerous_patterns = [
|
||||
r'\([^)]*\+[^)]*\)\+', # (x+)+
|
||||
r'\([^)]*\*[^)]*\)\+', # (x*)+
|
||||
r'\([^)]*\+[^)]*\)\*', # (x+)*
|
||||
r'\([^)]*\*[^)]*\)\*', # (x*)*
|
||||
]
|
||||
|
||||
for dangerous in dangerous_patterns:
|
||||
if re.search(dangerous, pattern):
|
||||
logger.warning(f"regex_replace: Potentially dangerous pattern detected: {pattern}")
|
||||
return value_str
|
||||
|
||||
def timeout_handler(signum, frame):
|
||||
raise TimeoutError("Regex operation timed out")
|
||||
|
||||
try:
|
||||
# Set up timeout for regex operation (Unix-like systems only)
|
||||
# This prevents ReDoS attacks
|
||||
old_handler = None
|
||||
if hasattr(signal, 'SIGALRM'):
|
||||
old_handler = signal.signal(signal.SIGALRM, timeout_handler)
|
||||
signal.alarm(REGEX_TIMEOUT_SECONDS)
|
||||
|
||||
try:
|
||||
result = re.sub(pattern, replacement, value_str, count=count)
|
||||
finally:
|
||||
# Cancel the alarm
|
||||
if hasattr(signal, 'SIGALRM'):
|
||||
signal.alarm(0)
|
||||
if old_handler is not None:
|
||||
signal.signal(signal.SIGALRM, old_handler)
|
||||
|
||||
return result
|
||||
|
||||
except TimeoutError:
|
||||
logger.error(f"regex_replace: Regex operation timed out - possible ReDoS attack. Pattern: {pattern}")
|
||||
return value_str
|
||||
except re.error as e:
|
||||
logger.warning(f"regex_replace: Invalid regex pattern: {e}")
|
||||
return value_str
|
||||
except Exception as e:
|
||||
logger.error(f"regex_replace: Unexpected error: {e}")
|
||||
return value_str
|
||||
58
changedetectionio/jinja2_custom/safe_jinja.py
Normal file
58
changedetectionio/jinja2_custom/safe_jinja.py
Normal file
@@ -0,0 +1,58 @@
|
||||
"""
|
||||
Safe Jinja2 render with max payload sizes
|
||||
|
||||
See https://jinja.palletsprojects.com/en/3.1.x/sandbox/#security-considerations
|
||||
"""
|
||||
|
||||
import jinja2.sandbox
|
||||
import typing as t
|
||||
import os
|
||||
from .extensions.TimeExtension import TimeExtension
|
||||
from .plugins import regex_replace
|
||||
|
||||
JINJA2_MAX_RETURN_PAYLOAD_SIZE = 1024 * int(os.getenv("JINJA2_MAX_RETURN_PAYLOAD_SIZE_KB", 1024 * 10))
|
||||
|
||||
# Default extensions - can be overridden in create_jinja_env()
|
||||
DEFAULT_JINJA2_EXTENSIONS = [TimeExtension]
|
||||
|
||||
def create_jinja_env(extensions=None, **kwargs) -> jinja2.sandbox.ImmutableSandboxedEnvironment:
|
||||
"""
|
||||
Create a sandboxed Jinja2 environment with our custom extensions and default timezone.
|
||||
|
||||
Args:
|
||||
extensions: List of extension classes to use (defaults to DEFAULT_JINJA2_EXTENSIONS)
|
||||
**kwargs: Additional arguments to pass to ImmutableSandboxedEnvironment
|
||||
|
||||
Returns:
|
||||
Configured Jinja2 environment
|
||||
"""
|
||||
if extensions is None:
|
||||
extensions = DEFAULT_JINJA2_EXTENSIONS
|
||||
|
||||
jinja2_env = jinja2.sandbox.ImmutableSandboxedEnvironment(
|
||||
extensions=extensions,
|
||||
**kwargs
|
||||
)
|
||||
|
||||
# Get default timezone from environment variable
|
||||
default_timezone = os.getenv('TZ', 'UTC').strip()
|
||||
jinja2_env.default_timezone = default_timezone
|
||||
|
||||
# Register custom filters
|
||||
jinja2_env.filters['regex_replace'] = regex_replace
|
||||
|
||||
return jinja2_env
|
||||
|
||||
|
||||
# This is used for notifications etc, so actually it's OK to send custom HTML such as <a href> etc, but it should limit what data is available.
|
||||
# (Which also limits available functions that could be called)
|
||||
def render(template_str, **args: t.Any) -> str:
|
||||
jinja2_env = create_jinja_env()
|
||||
output = jinja2_env.from_string(template_str).render(args)
|
||||
return output[:JINJA2_MAX_RETURN_PAYLOAD_SIZE]
|
||||
|
||||
def render_fully_escaped(content):
|
||||
env = jinja2.sandbox.ImmutableSandboxedEnvironment(autoescape=True)
|
||||
template = env.from_string("{{ some_html|e }}")
|
||||
return template.render(some_html=content)
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
from os import getenv
|
||||
from copy import deepcopy
|
||||
|
||||
from changedetectionio.blueprint.rss import RSS_FORMAT_TYPES
|
||||
|
||||
@@ -39,12 +40,12 @@ class model(dict):
|
||||
'api_access_token_enabled': True,
|
||||
'base_url' : None,
|
||||
'empty_pages_are_a_change': False,
|
||||
'extract_title_as_title': False,
|
||||
'fetch_backend': getenv("DEFAULT_FETCH_BACKEND", "html_requests"),
|
||||
'filter_failure_notification_threshold_attempts': _FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT,
|
||||
'global_ignore_text': [], # List of text to ignore when calculating the comparison checksum
|
||||
'global_subtractive_selectors': [],
|
||||
'ignore_whitespace': True,
|
||||
'ignore_status_codes': False, #@todo implement, as ternary.
|
||||
'notification_body': default_notification_body,
|
||||
'notification_format': default_notification_format,
|
||||
'notification_title': default_notification_title,
|
||||
@@ -55,13 +56,18 @@ class model(dict):
|
||||
'rss_access_token': None,
|
||||
'rss_content_format': RSS_FORMAT_TYPES[0][0],
|
||||
'rss_hide_muted_watches': True,
|
||||
'rss_reader_mode': False,
|
||||
'scheduler_timezone_default': None, # Default IANA timezone name
|
||||
'schema_version' : 0,
|
||||
'shared_diff_access': False,
|
||||
'webdriver_delay': None , # Extra delay in seconds before extracting text
|
||||
'strip_ignored_lines': False,
|
||||
'tags': {}, #@todo use Tag.model initialisers
|
||||
'timezone': None, # Default IANA timezone name
|
||||
'webdriver_delay': None , # Extra delay in seconds before extracting text
|
||||
'ui': {
|
||||
'use_page_title_in_list': True,
|
||||
'open_diff_in_new_tab': True,
|
||||
'socket_io_enabled': True,
|
||||
'favicons_enabled': True
|
||||
},
|
||||
}
|
||||
}
|
||||
@@ -69,7 +75,8 @@ class model(dict):
|
||||
|
||||
def __init__(self, *arg, **kw):
|
||||
super(model, self).__init__(*arg, **kw)
|
||||
self.update(self.base_config)
|
||||
# CRITICAL: deepcopy to avoid sharing mutable objects between instances
|
||||
self.update(deepcopy(self.base_config))
|
||||
|
||||
|
||||
def parse_headers_from_text_file(filepath):
|
||||
|
||||
@@ -1,37 +1,24 @@
|
||||
from blinker import signal
|
||||
from changedetectionio.validate_url import is_safe_valid_url
|
||||
|
||||
from changedetectionio.strtobool import strtobool
|
||||
from changedetectionio.safe_jinja import render as jinja_render
|
||||
from changedetectionio.jinja2_custom import render as jinja_render
|
||||
from . import watch_base
|
||||
import os
|
||||
import re
|
||||
from pathlib import Path
|
||||
from loguru import logger
|
||||
|
||||
from .. import jinja2_custom as safe_jinja
|
||||
from ..diff import ADDED_PLACEMARKER_OPEN
|
||||
from ..html_tools import TRANSLATE_WHITESPACE_TABLE
|
||||
|
||||
# Allowable protocols, protects against javascript: etc
|
||||
# file:// is further checked by ALLOW_FILE_URI
|
||||
SAFE_PROTOCOL_REGEX='^(http|https|ftp|file):'
|
||||
FAVICON_RESAVE_THRESHOLD_SECONDS=86400
|
||||
|
||||
|
||||
minimum_seconds_recheck_time = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 3))
|
||||
mtable = {'seconds': 1, 'minutes': 60, 'hours': 3600, 'days': 86400, 'weeks': 86400 * 7}
|
||||
|
||||
|
||||
def is_safe_url(test_url):
|
||||
# See https://github.com/dgtlmoon/changedetection.io/issues/1358
|
||||
|
||||
# Remove 'source:' prefix so we dont get 'source:javascript:' etc
|
||||
# 'source:' is a valid way to tell us to return the source
|
||||
|
||||
r = re.compile(re.escape('source:'), re.IGNORECASE)
|
||||
test_url = r.sub('', test_url)
|
||||
|
||||
pattern = re.compile(os.getenv('SAFE_PROTOCOL_REGEX', SAFE_PROTOCOL_REGEX), re.IGNORECASE)
|
||||
if not pattern.match(test_url.strip()):
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
|
||||
class model(watch_base):
|
||||
__newest_history_key = None
|
||||
__history_n = 0
|
||||
@@ -41,6 +28,7 @@ class model(watch_base):
|
||||
self.__datastore_path = kw.get('datastore_path')
|
||||
if kw.get('datastore_path'):
|
||||
del kw['datastore_path']
|
||||
|
||||
super(model, self).__init__(*arg, **kw)
|
||||
if kw.get('default'):
|
||||
self.update(kw['default'])
|
||||
@@ -60,6 +48,10 @@ class model(watch_base):
|
||||
|
||||
return False
|
||||
|
||||
@property
|
||||
def has_unviewed(self):
|
||||
return int(self.newest_history_key) > int(self['last_viewed']) and self.__history_n >= 2
|
||||
|
||||
def ensure_data_dir_exists(self):
|
||||
if not os.path.isdir(self.watch_data_dir):
|
||||
logger.debug(f"> Creating data dir {self.watch_data_dir}")
|
||||
@@ -69,7 +61,7 @@ class model(watch_base):
|
||||
def link(self):
|
||||
|
||||
url = self.get('url', '')
|
||||
if not is_safe_url(url):
|
||||
if not is_safe_valid_url(url):
|
||||
return 'DISABLED'
|
||||
|
||||
ready_url = url
|
||||
@@ -79,9 +71,8 @@ class model(watch_base):
|
||||
ready_url = jinja_render(template_str=url)
|
||||
except Exception as e:
|
||||
logger.critical(f"Invalid URL template for: '{url}' - {str(e)}")
|
||||
from flask import (
|
||||
flash, Markup, url_for
|
||||
)
|
||||
from flask import flash, url_for
|
||||
from markupsafe import Markup
|
||||
message = Markup('<a href="{}#general">The URL {} is invalid and cannot be used, click to edit</a>'.format(
|
||||
url_for('ui.ui_edit.edit_page', uuid=self.get('uuid')), self.get('url', '')))
|
||||
flash(message, 'error')
|
||||
@@ -91,10 +82,17 @@ class model(watch_base):
|
||||
ready_url=ready_url.replace('source:', '')
|
||||
|
||||
# Also double check it after any Jinja2 formatting just incase
|
||||
if not is_safe_url(ready_url):
|
||||
if not is_safe_valid_url(ready_url):
|
||||
return 'DISABLED'
|
||||
return ready_url
|
||||
|
||||
@property
|
||||
def domain_only_from_link(self):
|
||||
from urllib.parse import urlparse
|
||||
parsed = urlparse(self.link)
|
||||
domain = parsed.hostname
|
||||
return domain
|
||||
|
||||
def clear_watch(self):
|
||||
import pathlib
|
||||
|
||||
@@ -120,6 +118,10 @@ class model(watch_base):
|
||||
'remote_server_reply': None,
|
||||
'track_ldjson_price_data': None
|
||||
})
|
||||
watch_check_update = signal('watch_check_update')
|
||||
if watch_check_update:
|
||||
watch_check_update.send(watch_uuid=self.get('uuid'))
|
||||
|
||||
return
|
||||
|
||||
@property
|
||||
@@ -148,8 +150,8 @@ class model(watch_base):
|
||||
|
||||
@property
|
||||
def label(self):
|
||||
# Used for sorting
|
||||
return self.get('title') if self.get('title') else self.get('url')
|
||||
# Used for sorting, display, etc
|
||||
return self.get('title') or self.get('page_title') or self.link
|
||||
|
||||
@property
|
||||
def last_changed(self):
|
||||
@@ -401,6 +403,154 @@ class model(watch_base):
|
||||
# False is not an option for AppRise, must be type None
|
||||
return None
|
||||
|
||||
def favicon_is_expired(self):
|
||||
favicon_fname = self.get_favicon_filename()
|
||||
import glob
|
||||
import time
|
||||
|
||||
if not favicon_fname:
|
||||
return True
|
||||
try:
|
||||
fname = next(iter(glob.glob(os.path.join(self.watch_data_dir, "favicon.*"))), None)
|
||||
logger.trace(f"Favicon file maybe found at {fname}")
|
||||
if os.path.isfile(fname):
|
||||
file_age = int(time.time() - os.path.getmtime(fname))
|
||||
logger.trace(f"Favicon file age is {file_age}s")
|
||||
if file_age < FAVICON_RESAVE_THRESHOLD_SECONDS:
|
||||
return False
|
||||
except Exception as e:
|
||||
logger.critical(f"Exception checking Favicon age {str(e)}")
|
||||
return True
|
||||
|
||||
# Also in the case that the file didnt exist
|
||||
return True
|
||||
|
||||
def bump_favicon(self, url, favicon_base_64: str) -> None:
|
||||
from urllib.parse import urlparse
|
||||
import base64
|
||||
import binascii
|
||||
decoded = None
|
||||
|
||||
if url:
|
||||
try:
|
||||
parsed = urlparse(url)
|
||||
filename = os.path.basename(parsed.path)
|
||||
(base, extension) = filename.lower().strip().rsplit('.', 1)
|
||||
except ValueError:
|
||||
logger.error(f"UUID: {self.get('uuid')} Cant work out file extension from '{url}'")
|
||||
return None
|
||||
else:
|
||||
# Assume favicon.ico
|
||||
base = "favicon"
|
||||
extension = "ico"
|
||||
|
||||
fname = os.path.join(self.watch_data_dir, f"favicon.{extension}")
|
||||
|
||||
try:
|
||||
# validate=True makes sure the string only contains valid base64 chars
|
||||
decoded = base64.b64decode(favicon_base_64, validate=True)
|
||||
except (binascii.Error, ValueError) as e:
|
||||
logger.warning(f"UUID: {self.get('uuid')} FavIcon save data (Base64) corrupt? {str(e)}")
|
||||
else:
|
||||
if decoded:
|
||||
try:
|
||||
with open(fname, 'wb') as f:
|
||||
f.write(decoded)
|
||||
# A signal that could trigger the socket server to update the browser also
|
||||
watch_check_update = signal('watch_favicon_bump')
|
||||
if watch_check_update:
|
||||
watch_check_update.send(watch_uuid=self.get('uuid'))
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"UUID: {self.get('uuid')} error saving FavIcon to {fname} - {str(e)}")
|
||||
|
||||
# @todo - Store some checksum and only write when its different
|
||||
logger.debug(f"UUID: {self.get('uuid')} updated favicon to at {fname}")
|
||||
|
||||
def get_favicon_filename(self) -> str | None:
|
||||
"""
|
||||
Find any favicon.* file in the current working directory
|
||||
and return the contents of the newest one.
|
||||
|
||||
Returns:
|
||||
bytes: Contents of the newest favicon file, or None if not found.
|
||||
"""
|
||||
import glob
|
||||
|
||||
# Search for all favicon.* files
|
||||
files = glob.glob(os.path.join(self.watch_data_dir, "favicon.*"))
|
||||
|
||||
if not files:
|
||||
return None
|
||||
|
||||
# Find the newest by modification time
|
||||
newest_file = max(files, key=os.path.getmtime)
|
||||
return os.path.basename(newest_file)
|
||||
|
||||
def get_screenshot_as_thumbnail(self, max_age=3200):
|
||||
"""Return path to a square thumbnail of the most recent screenshot.
|
||||
|
||||
Creates a 150x150 pixel thumbnail from the top portion of the screenshot.
|
||||
|
||||
Args:
|
||||
max_age: Maximum age in seconds before recreating thumbnail
|
||||
|
||||
Returns:
|
||||
Path to thumbnail or None if no screenshot exists
|
||||
"""
|
||||
import os
|
||||
import time
|
||||
|
||||
thumbnail_path = os.path.join(self.watch_data_dir, "thumbnail.jpeg")
|
||||
top_trim = 500 # Pixels from top of screenshot to use
|
||||
|
||||
screenshot_path = self.get_screenshot()
|
||||
if not screenshot_path:
|
||||
return None
|
||||
|
||||
# Reuse thumbnail if it's fresh and screenshot hasn't changed
|
||||
if os.path.isfile(thumbnail_path):
|
||||
thumbnail_mtime = os.path.getmtime(thumbnail_path)
|
||||
screenshot_mtime = os.path.getmtime(screenshot_path)
|
||||
|
||||
if screenshot_mtime <= thumbnail_mtime and time.time() - thumbnail_mtime < max_age:
|
||||
return thumbnail_path
|
||||
|
||||
try:
|
||||
from PIL import Image
|
||||
|
||||
with Image.open(screenshot_path) as img:
|
||||
# Crop top portion first (full width, top_trim height)
|
||||
top_crop_height = min(top_trim, img.height)
|
||||
img = img.crop((0, 0, img.width, top_crop_height))
|
||||
|
||||
# Create a smaller intermediate image (to reduce memory usage)
|
||||
aspect = img.width / img.height
|
||||
interim_width = min(top_trim, img.width)
|
||||
interim_height = int(interim_width / aspect) if aspect > 0 else top_trim
|
||||
img = img.resize((interim_width, interim_height), Image.NEAREST)
|
||||
|
||||
# Convert to RGB if needed
|
||||
if img.mode != 'RGB':
|
||||
img = img.convert('RGB')
|
||||
|
||||
# Crop to square from top center
|
||||
square_size = min(img.width, img.height)
|
||||
left = (img.width - square_size) // 2
|
||||
img = img.crop((left, 0, left + square_size, square_size))
|
||||
|
||||
# Final resize to exact thumbnail size with better filter
|
||||
img = img.resize((350, 350), Image.BILINEAR)
|
||||
|
||||
# Save with optimized settings
|
||||
img.save(thumbnail_path, "JPEG", quality=75, optimize=True)
|
||||
|
||||
return thumbnail_path
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error creating thumbnail for {self.get('uuid')}: {str(e)}")
|
||||
return None
|
||||
|
||||
def __get_file_ctime(self, filename):
|
||||
fname = os.path.join(self.watch_data_dir, filename)
|
||||
if os.path.isfile(fname):
|
||||
@@ -494,7 +644,7 @@ class model(watch_base):
|
||||
if res:
|
||||
if not csv_writer:
|
||||
# A file on the disk can be transferred much faster via flask than a string reply
|
||||
csv_output_filename = 'report.csv'
|
||||
csv_output_filename = f"report-{self.get('uuid')}.csv"
|
||||
f = open(os.path.join(self.watch_data_dir, csv_output_filename), 'w')
|
||||
# @todo some headers in the future
|
||||
#fieldnames = ['Epoch seconds', 'Date']
|
||||
@@ -648,3 +798,44 @@ class model(watch_base):
|
||||
if step_n:
|
||||
available.append(step_n.group(1))
|
||||
return available
|
||||
|
||||
def compile_error_texts(self, has_proxies=None):
|
||||
"""Compile error texts for this watch.
|
||||
Accepts has_proxies parameter to ensure it works even outside app context"""
|
||||
from flask import url_for
|
||||
from markupsafe import Markup
|
||||
|
||||
output = [] # Initialize as list since we're using append
|
||||
last_error = self.get('last_error','')
|
||||
|
||||
try:
|
||||
url_for('settings.settings_page')
|
||||
except Exception as e:
|
||||
has_app_context = False
|
||||
else:
|
||||
has_app_context = True
|
||||
|
||||
# has app+request context, we can use url_for()
|
||||
if has_app_context:
|
||||
if last_error:
|
||||
if '403' in last_error:
|
||||
if has_proxies:
|
||||
output.append(str(Markup(f"{last_error} - <a href=\"{url_for('settings.settings_page', uuid=self.get('uuid'))}\">Try other proxies/location</a> '")))
|
||||
else:
|
||||
output.append(str(Markup(f"{last_error} - <a href=\"{url_for('settings.settings_page', uuid=self.get('uuid'))}\">Try adding external proxies/locations</a> '")))
|
||||
else:
|
||||
output.append(str(Markup(last_error)))
|
||||
|
||||
if self.get('last_notification_error'):
|
||||
output.append(str(Markup(f"<div class=\"notification-error\"><a href=\"{url_for('settings.notification_logs')}\">{ self.get('last_notification_error') }</a></div>")))
|
||||
|
||||
else:
|
||||
# Lo_Fi version - no app context, cant rely on Jinja2 Markup
|
||||
if last_error:
|
||||
output.append(safe_jinja.render_fully_escaped(last_error))
|
||||
if self.get('last_notification_error'):
|
||||
output.append(safe_jinja.render_fully_escaped(self.get('last_notification_error')))
|
||||
|
||||
res = "\n".join(output)
|
||||
return res
|
||||
|
||||
|
||||
@@ -2,7 +2,8 @@ import os
|
||||
import uuid
|
||||
|
||||
from changedetectionio import strtobool
|
||||
default_notification_format_for_watch = 'System default'
|
||||
USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH = 'System default'
|
||||
CONDITIONS_MATCH_LOGIC_DEFAULT = 'ALL'
|
||||
|
||||
class watch_base(dict):
|
||||
|
||||
@@ -15,13 +16,14 @@ class watch_base(dict):
|
||||
'body': None,
|
||||
'browser_steps': [],
|
||||
'browser_steps_last_error_step': None,
|
||||
'conditions' : {},
|
||||
'conditions_match_logic': CONDITIONS_MATCH_LOGIC_DEFAULT,
|
||||
'check_count': 0,
|
||||
'check_unique_lines': False, # On change-detected, compare against all history if its something new
|
||||
'consecutive_filter_failures': 0, # Every time the CSS/xPath filter cannot be located, reset when all is fine.
|
||||
'content-type': None,
|
||||
'date_created': None,
|
||||
'extract_text': [], # Extract text by regex after filters
|
||||
'extract_title_as_title': False,
|
||||
'fetch_backend': 'system', # plaintext, playwright etc
|
||||
'fetch_time': 0.0,
|
||||
'filter_failure_notification_send': strtobool(os.getenv('FILTER_FAILURE_NOTIFICATION_SEND_DEFAULT', 'True')),
|
||||
@@ -32,19 +34,22 @@ class watch_base(dict):
|
||||
'has_ldjson_price_data': None,
|
||||
'headers': {}, # Extra headers to send
|
||||
'ignore_text': [], # List of text to ignore when calculating the comparison checksum
|
||||
'ignore_status_codes': None,
|
||||
'in_stock_only': True, # Only trigger change on going to instock from out-of-stock
|
||||
'include_filters': [],
|
||||
'last_checked': 0,
|
||||
'last_error': False,
|
||||
'last_notification_error': None,
|
||||
'last_viewed': 0, # history key value of the last viewed via the [diff] link
|
||||
'method': 'GET',
|
||||
'notification_alert_count': 0,
|
||||
'notification_body': None,
|
||||
'notification_format': default_notification_format_for_watch,
|
||||
'notification_format': USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH,
|
||||
'notification_muted': False,
|
||||
'notification_screenshot': False, # Include the latest screenshot if available and supported by the apprise URL
|
||||
'notification_title': None,
|
||||
'notification_urls': [], # List of URLs to add to the notification Queue (Usually AppRise)
|
||||
'page_title': None, # <title> from the page
|
||||
'paused': False,
|
||||
'previous_md5': False,
|
||||
'previous_md5_before_filters': False, # Used for skipping changedetection entirely
|
||||
@@ -53,6 +58,7 @@ class watch_base(dict):
|
||||
'proxy': None, # Preferred proxy connection
|
||||
'remote_server_reply': None, # From 'server' reply header
|
||||
'sort_text_alphabetically': False,
|
||||
'strip_ignored_lines': None,
|
||||
'subtractive_selectors': [],
|
||||
'tag': '', # Old system of text name for a tag, to be removed
|
||||
'tags': [], # list of UUIDs to App.Tags
|
||||
@@ -118,12 +124,13 @@ class watch_base(dict):
|
||||
}
|
||||
},
|
||||
},
|
||||
'title': None,
|
||||
'title': None, # An arbitrary field that overrides 'page_title'
|
||||
'track_ldjson_price_data': None,
|
||||
'trim_text_whitespace': False,
|
||||
'remove_duplicate_lines': False,
|
||||
'trigger_text': [], # List of text or regex to wait for until a change is detected
|
||||
'url': '',
|
||||
'use_page_title_in_list': None, # None = use system settings
|
||||
'uuid': str(uuid.uuid4()),
|
||||
'webdriver_delay': None,
|
||||
'webdriver_js_execute_code': None, # Run before change-detection
|
||||
|
||||
@@ -1,35 +1,16 @@
|
||||
from changedetectionio.model import default_notification_format_for_watch
|
||||
from changedetectionio.model import USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH
|
||||
|
||||
ult_notification_format_for_watch = 'System default'
|
||||
default_notification_format = 'HTML Color'
|
||||
default_notification_format = 'htmlcolor'
|
||||
default_notification_body = '{{watch_url}} had a change.\n---\n{{diff}}\n---\n'
|
||||
default_notification_title = 'ChangeDetection.io Notification - {{watch_url}}'
|
||||
|
||||
# The values (markdown etc) are from apprise NotifyFormat,
|
||||
# But to avoid importing the whole heavy module just use the same strings here.
|
||||
valid_notification_formats = {
|
||||
'Text': 'text',
|
||||
'Markdown': 'markdown',
|
||||
'HTML': 'html',
|
||||
'HTML Color': 'htmlcolor',
|
||||
'text': 'Plain Text',
|
||||
'html': 'HTML',
|
||||
'htmlcolor': 'HTML Color',
|
||||
'markdown': 'Markdown to HTML',
|
||||
# Used only for editing a watch (not for global)
|
||||
default_notification_format_for_watch: default_notification_format_for_watch
|
||||
}
|
||||
|
||||
|
||||
valid_tokens = {
|
||||
'base_url': '',
|
||||
'current_snapshot': '',
|
||||
'diff': '',
|
||||
'diff_added': '',
|
||||
'diff_full': '',
|
||||
'diff_patch': '',
|
||||
'diff_removed': '',
|
||||
'diff_url': '',
|
||||
'preview_url': '',
|
||||
'triggered_text': '',
|
||||
'watch_tag': '',
|
||||
'watch_title': '',
|
||||
'watch_url': '',
|
||||
'watch_uuid': '',
|
||||
USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH: USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH
|
||||
}
|
||||
|
||||
@@ -1,10 +1,61 @@
|
||||
"""
|
||||
Custom Apprise HTTP Handlers with format= Parameter Support
|
||||
|
||||
IMPORTANT: This module works around a limitation in Apprise's @notify decorator.
|
||||
|
||||
THE PROBLEM:
|
||||
-------------
|
||||
When using Apprise's @notify decorator to create custom notification handlers, the
|
||||
decorator creates a CustomNotifyPlugin that uses parse_url(..., simple=True) to parse
|
||||
URLs. This simple parsing mode does NOT extract the format= query parameter from the URL
|
||||
and set it as a top-level parameter that NotifyBase.__init__ can use to set notify_format.
|
||||
|
||||
As a result:
|
||||
1. URL: post://example.com/webhook?format=html
|
||||
2. Apprise parses this and sees format=html in qsd (query string dictionary)
|
||||
3. But it does NOT extract it and pass it to NotifyBase.__init__
|
||||
4. NotifyBase defaults to notify_format=TEXT
|
||||
5. When you call apobj.notify(body="<html>...", body_format="html"):
|
||||
- Apprise sees: input format = html, output format (notify_format) = text
|
||||
- Apprise calls convert_between("html", "text", body)
|
||||
- This strips all HTML tags, leaving only plain text
|
||||
6. Your custom handler receives stripped plain text instead of HTML
|
||||
|
||||
THE SOLUTION:
|
||||
-------------
|
||||
Instead of using the @notify decorator directly, we:
|
||||
1. Manually register custom plugins using plugins.N_MGR.add()
|
||||
2. Create a CustomHTTPHandler class that extends CustomNotifyPlugin
|
||||
3. Override __init__ to extract format= from qsd and set it as kwargs['format']
|
||||
4. Call NotifyBase.__init__ which properly sets notify_format from kwargs['format']
|
||||
5. Set up _default_args like CustomNotifyPlugin does for compatibility
|
||||
|
||||
This ensures that when format=html is in the URL:
|
||||
- notify_format is set to HTML
|
||||
- Apprise sees: input format = html, output format = html
|
||||
- No conversion happens (convert_between returns content unchanged)
|
||||
- Your custom handler receives the original HTML intact
|
||||
|
||||
TESTING:
|
||||
--------
|
||||
To verify this works:
|
||||
>>> apobj = apprise.Apprise()
|
||||
>>> apobj.add('post://localhost:5005/test?format=html')
|
||||
>>> for server in apobj:
|
||||
... print(server.notify_format) # Should print: html (not text)
|
||||
>>> apobj.notify(body='<span>Test</span>', body_format='html')
|
||||
# Your handler should receive '<span>Test</span>' not 'Test'
|
||||
"""
|
||||
|
||||
import json
|
||||
import re
|
||||
from urllib.parse import unquote_plus
|
||||
|
||||
import requests
|
||||
from apprise.decorators import notify
|
||||
from apprise.utils.parse import parse_url as apprise_parse_url
|
||||
from apprise import plugins
|
||||
from apprise.decorators.base import CustomNotifyPlugin
|
||||
from apprise.utils.parse import parse_url as apprise_parse_url, url_assembly
|
||||
from apprise.utils.logic import dict_full_update
|
||||
from loguru import logger
|
||||
from requests.structures import CaseInsensitiveDict
|
||||
|
||||
@@ -12,13 +63,66 @@ SUPPORTED_HTTP_METHODS = {"get", "post", "put", "delete", "patch", "head"}
|
||||
|
||||
|
||||
def notify_supported_methods(func):
|
||||
"""Register custom HTTP method handlers that properly support format= parameter."""
|
||||
for method in SUPPORTED_HTTP_METHODS:
|
||||
func = notify(on=method)(func)
|
||||
# Add support for https, for each supported http method
|
||||
func = notify(on=f"{method}s")(func)
|
||||
_register_http_handler(method, func)
|
||||
_register_http_handler(f"{method}s", func)
|
||||
return func
|
||||
|
||||
|
||||
def _register_http_handler(schema, send_func):
|
||||
"""Register a custom HTTP handler that extracts format= from URL query parameters."""
|
||||
|
||||
# Parse base URL
|
||||
base_url = f"{schema}://"
|
||||
base_args = apprise_parse_url(base_url, default_schema=schema, verify_host=False, simple=True)
|
||||
|
||||
class CustomHTTPHandler(CustomNotifyPlugin):
|
||||
secure_protocol = schema
|
||||
service_name = f"Custom HTTP - {schema.upper()}"
|
||||
_base_args = base_args
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
# Extract format from qsd and set it as a top-level kwarg
|
||||
# This allows NotifyBase.__init__ to properly set notify_format
|
||||
if 'qsd' in kwargs and 'format' in kwargs['qsd']:
|
||||
kwargs['format'] = kwargs['qsd']['format']
|
||||
|
||||
# Call NotifyBase.__init__ (skip CustomNotifyPlugin.__init__)
|
||||
super(CustomNotifyPlugin, self).__init__(**kwargs)
|
||||
|
||||
# Set up _default_args like CustomNotifyPlugin does
|
||||
self._default_args = {}
|
||||
kwargs.pop("secure", None)
|
||||
dict_full_update(self._default_args, self._base_args)
|
||||
dict_full_update(self._default_args, kwargs)
|
||||
self._default_args["url"] = url_assembly(**self._default_args)
|
||||
|
||||
__send = staticmethod(send_func)
|
||||
|
||||
def send(self, body, title="", notify_type="info", *args, **kwargs):
|
||||
"""Call the custom send function."""
|
||||
try:
|
||||
result = self.__send(
|
||||
body, title, notify_type,
|
||||
*args,
|
||||
meta=self._default_args,
|
||||
**kwargs
|
||||
)
|
||||
return True if result is None else bool(result)
|
||||
except Exception as e:
|
||||
self.logger.warning(f"Exception in custom HTTP handler: {e}")
|
||||
return False
|
||||
|
||||
# Register the plugin
|
||||
plugins.N_MGR.add(
|
||||
plugin=CustomHTTPHandler,
|
||||
schemas=schema,
|
||||
send_func=send_func,
|
||||
url=base_url,
|
||||
)
|
||||
|
||||
|
||||
def _get_auth(parsed_url: dict) -> str | tuple[str, str]:
|
||||
user: str | None = parsed_url.get("user")
|
||||
password: str | None = parsed_url.get("password")
|
||||
@@ -70,9 +174,12 @@ def apprise_http_custom_handler(
|
||||
title: str,
|
||||
notify_type: str,
|
||||
meta: dict,
|
||||
body_format: str = None,
|
||||
*args,
|
||||
**kwargs,
|
||||
) -> bool:
|
||||
|
||||
|
||||
url: str = meta.get("url")
|
||||
schema: str = meta.get("schema")
|
||||
method: str = re.sub(r"s$", "", schema).upper()
|
||||
@@ -88,25 +195,16 @@ def apprise_http_custom_handler(
|
||||
|
||||
url = re.sub(rf"^{schema}", "https" if schema.endswith("s") else "http", parsed_url.get("url"))
|
||||
|
||||
try:
|
||||
response = requests.request(
|
||||
method=method,
|
||||
url=url,
|
||||
auth=auth,
|
||||
headers=headers,
|
||||
params=params,
|
||||
data=body.encode("utf-8") if isinstance(body, str) else body,
|
||||
)
|
||||
response = requests.request(
|
||||
method=method,
|
||||
url=url,
|
||||
auth=auth,
|
||||
headers=headers,
|
||||
params=params,
|
||||
data=body.encode("utf-8") if isinstance(body, str) else body,
|
||||
)
|
||||
|
||||
response.raise_for_status()
|
||||
response.raise_for_status()
|
||||
|
||||
logger.info(f"Successfully sent custom notification to {url}")
|
||||
return True
|
||||
|
||||
except requests.RequestException as e:
|
||||
logger.error(f"Remote host error while sending custom notification to {url}: {e}")
|
||||
return False
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Unexpected error occurred while sending custom notification to {url}: {e}")
|
||||
return False
|
||||
logger.info(f"Successfully sent custom notification to {url}")
|
||||
return True
|
||||
|
||||
286
changedetectionio/notification/apprise_plugin/discord.py
Normal file
286
changedetectionio/notification/apprise_plugin/discord.py
Normal file
@@ -0,0 +1,286 @@
|
||||
"""
|
||||
Custom Discord plugin for changedetection.io
|
||||
Extends Apprise's Discord plugin to support custom colored embeds for removed/added content
|
||||
"""
|
||||
from apprise.plugins.discord import NotifyDiscord
|
||||
from apprise.decorators import notify
|
||||
from apprise.common import NotifyFormat
|
||||
from loguru import logger
|
||||
|
||||
# Import placeholders from changedetection's diff module
|
||||
from ...diff import (
|
||||
REMOVED_PLACEMARKER_OPEN,
|
||||
REMOVED_PLACEMARKER_CLOSED,
|
||||
ADDED_PLACEMARKER_OPEN,
|
||||
ADDED_PLACEMARKER_CLOSED,
|
||||
CHANGED_PLACEMARKER_OPEN,
|
||||
CHANGED_PLACEMARKER_CLOSED,
|
||||
CHANGED_INTO_PLACEMARKER_OPEN,
|
||||
CHANGED_INTO_PLACEMARKER_CLOSED,
|
||||
)
|
||||
|
||||
# Discord embed sidebar colors for different change types
|
||||
DISCORD_COLOR_UNCHANGED = 8421504 # Gray (#808080)
|
||||
DISCORD_COLOR_REMOVED = 16711680 # Red (#FF0000)
|
||||
DISCORD_COLOR_ADDED = 65280 # Green (#00FF00)
|
||||
DISCORD_COLOR_CHANGED = 16753920 # Orange (#FFA500)
|
||||
DISCORD_COLOR_CHANGED_INTO = 3447003 # Blue (#5865F2 - Discord blue)
|
||||
DISCORD_COLOR_WARNING = 16776960 # Yellow (#FFFF00)
|
||||
|
||||
|
||||
class NotifyDiscordCustom(NotifyDiscord):
|
||||
"""
|
||||
Custom Discord notification handler that supports multiple colored embeds
|
||||
for showing removed (red) and added (green) content separately.
|
||||
"""
|
||||
|
||||
def send(self, body, title="", notify_type=None, attach=None, **kwargs):
|
||||
"""
|
||||
Override send method to create custom embeds with red/green colors
|
||||
for removed/added content when placeholders are present.
|
||||
"""
|
||||
|
||||
# Check if body contains our diff placeholders
|
||||
has_removed = REMOVED_PLACEMARKER_OPEN in body
|
||||
has_added = ADDED_PLACEMARKER_OPEN in body
|
||||
has_changed = CHANGED_PLACEMARKER_OPEN in body
|
||||
has_changed_into = CHANGED_INTO_PLACEMARKER_OPEN in body
|
||||
|
||||
# If we have diff placeholders and we're in markdown/html format, create custom embeds
|
||||
if (has_removed or has_added or has_changed or has_changed_into) and self.notify_format in (NotifyFormat.MARKDOWN, NotifyFormat.HTML):
|
||||
return self._send_with_colored_embeds(body, title, notify_type, attach, **kwargs)
|
||||
|
||||
# Otherwise, use the parent class's default behavior
|
||||
return super().send(body, title, notify_type, attach, **kwargs)
|
||||
|
||||
def _send_with_colored_embeds(self, body, title, notify_type, attach, **kwargs):
|
||||
"""
|
||||
Send Discord message with embeds in the original diff order.
|
||||
Preserves the sequence: unchanged -> removed -> added -> unchanged, etc.
|
||||
"""
|
||||
from datetime import datetime, timezone
|
||||
|
||||
payload = {
|
||||
"tts": self.tts,
|
||||
"wait": self.tts is False,
|
||||
}
|
||||
|
||||
if self.flags:
|
||||
payload["flags"] = self.flags
|
||||
|
||||
# Acquire image_url
|
||||
image_url = self.image_url(notify_type)
|
||||
|
||||
if self.avatar and (image_url or self.avatar_url):
|
||||
payload["avatar_url"] = self.avatar_url if self.avatar_url else image_url
|
||||
|
||||
if self.user:
|
||||
payload["username"] = self.user
|
||||
|
||||
# Associate our thread_id with our message
|
||||
params = {"thread_id": self.thread_id} if self.thread_id else None
|
||||
|
||||
# Build embeds array preserving order
|
||||
embeds = []
|
||||
|
||||
# Add title as plain bold text in message content (not an embed)
|
||||
if title:
|
||||
payload["content"] = f"**{title}**"
|
||||
|
||||
# Parse the body into ordered chunks
|
||||
chunks = self._parse_body_into_chunks(body)
|
||||
|
||||
# Discord limits:
|
||||
# - Max 10 embeds per message
|
||||
# - Max 6000 characters total across all embeds
|
||||
# - Max 4096 characters per embed description
|
||||
max_embeds = 10
|
||||
max_total_chars = 6000
|
||||
max_embed_description = 4096
|
||||
|
||||
# All 10 embed slots are available for content
|
||||
max_content_embeds = max_embeds
|
||||
|
||||
# Start character count
|
||||
total_chars = 0
|
||||
|
||||
# Create embeds from chunks in order (no titles, just color coding)
|
||||
for chunk_type, content in chunks:
|
||||
if not content.strip():
|
||||
continue
|
||||
|
||||
# Truncate individual embed description if needed
|
||||
if len(content) > max_embed_description:
|
||||
content = content[:max_embed_description - 3] + "..."
|
||||
|
||||
# Check if we're approaching the embed count limit
|
||||
# We need room for the warning embed, so stop at max_content_embeds - 1
|
||||
current_content_embeds = len(embeds)
|
||||
if current_content_embeds >= max_content_embeds - 1:
|
||||
# Add a truncation notice (this will be the 10th embed)
|
||||
embeds.append({
|
||||
"description": "⚠️ Content truncated (Discord 10 embed limit reached) - Tip: Select 'Plain Text' or 'HTML' format for longer diffs",
|
||||
"color": DISCORD_COLOR_WARNING,
|
||||
})
|
||||
break
|
||||
|
||||
# Check if adding this embed would exceed total character limit
|
||||
if total_chars + len(content) > max_total_chars:
|
||||
# Add a truncation notice
|
||||
remaining_chars = max_total_chars - total_chars
|
||||
if remaining_chars > 100:
|
||||
# Add partial content if we have room
|
||||
truncated_content = content[:remaining_chars - 100] + "..."
|
||||
embeds.append({
|
||||
"description": truncated_content,
|
||||
"color": (DISCORD_COLOR_UNCHANGED if chunk_type == "unchanged"
|
||||
else DISCORD_COLOR_REMOVED if chunk_type == "removed"
|
||||
else DISCORD_COLOR_ADDED),
|
||||
})
|
||||
embeds.append({
|
||||
"description": "⚠️ Content truncated (Discord 6000 char limit reached)\nTip: Select 'Plain Text' or 'HTML' format for longer diffs",
|
||||
"color": DISCORD_COLOR_WARNING,
|
||||
})
|
||||
break
|
||||
|
||||
if chunk_type == "unchanged":
|
||||
embeds.append({
|
||||
"description": content,
|
||||
"color": DISCORD_COLOR_UNCHANGED,
|
||||
})
|
||||
elif chunk_type == "removed":
|
||||
embeds.append({
|
||||
"description": content,
|
||||
"color": DISCORD_COLOR_REMOVED,
|
||||
})
|
||||
elif chunk_type == "added":
|
||||
embeds.append({
|
||||
"description": content,
|
||||
"color": DISCORD_COLOR_ADDED,
|
||||
})
|
||||
elif chunk_type == "changed":
|
||||
# Changed (old value) - use orange to distinguish from pure removal
|
||||
embeds.append({
|
||||
"description": content,
|
||||
"color": DISCORD_COLOR_CHANGED,
|
||||
})
|
||||
elif chunk_type == "changed_into":
|
||||
# Changed into (new value) - use blue to distinguish from pure addition
|
||||
embeds.append({
|
||||
"description": content,
|
||||
"color": DISCORD_COLOR_CHANGED_INTO,
|
||||
})
|
||||
|
||||
total_chars += len(content)
|
||||
|
||||
if embeds:
|
||||
payload["embeds"] = embeds
|
||||
|
||||
# Send the payload using parent's _send method
|
||||
if not self._send(payload, params=params):
|
||||
return False
|
||||
|
||||
# Handle attachments if present
|
||||
if attach and self.attachment_support:
|
||||
payload.update({
|
||||
"tts": False,
|
||||
"wait": True,
|
||||
})
|
||||
payload.pop("embeds", None)
|
||||
payload.pop("content", None)
|
||||
payload.pop("allow_mentions", None)
|
||||
|
||||
for attachment in attach:
|
||||
self.logger.info(f"Posting Discord Attachment {attachment.name}")
|
||||
if not self._send(payload, params=params, attach=attachment):
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
def _parse_body_into_chunks(self, body):
|
||||
"""
|
||||
Parse the body into ordered chunks of (type, content) tuples.
|
||||
Types: "unchanged", "removed", "added", "changed", "changed_into"
|
||||
Preserves the original order of the diff.
|
||||
"""
|
||||
chunks = []
|
||||
position = 0
|
||||
|
||||
while position < len(body):
|
||||
# Find the next marker
|
||||
next_removed = body.find(REMOVED_PLACEMARKER_OPEN, position)
|
||||
next_added = body.find(ADDED_PLACEMARKER_OPEN, position)
|
||||
next_changed = body.find(CHANGED_PLACEMARKER_OPEN, position)
|
||||
next_changed_into = body.find(CHANGED_INTO_PLACEMARKER_OPEN, position)
|
||||
|
||||
# Determine which marker comes first
|
||||
if next_removed == -1 and next_added == -1 and next_changed == -1 and next_changed_into == -1:
|
||||
# No more markers, rest is unchanged
|
||||
if position < len(body):
|
||||
chunks.append(("unchanged", body[position:]))
|
||||
break
|
||||
|
||||
# Find the earliest marker
|
||||
next_marker_pos = None
|
||||
next_marker_type = None
|
||||
|
||||
# Compare all marker positions to find the earliest
|
||||
markers = []
|
||||
if next_removed != -1:
|
||||
markers.append((next_removed, "removed"))
|
||||
if next_added != -1:
|
||||
markers.append((next_added, "added"))
|
||||
if next_changed != -1:
|
||||
markers.append((next_changed, "changed"))
|
||||
if next_changed_into != -1:
|
||||
markers.append((next_changed_into, "changed_into"))
|
||||
|
||||
if markers:
|
||||
next_marker_pos, next_marker_type = min(markers, key=lambda x: x[0])
|
||||
|
||||
# Add unchanged content before the marker
|
||||
if next_marker_pos > position:
|
||||
chunks.append(("unchanged", body[position:next_marker_pos]))
|
||||
|
||||
# Find the closing marker
|
||||
if next_marker_type == "removed":
|
||||
open_marker = REMOVED_PLACEMARKER_OPEN
|
||||
close_marker = REMOVED_PLACEMARKER_CLOSED
|
||||
elif next_marker_type == "added":
|
||||
open_marker = ADDED_PLACEMARKER_OPEN
|
||||
close_marker = ADDED_PLACEMARKER_CLOSED
|
||||
elif next_marker_type == "changed":
|
||||
open_marker = CHANGED_PLACEMARKER_OPEN
|
||||
close_marker = CHANGED_PLACEMARKER_CLOSED
|
||||
else: # changed_into
|
||||
open_marker = CHANGED_INTO_PLACEMARKER_OPEN
|
||||
close_marker = CHANGED_INTO_PLACEMARKER_CLOSED
|
||||
|
||||
close_pos = body.find(close_marker, next_marker_pos)
|
||||
|
||||
if close_pos == -1:
|
||||
# No closing marker, take rest as this type
|
||||
content = body[next_marker_pos + len(open_marker):]
|
||||
chunks.append((next_marker_type, content))
|
||||
break
|
||||
else:
|
||||
# Extract content between markers
|
||||
content = body[next_marker_pos + len(open_marker):close_pos]
|
||||
chunks.append((next_marker_type, content))
|
||||
position = close_pos + len(close_marker)
|
||||
|
||||
return chunks
|
||||
|
||||
|
||||
# Register the custom Discord handler with Apprise
|
||||
# This will override the built-in discord:// handler
|
||||
@notify(on="discord")
|
||||
def discord_custom_wrapper(body, title, notify_type, meta, body_format=None, *args, **kwargs):
|
||||
"""
|
||||
Wrapper function to make the custom Discord handler work with Apprise's decorator system.
|
||||
Note: This decorator approach may not work for overriding built-in plugins.
|
||||
The class-based approach above is the proper way to extend NotifyDiscord.
|
||||
"""
|
||||
logger.info("Custom Discord handler called")
|
||||
# This is here for potential future use with decorator-based registration
|
||||
return True
|
||||
42
changedetectionio/notification/email_helpers.py
Normal file
42
changedetectionio/notification/email_helpers.py
Normal file
@@ -0,0 +1,42 @@
|
||||
def as_monospaced_html_email(content: str, title: str) -> str:
|
||||
"""
|
||||
Wraps `content` in a minimal, email-safe HTML template
|
||||
that forces monospace rendering across Gmail, Hotmail, Apple Mail, etc.
|
||||
|
||||
Args:
|
||||
content: The body text (plain text or HTML-like).
|
||||
title: The title plaintext
|
||||
Returns:
|
||||
A complete HTML document string suitable for sending as an email body.
|
||||
"""
|
||||
|
||||
# All line feed types should be removed and then this function should only be fed <br>'s
|
||||
# Then it works with our <pre> styling without double linefeeds
|
||||
content = content.translate(str.maketrans('', '', '\r\n'))
|
||||
|
||||
if title:
|
||||
import html
|
||||
title = html.escape(title)
|
||||
else:
|
||||
title = ''
|
||||
# 2. Full email-safe HTML
|
||||
html_email = f"""<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="x-apple-disable-message-reformatting">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<!--[if mso]>
|
||||
<style>
|
||||
body, div, pre, td {{ font-family: "Courier New", Courier, monospace !important; }}
|
||||
</style>
|
||||
<![endif]-->
|
||||
<title>{title}</title>
|
||||
</head>
|
||||
<body style="-webkit-text-size-adjust:100%;-ms-text-size-adjust:100%;">
|
||||
<pre role="article" aria-roledescription="email" lang="en"
|
||||
style="font-family: monospace, 'Courier New', Courier; font-size: 0.9rem;
|
||||
white-space: pre-wrap; word-break: break-word;">{content}</pre>
|
||||
</body>
|
||||
</html>"""
|
||||
return html_email
|
||||
@@ -1,32 +1,285 @@
|
||||
|
||||
import time
|
||||
import re
|
||||
import apprise
|
||||
from apprise import NotifyFormat
|
||||
from loguru import logger
|
||||
|
||||
from urllib.parse import urlparse
|
||||
from .apprise_plugin.assets import apprise_asset, APPRISE_AVATAR_URL
|
||||
from .apprise_plugin.custom_handlers import SUPPORTED_HTTP_METHODS
|
||||
from .email_helpers import as_monospaced_html_email
|
||||
from ..diff import HTML_REMOVED_STYLE, REMOVED_PLACEMARKER_OPEN, REMOVED_PLACEMARKER_CLOSED, ADDED_PLACEMARKER_OPEN, HTML_ADDED_STYLE, \
|
||||
ADDED_PLACEMARKER_CLOSED, CHANGED_INTO_PLACEMARKER_OPEN, CHANGED_INTO_PLACEMARKER_CLOSED, CHANGED_PLACEMARKER_OPEN, \
|
||||
CHANGED_PLACEMARKER_CLOSED, HTML_CHANGED_STYLE, HTML_CHANGED_INTO_STYLE
|
||||
from ..notification_service import NotificationContextData, CUSTOM_LINEBREAK_PLACEHOLDER
|
||||
|
||||
|
||||
def process_notification(n_object, datastore):
|
||||
from changedetectionio.safe_jinja import render as jinja_render
|
||||
from . import default_notification_format_for_watch, default_notification_format, valid_notification_formats
|
||||
|
||||
def markup_text_links_to_html(body):
|
||||
"""
|
||||
Convert plaintext to HTML with clickable links.
|
||||
Uses Jinja2's escape and Markup for XSS safety.
|
||||
"""
|
||||
from linkify_it import LinkifyIt
|
||||
from markupsafe import Markup, escape
|
||||
|
||||
linkify = LinkifyIt()
|
||||
|
||||
# Match URLs in the ORIGINAL text (before escaping)
|
||||
matches = linkify.match(body)
|
||||
|
||||
if not matches:
|
||||
# No URLs, just escape everything
|
||||
return Markup(escape(body))
|
||||
|
||||
result = []
|
||||
last_index = 0
|
||||
|
||||
# Process each URL match
|
||||
for match in matches:
|
||||
# Add escaped text before the URL
|
||||
if match.index > last_index:
|
||||
text_part = body[last_index:match.index]
|
||||
result.append(escape(text_part))
|
||||
|
||||
# Add the link with escaped URL (both in href and display)
|
||||
url = match.url
|
||||
result.append(Markup(f'<a href="{escape(url)}">{escape(url)}</a>'))
|
||||
|
||||
last_index = match.last_index
|
||||
|
||||
# Add remaining escaped text
|
||||
if last_index < len(body):
|
||||
result.append(escape(body[last_index:]))
|
||||
|
||||
# Join all parts
|
||||
return str(Markup(''.join(str(part) for part in result)))
|
||||
|
||||
def notification_format_align_with_apprise(n_format : str):
|
||||
"""
|
||||
Correctly align changedetection's formats with apprise's formats
|
||||
Probably these are the same - but good to be sure.
|
||||
These set the expected OUTPUT format type
|
||||
:param n_format:
|
||||
:return:
|
||||
"""
|
||||
|
||||
if n_format.startswith('html'):
|
||||
# Apprise only knows 'html' not 'htmlcolor' etc, which shouldnt matter here
|
||||
n_format = NotifyFormat.HTML.value
|
||||
elif n_format.startswith('markdown'):
|
||||
# probably the same but just to be safe
|
||||
n_format = NotifyFormat.MARKDOWN.value
|
||||
elif n_format.startswith('text'):
|
||||
# probably the same but just to be safe
|
||||
n_format = NotifyFormat.TEXT.value
|
||||
else:
|
||||
n_format = NotifyFormat.TEXT.value
|
||||
|
||||
return n_format
|
||||
|
||||
|
||||
def apply_html_color_to_body(n_body: str):
|
||||
# https://github.com/dgtlmoon/changedetection.io/issues/821#issuecomment-1241837050
|
||||
n_body = n_body.replace(REMOVED_PLACEMARKER_OPEN,
|
||||
f'<span style="{HTML_REMOVED_STYLE}" role="deletion" aria-label="Removed text" title="Removed text">')
|
||||
n_body = n_body.replace(REMOVED_PLACEMARKER_CLOSED, f'</span>')
|
||||
n_body = n_body.replace(ADDED_PLACEMARKER_OPEN,
|
||||
f'<span style="{HTML_ADDED_STYLE}" role="insertion" aria-label="Added text" title="Added text">')
|
||||
n_body = n_body.replace(ADDED_PLACEMARKER_CLOSED, f'</span>')
|
||||
# Handle changed/replaced lines (old → new)
|
||||
n_body = n_body.replace(CHANGED_PLACEMARKER_OPEN,
|
||||
f'<span style="{HTML_CHANGED_STYLE}" role="note" aria-label="Changed text" title="Changed text">')
|
||||
n_body = n_body.replace(CHANGED_PLACEMARKER_CLOSED, f'</span>')
|
||||
n_body = n_body.replace(CHANGED_INTO_PLACEMARKER_OPEN,
|
||||
f'<span style="{HTML_CHANGED_INTO_STYLE}" role="note" aria-label="Changed into" title="Changed into">')
|
||||
n_body = n_body.replace(CHANGED_INTO_PLACEMARKER_CLOSED, f'</span>')
|
||||
n_body = n_body.replace('\n', f'{CUSTOM_LINEBREAK_PLACEHOLDER}\n')
|
||||
return n_body
|
||||
|
||||
def apply_discord_markdown_to_body(n_body):
|
||||
"""
|
||||
Discord does not support <del> but it supports non-standard ~~strikethrough~~
|
||||
:param n_body:
|
||||
:return:
|
||||
"""
|
||||
import re
|
||||
# Define the mapping between your placeholders and markdown markers
|
||||
replacements = [
|
||||
(REMOVED_PLACEMARKER_OPEN, '~~', REMOVED_PLACEMARKER_CLOSED, '~~'),
|
||||
(ADDED_PLACEMARKER_OPEN, '**', ADDED_PLACEMARKER_CLOSED, '**'),
|
||||
(CHANGED_PLACEMARKER_OPEN, '~~', CHANGED_PLACEMARKER_CLOSED, '~~'),
|
||||
(CHANGED_INTO_PLACEMARKER_OPEN, '**', CHANGED_INTO_PLACEMARKER_CLOSED, '**'),
|
||||
]
|
||||
# So that the markdown gets added without any whitespace following it which would break it
|
||||
for open_tag, open_md, close_tag, close_md in replacements:
|
||||
# Regex: match opening tag, optional whitespace, capture the content, optional whitespace, then closing tag
|
||||
pattern = re.compile(
|
||||
re.escape(open_tag) + r'(\s*)(.*?)?(\s*)' + re.escape(close_tag),
|
||||
flags=re.DOTALL
|
||||
)
|
||||
n_body = pattern.sub(lambda m: f"{m.group(1)}{open_md}{m.group(2)}{close_md}{m.group(3)}", n_body)
|
||||
return n_body
|
||||
|
||||
def apply_standard_markdown_to_body(n_body):
|
||||
"""
|
||||
Apprise does not support ~~strikethrough~~ but it will convert <del> to HTML strikethrough.
|
||||
:param n_body:
|
||||
:return:
|
||||
"""
|
||||
import re
|
||||
# Define the mapping between your placeholders and markdown markers
|
||||
replacements = [
|
||||
(REMOVED_PLACEMARKER_OPEN, '<del>', REMOVED_PLACEMARKER_CLOSED, '</del>'),
|
||||
(ADDED_PLACEMARKER_OPEN, '**', ADDED_PLACEMARKER_CLOSED, '**'),
|
||||
(CHANGED_PLACEMARKER_OPEN, '<del>', CHANGED_PLACEMARKER_CLOSED, '</del>'),
|
||||
(CHANGED_INTO_PLACEMARKER_OPEN, '**', CHANGED_INTO_PLACEMARKER_CLOSED, '**'),
|
||||
]
|
||||
|
||||
# So that the markdown gets added without any whitespace following it which would break it
|
||||
for open_tag, open_md, close_tag, close_md in replacements:
|
||||
# Regex: match opening tag, optional whitespace, capture the content, optional whitespace, then closing tag
|
||||
pattern = re.compile(
|
||||
re.escape(open_tag) + r'(\s*)(.*?)?(\s*)' + re.escape(close_tag),
|
||||
flags=re.DOTALL
|
||||
)
|
||||
n_body = pattern.sub(lambda m: f"{m.group(1)}{open_md}{m.group(2)}{close_md}{m.group(3)}", n_body)
|
||||
return n_body
|
||||
|
||||
|
||||
def apply_service_tweaks(url, n_body, n_title, requested_output_format):
|
||||
|
||||
# Re 323 - Limit discord length to their 2000 char limit total or it wont send.
|
||||
# Because different notifications may require different pre-processing, run each sequentially :(
|
||||
# 2000 bytes minus -
|
||||
# 200 bytes for the overhead of the _entire_ json payload, 200 bytes for {tts, wait, content} etc headers
|
||||
# Length of URL - Incase they specify a longer custom avatar_url
|
||||
|
||||
if not n_body or not n_body.strip():
|
||||
return url, n_body, n_title
|
||||
|
||||
# So if no avatar_url is specified, add one so it can be correctly calculated into the total payload
|
||||
parsed = urlparse(url)
|
||||
k = '?' if not parsed.query else '&'
|
||||
if url and not 'avatar_url' in url \
|
||||
and not url.startswith('mail') \
|
||||
and not url.startswith('post') \
|
||||
and not url.startswith('get') \
|
||||
and not url.startswith('delete') \
|
||||
and not url.startswith('put'):
|
||||
url += k + f"avatar_url={APPRISE_AVATAR_URL}"
|
||||
|
||||
if url.startswith('tgram://'):
|
||||
# Telegram only supports a limit subset of HTML, remove the '<br>' we place in.
|
||||
# re https://github.com/dgtlmoon/changedetection.io/issues/555
|
||||
# @todo re-use an existing library we have already imported to strip all non-allowed tags
|
||||
n_body = n_body.replace('<br>', '\n')
|
||||
n_body = n_body.replace('</br>', '\n')
|
||||
n_body = n_body.replace(CUSTOM_LINEBREAK_PLACEHOLDER, '\n')
|
||||
|
||||
# Use strikethrough for removed content, bold for added content
|
||||
n_body = n_body.replace(REMOVED_PLACEMARKER_OPEN, '<s>')
|
||||
n_body = n_body.replace(REMOVED_PLACEMARKER_CLOSED, '</s>')
|
||||
n_body = n_body.replace(ADDED_PLACEMARKER_OPEN, '<b>')
|
||||
n_body = n_body.replace(ADDED_PLACEMARKER_CLOSED, '</b>')
|
||||
# Handle changed/replaced lines (old → new)
|
||||
n_body = n_body.replace(CHANGED_PLACEMARKER_OPEN, '<s>')
|
||||
n_body = n_body.replace(CHANGED_PLACEMARKER_CLOSED, '</s>')
|
||||
n_body = n_body.replace(CHANGED_INTO_PLACEMARKER_OPEN, '<b>')
|
||||
n_body = n_body.replace(CHANGED_INTO_PLACEMARKER_CLOSED, '</b>')
|
||||
|
||||
# real limit is 4096, but minus some for extra metadata
|
||||
payload_max_size = 3600
|
||||
body_limit = max(0, payload_max_size - len(n_title))
|
||||
n_title = n_title[0:payload_max_size]
|
||||
n_body = n_body[0:body_limit]
|
||||
|
||||
elif (url.startswith('discord://') or url.startswith('https://discordapp.com/api/webhooks')
|
||||
or url.startswith('https://discord.com/api'))\
|
||||
and 'html' in requested_output_format:
|
||||
# Discord doesn't support HTML, replace <br> with newlines
|
||||
n_body = n_body.strip().replace('<br>', '\n')
|
||||
n_body = n_body.replace('</br>', '\n')
|
||||
n_body = n_body.replace(CUSTOM_LINEBREAK_PLACEHOLDER, '\n')
|
||||
|
||||
# Don't replace placeholders or truncate here - let the custom Discord plugin handle it
|
||||
# The plugin will use embeds (6000 char limit across all embeds) if placeholders are present,
|
||||
# or plain content (2000 char limit) otherwise
|
||||
|
||||
# Only do placeholder replacement if NOT using htmlcolor (which triggers embeds in custom plugin)
|
||||
if requested_output_format == 'html':
|
||||
# No diff placeholders, use Discord markdown for any other formatting
|
||||
# Use Discord markdown: strikethrough for removed, bold for added
|
||||
n_body = apply_discord_markdown_to_body(n_body=n_body)
|
||||
|
||||
# Apply 2000 char limit for plain content
|
||||
payload_max_size = 1700
|
||||
body_limit = max(0, payload_max_size - len(n_title))
|
||||
n_title = n_title[0:payload_max_size]
|
||||
n_body = n_body[0:body_limit]
|
||||
# else: our custom Discord plugin will convert any placeholders left over into embeds with color bars
|
||||
|
||||
# Is not discord/tgram and they want htmlcolor
|
||||
elif requested_output_format == 'htmlcolor':
|
||||
n_body = apply_html_color_to_body(n_body=n_body)
|
||||
|
||||
elif requested_output_format == 'html':
|
||||
n_body = n_body.replace(REMOVED_PLACEMARKER_OPEN, '(removed) ')
|
||||
n_body = n_body.replace(REMOVED_PLACEMARKER_CLOSED, '')
|
||||
n_body = n_body.replace(ADDED_PLACEMARKER_OPEN, '(added) ')
|
||||
n_body = n_body.replace(ADDED_PLACEMARKER_CLOSED, '')
|
||||
n_body = n_body.replace(CHANGED_PLACEMARKER_OPEN, f'(changed) ')
|
||||
n_body = n_body.replace(CHANGED_PLACEMARKER_CLOSED, f'')
|
||||
n_body = n_body.replace(CHANGED_INTO_PLACEMARKER_OPEN, f'(into) ')
|
||||
n_body = n_body.replace(CHANGED_INTO_PLACEMARKER_CLOSED, f'')
|
||||
n_body = n_body.replace('\n', f'{CUSTOM_LINEBREAK_PLACEHOLDER}\n')
|
||||
elif requested_output_format == 'markdown':
|
||||
# Markdown to HTML - Apprise will convert this to HTML
|
||||
n_body = apply_standard_markdown_to_body(n_body=n_body)
|
||||
|
||||
else: #plaintext etc default
|
||||
n_body = n_body.replace(REMOVED_PLACEMARKER_OPEN, '(removed) ')
|
||||
n_body = n_body.replace(REMOVED_PLACEMARKER_CLOSED, '')
|
||||
n_body = n_body.replace(ADDED_PLACEMARKER_OPEN, '(added) ')
|
||||
n_body = n_body.replace(ADDED_PLACEMARKER_CLOSED, '')
|
||||
n_body = n_body.replace(CHANGED_PLACEMARKER_OPEN, f'(changed) ')
|
||||
n_body = n_body.replace(CHANGED_PLACEMARKER_CLOSED, f'')
|
||||
n_body = n_body.replace(CHANGED_INTO_PLACEMARKER_OPEN, f'(into) ')
|
||||
n_body = n_body.replace(CHANGED_INTO_PLACEMARKER_CLOSED, f'')
|
||||
|
||||
return url, n_body, n_title
|
||||
|
||||
|
||||
def process_notification(n_object: NotificationContextData, datastore):
|
||||
from changedetectionio.jinja2_custom import render as jinja_render
|
||||
from . import USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH, default_notification_format, valid_notification_formats
|
||||
# be sure its registered
|
||||
from .apprise_plugin.custom_handlers import apprise_http_custom_handler
|
||||
# Register custom Discord plugin
|
||||
from .apprise_plugin.discord import NotifyDiscordCustom
|
||||
|
||||
if not isinstance(n_object, NotificationContextData):
|
||||
raise TypeError(f"Expected NotificationContextData, got {type(n_object)}")
|
||||
|
||||
now = time.time()
|
||||
if n_object.get('notification_timestamp'):
|
||||
logger.trace(f"Time since queued {now-n_object['notification_timestamp']:.3f}s")
|
||||
|
||||
# Insert variables into the notification content
|
||||
notification_parameters = create_notification_parameters(n_object, datastore)
|
||||
|
||||
n_format = valid_notification_formats.get(
|
||||
n_object.get('notification_format', default_notification_format),
|
||||
valid_notification_formats[default_notification_format],
|
||||
)
|
||||
requested_output_format = n_object.get('notification_format', default_notification_format)
|
||||
logger.debug(f"Requested notification output format: '{requested_output_format}'")
|
||||
|
||||
# If we arrived with 'System default' then look it up
|
||||
if n_format == default_notification_format_for_watch and datastore.data['settings']['application'].get('notification_format') != default_notification_format_for_watch:
|
||||
if requested_output_format == USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH:
|
||||
# Initially text or whatever
|
||||
n_format = datastore.data['settings']['application'].get('notification_format', valid_notification_formats[default_notification_format])
|
||||
requested_output_format = datastore.data['settings']['application'].get('notification_format', default_notification_format)
|
||||
|
||||
requested_output_format_original = requested_output_format
|
||||
|
||||
# Now clean it up so it fits perfectly with apprise
|
||||
requested_output_format = notification_format_align_with_apprise(n_format=requested_output_format)
|
||||
|
||||
logger.trace(f"Complete notification body including Jinja and placeholders calculated in {time.time() - now:.2f}s")
|
||||
|
||||
@@ -41,16 +294,23 @@ def process_notification(n_object, datastore):
|
||||
|
||||
apobj = apprise.Apprise(debug=True, asset=apprise_asset)
|
||||
|
||||
# Override Apprise's built-in Discord plugin with our custom one
|
||||
# This allows us to use colored embeds for diff content
|
||||
# First remove the built-in discord plugin, then add our custom one
|
||||
apprise.plugins.N_MGR.remove('discord')
|
||||
apprise.plugins.N_MGR.add(NotifyDiscordCustom, schemas='discord')
|
||||
|
||||
if not n_object.get('notification_urls'):
|
||||
return None
|
||||
|
||||
with apprise.LogCapture(level=apprise.logging.DEBUG) as logs:
|
||||
with (apprise.LogCapture(level=apprise.logging.DEBUG) as logs):
|
||||
for url in n_object['notification_urls']:
|
||||
|
||||
# Get the notification body from datastore
|
||||
n_body = jinja_render(template_str=n_object.get('notification_body', ''), **notification_parameters)
|
||||
if n_object.get('notification_format', '').startswith('HTML'):
|
||||
n_body = n_body.replace("\n", '<br>')
|
||||
|
||||
if n_object.get('markup_text_links_to_html_links'):
|
||||
n_body = markup_text_links_to_html(body=n_body)
|
||||
|
||||
n_title = jinja_render(template_str=n_object.get('notification_title', ''), **notification_parameters)
|
||||
|
||||
@@ -66,74 +326,98 @@ def process_notification(n_object, datastore):
|
||||
logger.info(f">> Process Notification: AppRise notifying {url}")
|
||||
url = jinja_render(template_str=url, **notification_parameters)
|
||||
|
||||
# Re 323 - Limit discord length to their 2000 char limit total or it wont send.
|
||||
# Because different notifications may require different pre-processing, run each sequentially :(
|
||||
# 2000 bytes minus -
|
||||
# 200 bytes for the overhead of the _entire_ json payload, 200 bytes for {tts, wait, content} etc headers
|
||||
# Length of URL - Incase they specify a longer custom avatar_url
|
||||
# If it's a plaintext document, and they want HTML type email/alerts, so it needs to be escaped
|
||||
watch_mime_type = n_object.get('watch_mime_type')
|
||||
if watch_mime_type and 'text/' in watch_mime_type.lower() and not 'html' in watch_mime_type.lower():
|
||||
if 'html' in requested_output_format:
|
||||
from markupsafe import escape
|
||||
n_body = str(escape(n_body))
|
||||
|
||||
# So if no avatar_url is specified, add one so it can be correctly calculated into the total payload
|
||||
k = '?' if not '?' in url else '&'
|
||||
if not 'avatar_url' in url \
|
||||
and not url.startswith('mail') \
|
||||
and not url.startswith('post') \
|
||||
and not url.startswith('get') \
|
||||
and not url.startswith('delete') \
|
||||
and not url.startswith('put'):
|
||||
url += k + f"avatar_url={APPRISE_AVATAR_URL}"
|
||||
if 'html' in requested_output_format:
|
||||
# Since the n_body is always some kind of text from the 'diff' engine, attempt to preserve whitespaces that get sent to the HTML output
|
||||
# But only where its more than 1 consecutive whitespace, otherwise "and this" becomes "and this" etc which is too much.
|
||||
n_body = n_body.replace(' ', ' ')
|
||||
|
||||
if url.startswith('tgram://'):
|
||||
# Telegram only supports a limit subset of HTML, remove the '<br>' we place in.
|
||||
# re https://github.com/dgtlmoon/changedetection.io/issues/555
|
||||
# @todo re-use an existing library we have already imported to strip all non-allowed tags
|
||||
n_body = n_body.replace('<br>', '\n')
|
||||
n_body = n_body.replace('</br>', '\n')
|
||||
# real limit is 4096, but minus some for extra metadata
|
||||
payload_max_size = 3600
|
||||
body_limit = max(0, payload_max_size - len(n_title))
|
||||
n_title = n_title[0:payload_max_size]
|
||||
n_body = n_body[0:body_limit]
|
||||
(url, n_body, n_title) = apply_service_tweaks(url=url, n_body=n_body, n_title=n_title, requested_output_format=requested_output_format_original)
|
||||
|
||||
elif url.startswith('discord://') or url.startswith('https://discordapp.com/api/webhooks') or url.startswith(
|
||||
'https://discord.com/api'):
|
||||
# real limit is 2000, but minus some for extra metadata
|
||||
payload_max_size = 1700
|
||||
body_limit = max(0, payload_max_size - len(n_title))
|
||||
n_title = n_title[0:payload_max_size]
|
||||
n_body = n_body[0:body_limit]
|
||||
apprise_input_format = "NO-THANKS-WE-WILL-MANAGE-ALL-OF-THIS"
|
||||
|
||||
elif url.startswith('mailto'):
|
||||
# Apprise will default to HTML, so we need to override it
|
||||
# So that whats' generated in n_body is in line with what is going to be sent.
|
||||
# https://github.com/caronc/apprise/issues/633#issuecomment-1191449321
|
||||
if not 'format=' in url and (n_format == 'Text' or n_format == 'Markdown'):
|
||||
prefix = '?' if not '?' in url else '&'
|
||||
# Apprise format is lowercase text https://github.com/caronc/apprise/issues/633
|
||||
n_format = n_format.lower()
|
||||
url = f"{url}{prefix}format={n_format}"
|
||||
# If n_format == HTML, then apprise email should default to text/html and we should be sending HTML only
|
||||
if not 'format=' in url:
|
||||
parsed_url = urlparse(url)
|
||||
prefix_add_to_url = '?' if not parsed_url.query else '&'
|
||||
|
||||
apobj.add(url)
|
||||
# THIS IS THE TRICK HOW TO DISABLE APPRISE DOING WEIRD AUTO-CONVERSION WITH BREAKING BR TAGS ETC
|
||||
if 'html' in requested_output_format:
|
||||
url = f"{url}{prefix_add_to_url}format={NotifyFormat.HTML.value}"
|
||||
apprise_input_format = NotifyFormat.HTML.value
|
||||
elif 'text' in requested_output_format:
|
||||
url = f"{url}{prefix_add_to_url}format={NotifyFormat.TEXT.value}"
|
||||
apprise_input_format = NotifyFormat.TEXT.value
|
||||
|
||||
elif requested_output_format == NotifyFormat.MARKDOWN.value:
|
||||
# Convert markdown to HTML ourselves since not all plugins do this
|
||||
from apprise.conversion import markdown_to_html
|
||||
# Make sure there are paragraph breaks around horizontal rules
|
||||
n_body = n_body.replace('---', '\n\n---\n\n')
|
||||
n_body = markdown_to_html(n_body)
|
||||
url = f"{url}{prefix_add_to_url}format={NotifyFormat.HTML.value}"
|
||||
requested_output_format = NotifyFormat.HTML.value
|
||||
apprise_input_format = NotifyFormat.HTML.value # Changed from MARKDOWN to HTML
|
||||
|
||||
# Could have arrived at any stage, so we dont end up running .escape on it
|
||||
# Replace CUSTOM_LINEBREAK_PLACEHOLDER followed by optional \r and/or \n
|
||||
if 'html' in requested_output_format:
|
||||
# could be @BR@ with optional \r\n, so we dont add more \n's
|
||||
n_body = re.sub(
|
||||
re.escape(CUSTOM_LINEBREAK_PLACEHOLDER) + r'\r?\n?',
|
||||
'<br>\\r\\n',
|
||||
n_body
|
||||
)
|
||||
else:
|
||||
# texty types
|
||||
n_body = re.sub(
|
||||
re.escape(CUSTOM_LINEBREAK_PLACEHOLDER) + r'\r?\n?',
|
||||
'\\r\\n',
|
||||
n_body
|
||||
)
|
||||
|
||||
else:
|
||||
# ?format was IN the apprise URL, they are kind of on their own here, we will try our best
|
||||
if 'format=html' in url:
|
||||
n_body = n_body.replace(CUSTOM_LINEBREAK_PLACEHOLDER, '<br>\r\n')
|
||||
# This will also prevent apprise from doing conversion
|
||||
apprise_input_format = NotifyFormat.HTML.value
|
||||
requested_output_format = NotifyFormat.HTML.value
|
||||
elif 'format=text' in url:
|
||||
n_body = n_body.replace(CUSTOM_LINEBREAK_PLACEHOLDER, '\r\n')
|
||||
apprise_input_format = NotifyFormat.TEXT.value
|
||||
requested_output_format = NotifyFormat.TEXT.value
|
||||
|
||||
sent_objs.append({'title': n_title,
|
||||
'body': n_body,
|
||||
'url': url,
|
||||
'body_format': n_format})
|
||||
'url': url})
|
||||
apobj.add(url)
|
||||
|
||||
# Since the output is always based on the plaintext of the 'diff' engine, wrap it nicely.
|
||||
# It should always be similar to the 'history' part of the UI.
|
||||
if url.startswith('mail') and 'html' in requested_output_format:
|
||||
if not '<pre' in n_body and not '<body' in n_body: # No custom HTML-ish body was setup already
|
||||
n_body = as_monospaced_html_email(content=n_body, title=n_title)
|
||||
|
||||
# Blast off the notifications tht are set in .add()
|
||||
apobj.notify(
|
||||
title=n_title,
|
||||
body=n_body,
|
||||
body_format=n_format,
|
||||
# `body_format` Tell apprise what format the INPUT is in, specify a wrong/bad type and it will force skip conversion in apprise
|
||||
# &format= in URL Tell apprise what format the OUTPUT should be in (it can convert between)
|
||||
body_format=apprise_input_format,
|
||||
# False is not an option for AppRise, must be type None
|
||||
attach=n_object.get('screenshot', None)
|
||||
)
|
||||
|
||||
|
||||
# Returns empty string if nothing found, multi-line string otherwise
|
||||
log_value = logs.getvalue()
|
||||
|
||||
if log_value and 'WARNING' in log_value or 'ERROR' in log_value:
|
||||
if log_value and ('WARNING' in log_value or 'ERROR' in log_value):
|
||||
logger.critical(log_value)
|
||||
raise Exception(log_value)
|
||||
|
||||
@@ -143,17 +427,15 @@ def process_notification(n_object, datastore):
|
||||
|
||||
# Notification title + body content parameters get created here.
|
||||
# ( Where we prepare the tokens in the notification to be replaced with actual values )
|
||||
def create_notification_parameters(n_object, datastore):
|
||||
from copy import deepcopy
|
||||
from . import valid_tokens
|
||||
def create_notification_parameters(n_object: NotificationContextData, datastore):
|
||||
if not isinstance(n_object, NotificationContextData):
|
||||
raise TypeError(f"Expected NotificationContextData, got {type(n_object)}")
|
||||
|
||||
# in the case we send a test notification from the main settings, there is no UUID.
|
||||
uuid = n_object['uuid'] if 'uuid' in n_object else ''
|
||||
|
||||
if uuid:
|
||||
watch_title = datastore.data['watching'][uuid].get('title', '')
|
||||
watch = datastore.data['watching'].get(n_object['uuid'])
|
||||
if watch:
|
||||
watch_title = datastore.data['watching'][n_object['uuid']].label
|
||||
tag_list = []
|
||||
tags = datastore.get_all_tags_for_watch(uuid)
|
||||
tags = datastore.get_all_tags_for_watch(n_object['uuid'])
|
||||
if tags:
|
||||
for tag_uuid, tag in tags.items():
|
||||
tag_list.append(tag.get('title'))
|
||||
@@ -168,14 +450,10 @@ def create_notification_parameters(n_object, datastore):
|
||||
|
||||
watch_url = n_object['watch_url']
|
||||
|
||||
diff_url = "{}/diff/{}".format(base_url, uuid)
|
||||
preview_url = "{}/preview/{}".format(base_url, uuid)
|
||||
diff_url = "{}/diff/{}".format(base_url, n_object['uuid'])
|
||||
preview_url = "{}/preview/{}".format(base_url, n_object['uuid'])
|
||||
|
||||
# Not sure deepcopy is needed here, but why not
|
||||
tokens = deepcopy(valid_tokens)
|
||||
|
||||
# Valid_tokens also used as a field validator
|
||||
tokens.update(
|
||||
n_object.update(
|
||||
{
|
||||
'base_url': base_url,
|
||||
'diff_url': diff_url,
|
||||
@@ -183,13 +461,10 @@ def create_notification_parameters(n_object, datastore):
|
||||
'watch_tag': watch_tag if watch_tag is not None else '',
|
||||
'watch_title': watch_title if watch_title is not None else '',
|
||||
'watch_url': watch_url,
|
||||
'watch_uuid': uuid,
|
||||
'watch_uuid': n_object['uuid'],
|
||||
})
|
||||
|
||||
# n_object will contain diff, diff_added etc etc
|
||||
tokens.update(n_object)
|
||||
if watch:
|
||||
n_object.update(datastore.data['watching'].get(n_object['uuid']).extra_notification_token_values())
|
||||
|
||||
if uuid:
|
||||
tokens.update(datastore.data['watching'].get(uuid).extra_notification_token_values())
|
||||
|
||||
return tokens
|
||||
return n_object
|
||||
|
||||
340
changedetectionio/notification_service.py
Normal file
340
changedetectionio/notification_service.py
Normal file
@@ -0,0 +1,340 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
"""
|
||||
Notification Service Module
|
||||
Extracted from update_worker.py to provide standalone notification functionality
|
||||
for both sync and async workers
|
||||
"""
|
||||
|
||||
from loguru import logger
|
||||
import time
|
||||
|
||||
from changedetectionio.model import USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH
|
||||
from changedetectionio.notification import default_notification_format, valid_notification_formats
|
||||
|
||||
# This gets modified on notification time (handler.py) depending on the required notification output
|
||||
CUSTOM_LINEBREAK_PLACEHOLDER='@BR@'
|
||||
|
||||
|
||||
# What is passed around as notification context, also used as the complete list of valid {{ tokens }}
|
||||
class NotificationContextData(dict):
|
||||
def __init__(self, initial_data=None, **kwargs):
|
||||
super().__init__({
|
||||
'base_url': None,
|
||||
'current_snapshot': None,
|
||||
'diff': None,
|
||||
'diff_added': None,
|
||||
'diff_full': None,
|
||||
'diff_patch': None,
|
||||
'diff_removed': None,
|
||||
'diff_url': None,
|
||||
'markup_text_links_to_html_links': False, # If automatic conversion of plaintext to HTML should happen
|
||||
'notification_timestamp': time.time(),
|
||||
'preview_url': None,
|
||||
'screenshot': None,
|
||||
'triggered_text': None,
|
||||
'uuid': 'XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX', # Converted to 'watch_uuid' in create_notification_parameters
|
||||
'watch_mime_type': None,
|
||||
'watch_tag': None,
|
||||
'watch_title': None,
|
||||
'watch_url': 'https://WATCH-PLACE-HOLDER/',
|
||||
})
|
||||
|
||||
# Apply any initial data passed in
|
||||
self.update({'watch_uuid': self.get('uuid')})
|
||||
if initial_data:
|
||||
self.update(initial_data)
|
||||
|
||||
# Apply any keyword arguments
|
||||
if kwargs:
|
||||
self.update(kwargs)
|
||||
|
||||
n_format = self.get('notification_format')
|
||||
if n_format and not valid_notification_formats.get(n_format):
|
||||
raise ValueError(f'Invalid notification format: "{n_format}"')
|
||||
|
||||
def set_random_for_validation(self):
|
||||
import random, string
|
||||
"""Randomly fills all dict keys with random strings (for validation/testing).
|
||||
So we can test the output in the notification body
|
||||
"""
|
||||
for key in self.keys():
|
||||
if key in ['uuid', 'time', 'watch_uuid']:
|
||||
continue
|
||||
rand_str = 'RANDOM-PLACEHOLDER-'+''.join(random.choices(string.ascii_letters + string.digits, k=12))
|
||||
self[key] = rand_str
|
||||
|
||||
def __setitem__(self, key, value):
|
||||
if key == 'notification_format' and isinstance(value, str) and not value.startswith('RANDOM-PLACEHOLDER-'):
|
||||
if not valid_notification_formats.get(value):
|
||||
raise ValueError(f'Invalid notification format: "{value}"')
|
||||
|
||||
super().__setitem__(key, value)
|
||||
|
||||
class NotificationService:
|
||||
"""
|
||||
Standalone notification service that handles all notification functionality
|
||||
previously embedded in the update_worker class
|
||||
"""
|
||||
|
||||
def __init__(self, datastore, notification_q):
|
||||
self.datastore = datastore
|
||||
self.notification_q = notification_q
|
||||
|
||||
def queue_notification_for_watch(self, n_object: NotificationContextData, watch):
|
||||
"""
|
||||
Queue a notification for a watch with full diff rendering and template variables
|
||||
"""
|
||||
from changedetectionio import diff
|
||||
from changedetectionio.notification import USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH
|
||||
|
||||
if not isinstance(n_object, NotificationContextData):
|
||||
raise TypeError(f"Expected NotificationContextData, got {type(n_object)}")
|
||||
|
||||
dates = []
|
||||
trigger_text = ''
|
||||
|
||||
now = time.time()
|
||||
|
||||
if watch:
|
||||
watch_history = watch.history
|
||||
dates = list(watch_history.keys())
|
||||
trigger_text = watch.get('trigger_text', [])
|
||||
|
||||
# Add text that was triggered
|
||||
if len(dates):
|
||||
snapshot_contents = watch.get_history_snapshot(dates[-1])
|
||||
else:
|
||||
snapshot_contents = "No snapshot/history available, the watch should fetch atleast once."
|
||||
|
||||
# If we ended up here with "System default"
|
||||
if n_object.get('notification_format') == USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH:
|
||||
n_object['notification_format'] = self.datastore.data['settings']['application'].get('notification_format')
|
||||
|
||||
|
||||
triggered_text = ''
|
||||
if len(trigger_text):
|
||||
from . import html_tools
|
||||
triggered_text = html_tools.get_triggered_text(content=snapshot_contents, trigger_text=trigger_text)
|
||||
if triggered_text:
|
||||
triggered_text = CUSTOM_LINEBREAK_PLACEHOLDER.join(triggered_text)
|
||||
|
||||
# Could be called as a 'test notification' with only 1 snapshot available
|
||||
prev_snapshot = "Example text: example test\nExample text: change detection is cool\nExample text: some more examples\n"
|
||||
current_snapshot = "Example text: example test\nExample text: change detection is fantastic\nExample text: even more examples\nExample text: a lot more examples"
|
||||
|
||||
if len(dates) > 1:
|
||||
prev_snapshot = watch.get_history_snapshot(dates[-2])
|
||||
current_snapshot = watch.get_history_snapshot(dates[-1])
|
||||
|
||||
ignore_junk = self.datastore.data['settings']['application'].get('ignore_whitespace', False)
|
||||
# plaintext should never use word_mode
|
||||
|
||||
word_mode = True
|
||||
|
||||
base_kwargs = dict(
|
||||
previous_version_file_contents=prev_snapshot,
|
||||
newest_version_file_contents=current_snapshot,
|
||||
ignore_junk=ignore_junk,
|
||||
word_diff=not (word_mode and 'text' in n_object.get('notification_format', '')),
|
||||
)
|
||||
|
||||
n_object.update({
|
||||
'current_snapshot': snapshot_contents,
|
||||
'diff': diff.render_diff(**base_kwargs),
|
||||
'diff_added': diff.render_diff(include_removed=False, **base_kwargs),
|
||||
'diff_full': diff.render_diff(include_equal=True, **base_kwargs),
|
||||
'diff_patch': diff.render_diff(patch_format=True, **base_kwargs),
|
||||
'diff_removed': diff.render_diff(include_added=False, **base_kwargs),
|
||||
'screenshot': watch.get_screenshot() if watch and watch.get('notification_screenshot') else None,
|
||||
'triggered_text': triggered_text,
|
||||
'uuid': watch.get('uuid') if watch else None,
|
||||
'watch_url': watch.get('url') if watch else None,
|
||||
'watch_uuid': watch.get('uuid') if watch else None,
|
||||
'watch_mime_type': watch.get('content-type')
|
||||
})
|
||||
|
||||
if watch:
|
||||
n_object.update(watch.extra_notification_token_values())
|
||||
|
||||
logger.trace(f"Main rendered notification placeholders (diff_added etc) calculated in {time.time()-now:.3f}s")
|
||||
logger.debug("Queued notification for sending")
|
||||
self.notification_q.put(n_object)
|
||||
|
||||
def _check_cascading_vars(self, var_name, watch):
|
||||
"""
|
||||
Check notification variables in cascading priority:
|
||||
Individual watch settings > Tag settings > Global settings
|
||||
"""
|
||||
from changedetectionio.notification import (
|
||||
USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH,
|
||||
default_notification_body,
|
||||
default_notification_title
|
||||
)
|
||||
|
||||
# Would be better if this was some kind of Object where Watch can reference the parent datastore etc
|
||||
v = watch.get(var_name)
|
||||
if v and not watch.get('notification_muted'):
|
||||
if var_name == 'notification_format' and v == USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH:
|
||||
return self.datastore.data['settings']['application'].get('notification_format')
|
||||
|
||||
return v
|
||||
|
||||
tags = self.datastore.get_all_tags_for_watch(uuid=watch.get('uuid'))
|
||||
if tags:
|
||||
for tag_uuid, tag in tags.items():
|
||||
v = tag.get(var_name)
|
||||
if v and not tag.get('notification_muted'):
|
||||
return v
|
||||
|
||||
if self.datastore.data['settings']['application'].get(var_name):
|
||||
return self.datastore.data['settings']['application'].get(var_name)
|
||||
|
||||
# Otherwise could be defaults
|
||||
if var_name == 'notification_format':
|
||||
return USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH
|
||||
if var_name == 'notification_body':
|
||||
return default_notification_body
|
||||
if var_name == 'notification_title':
|
||||
return default_notification_title
|
||||
|
||||
return None
|
||||
|
||||
def send_content_changed_notification(self, watch_uuid):
|
||||
"""
|
||||
Send notification when content changes are detected
|
||||
"""
|
||||
n_object = NotificationContextData()
|
||||
watch = self.datastore.data['watching'].get(watch_uuid)
|
||||
if not watch:
|
||||
return
|
||||
|
||||
watch_history = watch.history
|
||||
dates = list(watch_history.keys())
|
||||
# Theoretically it's possible that this could be just 1 long,
|
||||
# - In the case that the timestamp key was not unique
|
||||
if len(dates) == 1:
|
||||
raise ValueError(
|
||||
"History index had 2 or more, but only 1 date loaded, timestamps were not unique? maybe two of the same timestamps got written, needs more delay?"
|
||||
)
|
||||
|
||||
# Should be a better parent getter in the model object
|
||||
|
||||
# Prefer - Individual watch settings > Tag settings > Global settings (in that order)
|
||||
n_object['notification_urls'] = self._check_cascading_vars('notification_urls', watch)
|
||||
n_object['notification_title'] = self._check_cascading_vars('notification_title', watch)
|
||||
n_object['notification_body'] = self._check_cascading_vars('notification_body', watch)
|
||||
n_object['notification_format'] = self._check_cascading_vars('notification_format', watch)
|
||||
|
||||
# (Individual watch) Only prepare to notify if the rules above matched
|
||||
queued = False
|
||||
if n_object and n_object.get('notification_urls'):
|
||||
queued = True
|
||||
|
||||
count = watch.get('notification_alert_count', 0) + 1
|
||||
self.datastore.update_watch(uuid=watch_uuid, update_obj={'notification_alert_count': count})
|
||||
|
||||
self.queue_notification_for_watch(n_object=n_object, watch=watch)
|
||||
|
||||
return queued
|
||||
|
||||
def send_filter_failure_notification(self, watch_uuid):
|
||||
"""
|
||||
Send notification when CSS/XPath filters fail consecutively
|
||||
"""
|
||||
threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts')
|
||||
watch = self.datastore.data['watching'].get(watch_uuid)
|
||||
if not watch:
|
||||
return
|
||||
|
||||
filter_list = ", ".join(watch['include_filters'])
|
||||
# @todo - This could be a markdown template on the disk, apprise will convert the markdown to HTML+Plaintext parts in the email, and then 'markup_text_links_to_html_links' is not needed
|
||||
body = f"""Hello,
|
||||
|
||||
Your configured CSS/xPath filters of '{filter_list}' for {{{{watch_url}}}} did not appear on the page after {threshold} attempts.
|
||||
|
||||
It's possible the page changed layout and the filter needs updating ( Try the 'Visual Selector' tab )
|
||||
|
||||
Edit link: {{{{base_url}}}}/edit/{{{{watch_uuid}}}}
|
||||
|
||||
Thanks - Your omniscient changedetection.io installation.
|
||||
"""
|
||||
|
||||
n_object = NotificationContextData({
|
||||
'notification_title': 'Changedetection.io - Alert - CSS/xPath filter was not present in the page',
|
||||
'notification_body': body,
|
||||
'notification_format': self._check_cascading_vars('notification_format', watch),
|
||||
})
|
||||
n_object['markup_text_links_to_html_links'] = n_object.get('notification_format').startswith('html')
|
||||
|
||||
if len(watch['notification_urls']):
|
||||
n_object['notification_urls'] = watch['notification_urls']
|
||||
|
||||
elif len(self.datastore.data['settings']['application']['notification_urls']):
|
||||
n_object['notification_urls'] = self.datastore.data['settings']['application']['notification_urls']
|
||||
|
||||
# Only prepare to notify if the rules above matched
|
||||
if 'notification_urls' in n_object:
|
||||
n_object.update({
|
||||
'watch_url': watch['url'],
|
||||
'uuid': watch_uuid,
|
||||
'screenshot': None
|
||||
})
|
||||
self.notification_q.put(n_object)
|
||||
logger.debug(f"Sent filter not found notification for {watch_uuid}")
|
||||
else:
|
||||
logger.debug(f"NOT sending filter not found notification for {watch_uuid} - no notification URLs")
|
||||
|
||||
def send_step_failure_notification(self, watch_uuid, step_n):
|
||||
"""
|
||||
Send notification when browser steps fail consecutively
|
||||
"""
|
||||
watch = self.datastore.data['watching'].get(watch_uuid, False)
|
||||
if not watch:
|
||||
return
|
||||
threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts')
|
||||
|
||||
step = step_n + 1
|
||||
# @todo - This could be a markdown template on the disk, apprise will convert the markdown to HTML+Plaintext parts in the email, and then 'markup_text_links_to_html_links' is not needed
|
||||
|
||||
# {{{{ }}}} because this will be Jinja2 {{ }} tokens
|
||||
body = f"""Hello,
|
||||
|
||||
Your configured browser step at position {step} for the web page watch {{{{watch_url}}}} did not appear on the page after {threshold} attempts, did the page change layout?
|
||||
|
||||
The element may have moved and needs editing, or does it need a delay added?
|
||||
|
||||
Edit link: {{{{base_url}}}}/edit/{{{{watch_uuid}}}}
|
||||
|
||||
Thanks - Your omniscient changedetection.io installation.
|
||||
"""
|
||||
|
||||
n_object = NotificationContextData({
|
||||
'notification_title': f"Changedetection.io - Alert - Browser step at position {step} could not be run",
|
||||
'notification_body': body,
|
||||
'notification_format': self._check_cascading_vars('notification_format', watch),
|
||||
})
|
||||
n_object['markup_text_links_to_html_links'] = n_object.get('notification_format').startswith('html')
|
||||
|
||||
if len(watch['notification_urls']):
|
||||
n_object['notification_urls'] = watch['notification_urls']
|
||||
|
||||
elif len(self.datastore.data['settings']['application']['notification_urls']):
|
||||
n_object['notification_urls'] = self.datastore.data['settings']['application']['notification_urls']
|
||||
|
||||
# Only prepare to notify if the rules above matched
|
||||
if 'notification_urls' in n_object:
|
||||
n_object.update({
|
||||
'watch_url': watch['url'],
|
||||
'uuid': watch_uuid
|
||||
})
|
||||
self.notification_q.put(n_object)
|
||||
logger.error(f"Sent step not found notification for {watch_uuid}")
|
||||
|
||||
|
||||
# Convenience functions for creating notification service instances
|
||||
def create_notification_service(datastore, notification_q):
|
||||
"""
|
||||
Factory function to create a NotificationService instance
|
||||
"""
|
||||
return NotificationService(datastore, notification_q)
|
||||
@@ -27,7 +27,7 @@ class difference_detection_processor():
|
||||
# Generic fetcher that should be extended (requests, playwright etc)
|
||||
self.fetcher = Fetcher()
|
||||
|
||||
def call_browser(self, preferred_proxy_id=None):
|
||||
async def call_browser(self, preferred_proxy_id=None):
|
||||
|
||||
from requests.structures import CaseInsensitiveDict
|
||||
|
||||
@@ -89,7 +89,9 @@ class difference_detection_processor():
|
||||
proxy_url = self.datastore.proxy_list.get(preferred_proxy_id).get('url')
|
||||
logger.debug(f"Selected proxy key '{preferred_proxy_id}' as proxy URL '{proxy_url}' for {url}")
|
||||
else:
|
||||
logger.debug(f"Skipping adding proxy data when custom Browser endpoint is specified. ")
|
||||
logger.debug("Skipping adding proxy data when custom Browser endpoint is specified. ")
|
||||
|
||||
logger.debug(f"Using proxy '{proxy_url}' for {self.watch['uuid']}")
|
||||
|
||||
# Now call the fetcher (playwright/requests/etc) with arguments that only a fetcher would need.
|
||||
# When browser_connection_url is None, it method should default to working out whats the best defaults (os env vars etc)
|
||||
@@ -102,7 +104,7 @@ class difference_detection_processor():
|
||||
self.fetcher.browser_steps_screenshot_path = os.path.join(self.datastore.datastore_path, self.watch.get('uuid'))
|
||||
|
||||
# Tweak the base config with the per-watch ones
|
||||
from changedetectionio.safe_jinja import render as jinja_render
|
||||
from changedetectionio.jinja2_custom import render as jinja_render
|
||||
request_headers = CaseInsensitiveDict()
|
||||
|
||||
ua = self.datastore.data['settings']['requests'].get('default_ua')
|
||||
@@ -146,17 +148,19 @@ class difference_detection_processor():
|
||||
|
||||
# And here we go! call the right browser with browser-specific settings
|
||||
empty_pages_are_a_change = self.datastore.data['settings']['application'].get('empty_pages_are_a_change', False)
|
||||
|
||||
self.fetcher.run(url=url,
|
||||
timeout=timeout,
|
||||
request_headers=request_headers,
|
||||
request_body=request_body,
|
||||
request_method=request_method,
|
||||
ignore_status_codes=ignore_status_codes,
|
||||
current_include_filters=self.watch.get('include_filters'),
|
||||
is_binary=is_binary,
|
||||
empty_pages_are_a_change=empty_pages_are_a_change
|
||||
)
|
||||
# All fetchers are now async
|
||||
await self.fetcher.run(
|
||||
current_include_filters=self.watch.get('include_filters'),
|
||||
empty_pages_are_a_change=empty_pages_are_a_change,
|
||||
fetch_favicon=self.watch.favicon_is_expired(),
|
||||
ignore_status_codes=ignore_status_codes,
|
||||
is_binary=is_binary,
|
||||
request_body=request_body,
|
||||
request_headers=request_headers,
|
||||
request_method=request_method,
|
||||
timeout=timeout,
|
||||
url=url,
|
||||
)
|
||||
|
||||
#@todo .quit here could go on close object, so we can run JS if change-detected
|
||||
self.fetcher.quit(watch=self.watch)
|
||||
|
||||
133
changedetectionio/processors/magic.py
Normal file
133
changedetectionio/processors/magic.py
Normal file
@@ -0,0 +1,133 @@
|
||||
"""
|
||||
Content Type Detection and Stream Classification
|
||||
|
||||
This module provides intelligent content-type detection for changedetection.io.
|
||||
It addresses the common problem where HTTP Content-Type headers are missing, incorrect,
|
||||
or too generic, which would otherwise cause the wrong processor to be used.
|
||||
|
||||
The guess_stream_type class combines:
|
||||
1. HTTP Content-Type headers (when available and reliable)
|
||||
2. Python-magic library for MIME detection (analyzing actual file content)
|
||||
3. Content-based pattern matching for text formats (HTML tags, XML declarations, etc.)
|
||||
|
||||
This multi-layered approach ensures accurate detection of RSS feeds, JSON, HTML, PDF,
|
||||
plain text, CSV, YAML, and XML formats - even when servers provide misleading headers.
|
||||
|
||||
Used by: processors/text_json_diff/processor.py and other content processors
|
||||
"""
|
||||
|
||||
# When to apply the 'cdata to real HTML' hack
|
||||
RSS_XML_CONTENT_TYPES = [
|
||||
"application/rss+xml",
|
||||
"application/rdf+xml",
|
||||
"application/atom+xml",
|
||||
"text/rss+xml", # rare, non-standard
|
||||
"application/x-rss+xml", # legacy (older feed software)
|
||||
"application/x-atom+xml", # legacy (older Atom)
|
||||
]
|
||||
|
||||
# JSON Content-types
|
||||
JSON_CONTENT_TYPES = [
|
||||
"application/activity+json",
|
||||
"application/feed+json",
|
||||
"application/json",
|
||||
"application/ld+json",
|
||||
"application/vnd.api+json",
|
||||
]
|
||||
|
||||
|
||||
# Generic XML Content-types (non-RSS/Atom)
|
||||
XML_CONTENT_TYPES = [
|
||||
"text/xml",
|
||||
"application/xml",
|
||||
]
|
||||
|
||||
HTML_PATTERNS = ['<!doctype html', '<html', '<head', '<body', '<script', '<iframe', '<div']
|
||||
|
||||
from loguru import logger
|
||||
|
||||
class guess_stream_type():
|
||||
is_pdf = False
|
||||
is_json = False
|
||||
is_html = False
|
||||
is_plaintext = False
|
||||
is_rss = False
|
||||
is_csv = False
|
||||
is_xml = False # Generic XML, not RSS/Atom
|
||||
is_yaml = False
|
||||
|
||||
def __init__(self, http_content_header, content):
|
||||
import re
|
||||
magic_content_header = http_content_header
|
||||
test_content = content[:200].lower().strip()
|
||||
|
||||
# Remove whitespace between < and tag name for robust detection (handles '< html', '<\nhtml', etc.)
|
||||
test_content_normalized = re.sub(r'<\s+', '<', test_content)
|
||||
|
||||
# Use puremagic for lightweight MIME detection (saves ~14MB vs python-magic)
|
||||
magic_result = None
|
||||
try:
|
||||
import puremagic
|
||||
|
||||
# puremagic needs bytes, so encode if we have a string
|
||||
content_bytes = content[:200].encode('utf-8') if isinstance(content, str) else content[:200]
|
||||
|
||||
# puremagic returns a list of PureMagic objects with confidence scores
|
||||
detections = puremagic.magic_string(content_bytes)
|
||||
if detections:
|
||||
# Get the highest confidence detection
|
||||
mime = detections[0].mime_type
|
||||
logger.debug(f"Guessing mime type, original content_type '{http_content_header}', mime type detected '{mime}'")
|
||||
if mime and "/" in mime:
|
||||
magic_result = mime
|
||||
# Ignore generic/fallback mime types
|
||||
if mime in ['application/octet-stream', 'application/x-empty', 'binary']:
|
||||
logger.debug(f"Ignoring generic mime type '{mime}' from puremagic library")
|
||||
# Trust puremagic for non-text types immediately
|
||||
elif mime not in ['text/html', 'text/plain']:
|
||||
magic_content_header = mime
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"Error getting a more precise mime type from 'puremagic' library ({str(e)}), using content-based detection")
|
||||
|
||||
# Content-based detection (most reliable for text formats)
|
||||
# Check for HTML patterns first - if found, override magic's text/plain
|
||||
has_html_patterns = any(p in test_content_normalized for p in HTML_PATTERNS)
|
||||
|
||||
# Always trust headers first
|
||||
if 'text/plain' in http_content_header:
|
||||
self.is_plaintext = True
|
||||
if any(s in http_content_header for s in RSS_XML_CONTENT_TYPES):
|
||||
self.is_rss = True
|
||||
elif any(s in http_content_header for s in JSON_CONTENT_TYPES):
|
||||
self.is_json = True
|
||||
elif 'pdf' in magic_content_header:
|
||||
self.is_pdf = True
|
||||
elif has_html_patterns or http_content_header == 'text/html':
|
||||
self.is_html = True
|
||||
elif any(s in magic_content_header for s in JSON_CONTENT_TYPES):
|
||||
self.is_json = True
|
||||
# magic will call a rss document 'xml'
|
||||
# Rarely do endpoints give the right header, usually just text/xml, so we check also for <rss
|
||||
# This also triggers the automatic CDATA text parser so the RSS goes back a nice content list
|
||||
elif '<rss' in test_content_normalized or '<feed' in test_content_normalized or any(s in magic_content_header for s in RSS_XML_CONTENT_TYPES) or '<rdf:' in test_content_normalized:
|
||||
self.is_rss = True
|
||||
elif any(s in http_content_header for s in XML_CONTENT_TYPES):
|
||||
# Only mark as generic XML if not already detected as RSS
|
||||
if not self.is_rss:
|
||||
self.is_xml = True
|
||||
elif test_content_normalized.startswith('<?xml') or any(s in magic_content_header for s in XML_CONTENT_TYPES):
|
||||
# Generic XML that's not RSS/Atom (RSS/Atom checked above)
|
||||
self.is_xml = True
|
||||
elif '%pdf-1' in test_content:
|
||||
self.is_pdf = True
|
||||
elif http_content_header.startswith('text/'):
|
||||
self.is_plaintext = True
|
||||
# Only trust magic for 'text' if no other patterns matched
|
||||
elif 'text' in magic_content_header:
|
||||
self.is_plaintext = True
|
||||
# If magic says text/plain and we found no HTML patterns, trust it
|
||||
elif magic_result == 'text/plain':
|
||||
self.is_plaintext = True
|
||||
logger.debug(f"Trusting magic's text/plain result (no HTML patterns detected)")
|
||||
|
||||
@@ -7,7 +7,7 @@ import urllib3
|
||||
import time
|
||||
|
||||
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
||||
name = 'Re-stock & Price detection for single product pages'
|
||||
name = 'Re-stock & Price detection for pages with a SINGLE product'
|
||||
description = 'Detects if the product goes back to in-stock'
|
||||
|
||||
class UnableToExtractRestockData(Exception):
|
||||
@@ -79,7 +79,7 @@ def get_itemprop_availability(html_content) -> Restock:
|
||||
# First phase, dead simple scanning of anything that looks useful
|
||||
value = Restock()
|
||||
if data:
|
||||
logger.debug(f"Using jsonpath to find price/availability/etc")
|
||||
logger.debug("Using jsonpath to find price/availability/etc")
|
||||
price_parse = parse('$..(price|Price)')
|
||||
pricecurrency_parse = parse('$..(pricecurrency|currency|priceCurrency )')
|
||||
availability_parse = parse('$..(availability|Availability)')
|
||||
@@ -110,7 +110,7 @@ def get_itemprop_availability(html_content) -> Restock:
|
||||
|
||||
# Second, go dig OpenGraph which is something that jsonpath_ng cant do because of the tuples and double-dots (:)
|
||||
if not value.get('price') or value.get('availability'):
|
||||
logger.debug(f"Alternatively digging through OpenGraph properties for restock/price info..")
|
||||
logger.debug("Alternatively digging through OpenGraph properties for restock/price info..")
|
||||
jsonpath_expr = parse('$..properties')
|
||||
|
||||
for match in jsonpath_expr.find(data):
|
||||
|
||||
@@ -15,7 +15,7 @@ def _task(watch, update_handler):
|
||||
except FilterNotFoundInResponse as e:
|
||||
text_after_filter = f"Filter not found in HTML: {str(e)}"
|
||||
except ReplyWithContentButNoText as e:
|
||||
text_after_filter = f"Filter found but no text (empty result)"
|
||||
text_after_filter = "Filter found but no text (empty result)"
|
||||
except Exception as e:
|
||||
text_after_filter = f"Error: {str(e)}"
|
||||
|
||||
@@ -32,7 +32,7 @@ def prepare_filter_prevew(datastore, watch_uuid, form_data):
|
||||
'''Used by @app.route("/edit/<string:uuid>/preview-rendered", methods=['POST'])'''
|
||||
from changedetectionio import forms, html_tools
|
||||
from changedetectionio.model.Watch import model as watch_model
|
||||
from concurrent.futures import ProcessPoolExecutor
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from copy import deepcopy
|
||||
from flask import request
|
||||
import brotli
|
||||
@@ -45,6 +45,7 @@ def prepare_filter_prevew(datastore, watch_uuid, form_data):
|
||||
text_before_filter = ''
|
||||
trigger_line_numbers = []
|
||||
ignore_line_numbers = []
|
||||
blocked_line_numbers = []
|
||||
|
||||
tmp_watch = deepcopy(datastore.data['watching'].get(watch_uuid))
|
||||
|
||||
@@ -76,13 +77,16 @@ def prepare_filter_prevew(datastore, watch_uuid, form_data):
|
||||
update_handler.fetcher.headers['content-type'] = tmp_watch.get('content-type')
|
||||
|
||||
# Process our watch with filters and the HTML from disk, and also a blank watch with no filters but also with the same HTML from disk
|
||||
# Do this as a parallel process because it could take some time
|
||||
with ProcessPoolExecutor(max_workers=2) as executor:
|
||||
future1 = executor.submit(_task, tmp_watch, update_handler)
|
||||
future2 = executor.submit(_task, blank_watch_no_filters, update_handler)
|
||||
# Do this as parallel threads (not processes) to avoid pickle issues with Lock objects
|
||||
try:
|
||||
with ThreadPoolExecutor(max_workers=2) as executor:
|
||||
future1 = executor.submit(_task, tmp_watch, update_handler)
|
||||
future2 = executor.submit(_task, blank_watch_no_filters, update_handler)
|
||||
|
||||
text_after_filter = future1.result()
|
||||
text_before_filter = future2.result()
|
||||
text_after_filter = future1.result()
|
||||
text_before_filter = future2.result()
|
||||
except Exception as e:
|
||||
x=1
|
||||
|
||||
try:
|
||||
trigger_line_numbers = html_tools.strip_ignore_text(content=text_after_filter,
|
||||
@@ -101,14 +105,23 @@ def prepare_filter_prevew(datastore, watch_uuid, form_data):
|
||||
except Exception as e:
|
||||
text_before_filter = f"Error: {str(e)}"
|
||||
|
||||
try:
|
||||
blocked_line_numbers = html_tools.strip_ignore_text(content=text_after_filter,
|
||||
wordlist=tmp_watch.get('text_should_not_be_present', []) + datastore.data['settings']['application'].get('text_should_not_be_present', []),
|
||||
mode='line numbers'
|
||||
)
|
||||
except Exception as e:
|
||||
text_before_filter = f"Error: {str(e)}"
|
||||
|
||||
logger.trace(f"Parsed in {time.time() - now:.3f}s")
|
||||
|
||||
return ({
|
||||
'after_filter': text_after_filter,
|
||||
'before_filter': text_before_filter.decode('utf-8') if isinstance(text_before_filter, bytes) else text_before_filter,
|
||||
'duration': time.time() - now,
|
||||
'trigger_line_numbers': trigger_line_numbers,
|
||||
'ignore_line_numbers': ignore_line_numbers,
|
||||
'after_filter': text_after_filter,
|
||||
'before_filter': text_before_filter.decode('utf-8') if isinstance(text_before_filter, bytes) else text_before_filter,
|
||||
'blocked_line_numbers': blocked_line_numbers,
|
||||
'duration': time.time() - now,
|
||||
'ignore_line_numbers': ignore_line_numbers,
|
||||
'trigger_line_numbers': trigger_line_numbers,
|
||||
})
|
||||
|
||||
|
||||
|
||||
@@ -10,15 +10,20 @@ from changedetectionio.conditions import execute_ruleset_against_all_plugins
|
||||
from changedetectionio.processors import difference_detection_processor
|
||||
from changedetectionio.html_tools import PERL_STYLE_REGEX, cdata_in_document_to_text, TRANSLATE_WHITESPACE_TABLE
|
||||
from changedetectionio import html_tools, content_fetchers
|
||||
from changedetectionio.blueprint.price_data_follower import PRICE_DATA_TRACK_ACCEPT, PRICE_DATA_TRACK_REJECT
|
||||
from changedetectionio.blueprint.price_data_follower import PRICE_DATA_TRACK_ACCEPT
|
||||
from loguru import logger
|
||||
|
||||
from changedetectionio.processors.magic import guess_stream_type
|
||||
|
||||
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
||||
|
||||
name = 'Webpage Text/HTML, JSON and PDF changes'
|
||||
description = 'Detects all text changes where possible'
|
||||
|
||||
json_filter_prefixes = ['json:', 'jq:', 'jqraw:']
|
||||
JSON_FILTER_PREFIXES = ['json:', 'jq:', 'jqraw:']
|
||||
|
||||
# Assume it's this type if the server says nothing on content-type
|
||||
DEFAULT_WHEN_NO_CONTENT_TYPE_HEADER = 'text/html'
|
||||
|
||||
class FilterNotFoundInResponse(ValueError):
|
||||
def __init__(self, msg, screenshot=None, xpath_data=None):
|
||||
@@ -32,356 +37,560 @@ class PDFToHTMLToolNotFound(ValueError):
|
||||
ValueError.__init__(self, msg)
|
||||
|
||||
|
||||
class FilterConfig:
|
||||
"""Consolidates all filter and rule configurations from watch, tags, and global settings."""
|
||||
|
||||
def __init__(self, watch, datastore):
|
||||
self.watch = watch
|
||||
self.datastore = datastore
|
||||
self.watch_uuid = watch.get('uuid')
|
||||
# Cache computed properties to avoid repeated list operations
|
||||
self._include_filters_cache = None
|
||||
self._subtractive_selectors_cache = None
|
||||
|
||||
def _get_merged_rules(self, attr, include_global=False):
|
||||
"""Merge rules from watch, tags, and optionally global settings."""
|
||||
watch_rules = self.watch.get(attr, [])
|
||||
tag_rules = self.datastore.get_tag_overrides_for_watch(uuid=self.watch_uuid, attr=attr)
|
||||
rules = list(dict.fromkeys(watch_rules + tag_rules))
|
||||
|
||||
if include_global:
|
||||
global_rules = self.datastore.data['settings']['application'].get(f'global_{attr}', [])
|
||||
rules = list(dict.fromkeys(rules + global_rules))
|
||||
|
||||
return rules
|
||||
|
||||
@property
|
||||
def include_filters(self):
|
||||
if self._include_filters_cache is None:
|
||||
filters = self._get_merged_rules('include_filters')
|
||||
# Inject LD+JSON price tracker rule if enabled
|
||||
if self.watch.get('track_ldjson_price_data', '') == PRICE_DATA_TRACK_ACCEPT:
|
||||
filters += html_tools.LD_JSON_PRODUCT_OFFER_SELECTORS
|
||||
self._include_filters_cache = filters
|
||||
return self._include_filters_cache
|
||||
|
||||
@property
|
||||
def subtractive_selectors(self):
|
||||
if self._subtractive_selectors_cache is None:
|
||||
watch_selectors = self.watch.get("subtractive_selectors", [])
|
||||
tag_selectors = self.datastore.get_tag_overrides_for_watch(uuid=self.watch_uuid, attr='subtractive_selectors')
|
||||
global_selectors = self.datastore.data["settings"]["application"].get("global_subtractive_selectors", [])
|
||||
self._subtractive_selectors_cache = [*tag_selectors, *watch_selectors, *global_selectors]
|
||||
return self._subtractive_selectors_cache
|
||||
|
||||
@property
|
||||
def extract_text(self):
|
||||
return self._get_merged_rules('extract_text')
|
||||
|
||||
@property
|
||||
def ignore_text(self):
|
||||
return self._get_merged_rules('ignore_text', include_global=True)
|
||||
|
||||
@property
|
||||
def trigger_text(self):
|
||||
return self._get_merged_rules('trigger_text')
|
||||
|
||||
@property
|
||||
def text_should_not_be_present(self):
|
||||
return self._get_merged_rules('text_should_not_be_present')
|
||||
|
||||
@property
|
||||
def has_include_filters(self):
|
||||
return bool(self.include_filters) and bool(self.include_filters[0].strip())
|
||||
|
||||
@property
|
||||
def has_include_json_filters(self):
|
||||
return any(f.strip().startswith(prefix) for f in self.include_filters for prefix in JSON_FILTER_PREFIXES)
|
||||
|
||||
@property
|
||||
def has_subtractive_selectors(self):
|
||||
return bool(self.subtractive_selectors) and bool(self.subtractive_selectors[0].strip())
|
||||
|
||||
|
||||
class ContentTransformer:
|
||||
"""Handles text transformations like trimming, sorting, and deduplication."""
|
||||
|
||||
@staticmethod
|
||||
def trim_whitespace(text):
|
||||
"""Remove leading/trailing whitespace from each line."""
|
||||
# Use generator expression to avoid building intermediate list
|
||||
return '\n'.join(line.strip() for line in text.replace("\n\n", "\n").splitlines())
|
||||
|
||||
@staticmethod
|
||||
def remove_duplicate_lines(text):
|
||||
"""Remove duplicate lines while preserving order."""
|
||||
return '\n'.join(dict.fromkeys(line for line in text.replace("\n\n", "\n").splitlines()))
|
||||
|
||||
@staticmethod
|
||||
def sort_alphabetically(text):
|
||||
"""Sort lines alphabetically (case-insensitive)."""
|
||||
# Remove double line feeds before sorting
|
||||
text = text.replace("\n\n", "\n")
|
||||
return '\n'.join(sorted(text.splitlines(), key=lambda x: x.lower()))
|
||||
|
||||
@staticmethod
|
||||
def extract_by_regex(text, regex_patterns):
|
||||
"""Extract text matching regex patterns."""
|
||||
# Use list of strings instead of concatenating lists repeatedly (avoids O(n²) behavior)
|
||||
regex_matched_output = []
|
||||
|
||||
for s_re in regex_patterns:
|
||||
# Check if it's perl-style regex /.../
|
||||
if re.search(PERL_STYLE_REGEX, s_re, re.IGNORECASE):
|
||||
regex = html_tools.perl_style_slash_enclosed_regex_to_options(s_re)
|
||||
result = re.findall(regex, text)
|
||||
|
||||
for match in result:
|
||||
if type(match) is tuple:
|
||||
regex_matched_output.extend(match)
|
||||
regex_matched_output.append('\n')
|
||||
else:
|
||||
regex_matched_output.append(match)
|
||||
regex_matched_output.append('\n')
|
||||
else:
|
||||
# Plain text search (case-insensitive)
|
||||
r = re.compile(re.escape(s_re), re.IGNORECASE)
|
||||
res = r.findall(text)
|
||||
if res:
|
||||
for match in res:
|
||||
regex_matched_output.append(match)
|
||||
regex_matched_output.append('\n')
|
||||
|
||||
return ''.join(regex_matched_output) if regex_matched_output else ''
|
||||
|
||||
|
||||
class RuleEngine:
|
||||
"""Evaluates blocking rules (triggers, conditions, text_should_not_be_present)."""
|
||||
|
||||
@staticmethod
|
||||
def evaluate_trigger_text(content, trigger_patterns):
|
||||
"""
|
||||
Check if trigger text is present. If trigger_text is configured,
|
||||
content is blocked UNLESS the trigger is found.
|
||||
Returns True if blocked, False if allowed.
|
||||
"""
|
||||
if not trigger_patterns:
|
||||
return False
|
||||
|
||||
# Assume blocked if trigger_text is configured
|
||||
result = html_tools.strip_ignore_text(
|
||||
content=str(content),
|
||||
wordlist=trigger_patterns,
|
||||
mode="line numbers"
|
||||
)
|
||||
# Unblock if trigger was found
|
||||
return not bool(result)
|
||||
|
||||
@staticmethod
|
||||
def evaluate_text_should_not_be_present(content, patterns):
|
||||
"""
|
||||
Check if forbidden text is present. If found, block the change.
|
||||
Returns True if blocked, False if allowed.
|
||||
"""
|
||||
if not patterns:
|
||||
return False
|
||||
|
||||
result = html_tools.strip_ignore_text(
|
||||
content=str(content),
|
||||
wordlist=patterns,
|
||||
mode="line numbers"
|
||||
)
|
||||
# Block if forbidden text was found
|
||||
return bool(result)
|
||||
|
||||
@staticmethod
|
||||
def evaluate_conditions(watch, datastore, content):
|
||||
"""
|
||||
Evaluate custom conditions ruleset.
|
||||
Returns True if blocked, False if allowed.
|
||||
"""
|
||||
if not watch.get('conditions') or not watch.get('conditions_match_logic'):
|
||||
return False
|
||||
|
||||
conditions_result = execute_ruleset_against_all_plugins(
|
||||
current_watch_uuid=watch.get('uuid'),
|
||||
application_datastruct=datastore.data,
|
||||
ephemeral_data={'text': content}
|
||||
)
|
||||
|
||||
# Block if conditions not met
|
||||
return not conditions_result.get('result')
|
||||
|
||||
|
||||
class ContentProcessor:
|
||||
"""Handles content preprocessing, filtering, and extraction."""
|
||||
|
||||
def __init__(self, fetcher, watch, filter_config, datastore):
|
||||
self.fetcher = fetcher
|
||||
self.watch = watch
|
||||
self.filter_config = filter_config
|
||||
self.datastore = datastore
|
||||
|
||||
def preprocess_rss(self, content):
|
||||
"""
|
||||
Convert CDATA/comments in RSS to usable text.
|
||||
|
||||
Supports two RSS processing modes:
|
||||
- 'default': Inline CDATA replacement (original behavior)
|
||||
- 'formatted': Format RSS items with title, link, guid, pubDate, and description (CDATA unmarked)
|
||||
"""
|
||||
from changedetectionio import rss_tools
|
||||
rss_mode = self.datastore.data["settings"]["application"].get("rss_reader_mode")
|
||||
if rss_mode:
|
||||
# Format RSS items nicely with CDATA content unmarked and converted to text
|
||||
return rss_tools.format_rss_items(content)
|
||||
else:
|
||||
# Default: Original inline CDATA replacement
|
||||
return cdata_in_document_to_text(html_content=content)
|
||||
|
||||
def preprocess_pdf(self, raw_content):
|
||||
"""Convert PDF to HTML using external tool."""
|
||||
from shutil import which
|
||||
tool = os.getenv("PDF_TO_HTML_TOOL", "pdftohtml")
|
||||
if not which(tool):
|
||||
raise PDFToHTMLToolNotFound(
|
||||
f"Command-line `{tool}` tool was not found in system PATH, was it installed?"
|
||||
)
|
||||
|
||||
import subprocess
|
||||
proc = subprocess.Popen(
|
||||
[tool, '-stdout', '-', '-s', 'out.pdf', '-i'],
|
||||
stdout=subprocess.PIPE,
|
||||
stdin=subprocess.PIPE
|
||||
)
|
||||
proc.stdin.write(raw_content)
|
||||
proc.stdin.close()
|
||||
html_content = proc.stdout.read().decode('utf-8')
|
||||
proc.wait(timeout=60)
|
||||
|
||||
# Add metadata for change detection
|
||||
metadata = (
|
||||
f"<p>Added by changedetection.io: Document checksum - "
|
||||
f"{hashlib.md5(raw_content).hexdigest().upper()} "
|
||||
f"Original file size - {len(raw_content)} bytes</p>"
|
||||
)
|
||||
return html_content.replace('</body>', metadata + '</body>')
|
||||
|
||||
def preprocess_json(self, raw_content):
|
||||
"""Format and sort JSON content."""
|
||||
# Then we re-format it, else it does have filters (later on) which will reformat it anyway
|
||||
content = html_tools.extract_json_as_string(content=raw_content, json_filter="json:$")
|
||||
|
||||
# Sort JSON to avoid false alerts from reordering
|
||||
try:
|
||||
content = json.dumps(json.loads(content), sort_keys=True, indent=4)
|
||||
except Exception:
|
||||
# Might be malformed JSON, continue anyway
|
||||
pass
|
||||
|
||||
return content
|
||||
|
||||
def apply_include_filters(self, content, stream_content_type):
|
||||
"""Apply CSS, XPath, or JSON filters to extract specific content."""
|
||||
filtered_content = ""
|
||||
|
||||
for filter_rule in self.filter_config.include_filters:
|
||||
# XPath filters
|
||||
if filter_rule[0] == '/' or filter_rule.startswith('xpath:'):
|
||||
filtered_content += html_tools.xpath_filter(
|
||||
xpath_filter=filter_rule.replace('xpath:', ''),
|
||||
html_content=content,
|
||||
append_pretty_line_formatting=not self.watch.is_source_type_url,
|
||||
is_rss=stream_content_type.is_rss
|
||||
)
|
||||
|
||||
# XPath1 filters (first match only)
|
||||
elif filter_rule.startswith('xpath1:'):
|
||||
filtered_content += html_tools.xpath1_filter(
|
||||
xpath_filter=filter_rule.replace('xpath1:', ''),
|
||||
html_content=content,
|
||||
append_pretty_line_formatting=not self.watch.is_source_type_url,
|
||||
is_rss=stream_content_type.is_rss
|
||||
)
|
||||
|
||||
# JSON filters
|
||||
elif any(filter_rule.startswith(prefix) for prefix in JSON_FILTER_PREFIXES):
|
||||
filtered_content += html_tools.extract_json_as_string(
|
||||
content=content,
|
||||
json_filter=filter_rule
|
||||
)
|
||||
|
||||
# CSS selectors, default fallback
|
||||
else:
|
||||
filtered_content += html_tools.include_filters(
|
||||
include_filters=filter_rule,
|
||||
html_content=content,
|
||||
append_pretty_line_formatting=not self.watch.is_source_type_url
|
||||
)
|
||||
|
||||
# Raise error if filter returned nothing
|
||||
if not filtered_content.strip():
|
||||
raise FilterNotFoundInResponse(
|
||||
msg=self.filter_config.include_filters,
|
||||
screenshot=self.fetcher.screenshot,
|
||||
xpath_data=self.fetcher.xpath_data
|
||||
)
|
||||
|
||||
return filtered_content
|
||||
|
||||
def apply_subtractive_selectors(self, content):
|
||||
"""Remove elements matching subtractive selectors."""
|
||||
return html_tools.element_removal(self.filter_config.subtractive_selectors, content)
|
||||
|
||||
def extract_text_from_html(self, html_content, stream_content_type):
|
||||
"""Convert HTML to plain text."""
|
||||
do_anchor = self.datastore.data["settings"]["application"].get("render_anchor_tag_content", False)
|
||||
return html_tools.html_to_text(
|
||||
html_content=html_content,
|
||||
render_anchor_tag_content=do_anchor,
|
||||
is_rss=stream_content_type.is_rss
|
||||
)
|
||||
|
||||
|
||||
class ChecksumCalculator:
|
||||
"""Calculates checksums with various options."""
|
||||
|
||||
@staticmethod
|
||||
def calculate(text, ignore_whitespace=False):
|
||||
"""Calculate MD5 checksum of text content."""
|
||||
if ignore_whitespace:
|
||||
text = text.translate(TRANSLATE_WHITESPACE_TABLE)
|
||||
return hashlib.md5(text.encode('utf-8')).hexdigest()
|
||||
|
||||
|
||||
# Some common stuff here that can be moved to a base class
|
||||
# (set_proxy_from_list)
|
||||
class perform_site_check(difference_detection_processor):
|
||||
|
||||
def run_changedetection(self, watch):
|
||||
changed_detected = False
|
||||
html_content = ""
|
||||
screenshot = False # as bytes
|
||||
stripped_text_from_html = ""
|
||||
|
||||
if not watch:
|
||||
raise Exception("Watch no longer exists.")
|
||||
|
||||
# Initialize components
|
||||
filter_config = FilterConfig(watch, self.datastore)
|
||||
content_processor = ContentProcessor(self.fetcher, watch, filter_config, self.datastore)
|
||||
transformer = ContentTransformer()
|
||||
rule_engine = RuleEngine()
|
||||
|
||||
# Get content type and stream info
|
||||
ctype_header = self.fetcher.get_all_headers().get('content-type', DEFAULT_WHEN_NO_CONTENT_TYPE_HEADER).lower()
|
||||
stream_content_type = guess_stream_type(http_content_header=ctype_header, content=self.fetcher.content)
|
||||
|
||||
# Unset any existing notification error
|
||||
update_obj = {'last_notification_error': False, 'last_error': False}
|
||||
|
||||
url = watch.link
|
||||
|
||||
self.screenshot = self.fetcher.screenshot
|
||||
self.xpath_data = self.fetcher.xpath_data
|
||||
|
||||
# Track the content type
|
||||
update_obj['content_type'] = self.fetcher.get_all_headers().get('content-type', '').lower()
|
||||
|
||||
# Watches added automatically in the queue manager will skip if its the same checksum as the previous run
|
||||
# Saves a lot of CPU
|
||||
# Track the content type and checksum before filters
|
||||
update_obj['content_type'] = ctype_header
|
||||
update_obj['previous_md5_before_filters'] = hashlib.md5(self.fetcher.content.encode('utf-8')).hexdigest()
|
||||
|
||||
# Fetching complete, now filters
|
||||
# === CONTENT PREPROCESSING ===
|
||||
# Avoid creating unnecessary intermediate string copies by reassigning only when needed
|
||||
content = self.fetcher.content
|
||||
|
||||
# @note: I feel like the following should be in a more obvious chain system
|
||||
# - Check filter text
|
||||
# - Is the checksum different?
|
||||
# - Do we convert to JSON?
|
||||
# https://stackoverflow.com/questions/41817578/basic-method-chaining ?
|
||||
# return content().textfilter().jsonextract().checksumcompare() ?
|
||||
# RSS preprocessing
|
||||
if stream_content_type.is_rss:
|
||||
content = content_processor.preprocess_rss(content)
|
||||
if self.datastore.data["settings"]["application"].get("rss_reader_mode"):
|
||||
# Now just becomes regular HTML that can have xpath/CSS applied (first of the set etc)
|
||||
stream_content_type.is_rss = False
|
||||
stream_content_type.is_html = True
|
||||
self.fetcher.content = content
|
||||
|
||||
is_json = 'application/json' in self.fetcher.get_all_headers().get('content-type', '').lower()
|
||||
is_html = not is_json
|
||||
is_rss = False
|
||||
# PDF preprocessing
|
||||
if watch.is_pdf or stream_content_type.is_pdf:
|
||||
content = content_processor.preprocess_pdf(raw_content=self.fetcher.raw_content)
|
||||
stream_content_type.is_html = True
|
||||
|
||||
ctype_header = self.fetcher.get_all_headers().get('content-type', '').lower()
|
||||
# Go into RSS preprocess for converting CDATA/comment to usable text
|
||||
if any(substring in ctype_header for substring in ['application/xml', 'application/rss', 'text/xml']):
|
||||
if '<rss' in self.fetcher.content[:100].lower():
|
||||
self.fetcher.content = cdata_in_document_to_text(html_content=self.fetcher.content)
|
||||
is_rss = True
|
||||
# JSON - Always reformat it nicely for consistency.
|
||||
|
||||
# source: support, basically treat it as plaintext
|
||||
if stream_content_type.is_json:
|
||||
if not filter_config.has_include_json_filters:
|
||||
content = content_processor.preprocess_json(raw_content=content)
|
||||
#else, otherwise it gets sorted/formatted in the filter stage anyway
|
||||
|
||||
# HTML obfuscation workarounds
|
||||
if stream_content_type.is_html:
|
||||
content = html_tools.workarounds_for_obfuscations(content)
|
||||
|
||||
# Check for LD+JSON price data (for HTML content)
|
||||
if stream_content_type.is_html:
|
||||
update_obj['has_ldjson_price_data'] = html_tools.has_ldjson_product_info(content)
|
||||
|
||||
# === FILTER APPLICATION ===
|
||||
# Start with content reference, avoid copy until modification
|
||||
html_content = content
|
||||
|
||||
# Apply include filters (CSS, XPath, JSON)
|
||||
# Except for plaintext (incase they tried to confuse the system, it will HTML escape
|
||||
#if not stream_content_type.is_plaintext:
|
||||
if filter_config.has_include_filters:
|
||||
html_content = content_processor.apply_include_filters(content, stream_content_type)
|
||||
|
||||
# Apply subtractive selectors
|
||||
if filter_config.has_subtractive_selectors:
|
||||
html_content = content_processor.apply_subtractive_selectors(html_content)
|
||||
|
||||
# === TEXT EXTRACTION ===
|
||||
if watch.is_source_type_url:
|
||||
is_html = False
|
||||
is_json = False
|
||||
|
||||
inline_pdf = self.fetcher.get_all_headers().get('content-disposition', '') and '%PDF-1' in self.fetcher.content[:10]
|
||||
if watch.is_pdf or 'application/pdf' in self.fetcher.get_all_headers().get('content-type', '').lower() or inline_pdf:
|
||||
from shutil import which
|
||||
tool = os.getenv("PDF_TO_HTML_TOOL", "pdftohtml")
|
||||
if not which(tool):
|
||||
raise PDFToHTMLToolNotFound("Command-line `{}` tool was not found in system PATH, was it installed?".format(tool))
|
||||
|
||||
import subprocess
|
||||
proc = subprocess.Popen(
|
||||
[tool, '-stdout', '-', '-s', 'out.pdf', '-i'],
|
||||
stdout=subprocess.PIPE,
|
||||
stdin=subprocess.PIPE)
|
||||
proc.stdin.write(self.fetcher.raw_content)
|
||||
proc.stdin.close()
|
||||
self.fetcher.content = proc.stdout.read().decode('utf-8')
|
||||
proc.wait(timeout=60)
|
||||
|
||||
# Add a little metadata so we know if the file changes (like if an image changes, but the text is the same
|
||||
# @todo may cause problems with non-UTF8?
|
||||
metadata = "<p>Added by changedetection.io: Document checksum - {} Filesize - {} bytes</p>".format(
|
||||
hashlib.md5(self.fetcher.raw_content).hexdigest().upper(),
|
||||
len(self.fetcher.content))
|
||||
|
||||
self.fetcher.content = self.fetcher.content.replace('</body>', metadata + '</body>')
|
||||
|
||||
# Better would be if Watch.model could access the global data also
|
||||
# and then use getattr https://docs.python.org/3/reference/datamodel.html#object.__getitem__
|
||||
# https://realpython.com/inherit-python-dict/ instead of doing it procedurely
|
||||
include_filters_from_tags = self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='include_filters')
|
||||
|
||||
# 1845 - remove duplicated filters in both group and watch include filter
|
||||
include_filters_rule = list(dict.fromkeys(watch.get('include_filters', []) + include_filters_from_tags))
|
||||
|
||||
subtractive_selectors = [*self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='subtractive_selectors'),
|
||||
*watch.get("subtractive_selectors", []),
|
||||
*self.datastore.data["settings"]["application"].get("global_subtractive_selectors", [])
|
||||
]
|
||||
|
||||
# Inject a virtual LD+JSON price tracker rule
|
||||
if watch.get('track_ldjson_price_data', '') == PRICE_DATA_TRACK_ACCEPT:
|
||||
include_filters_rule += html_tools.LD_JSON_PRODUCT_OFFER_SELECTORS
|
||||
|
||||
has_filter_rule = len(include_filters_rule) and len(include_filters_rule[0].strip())
|
||||
has_subtractive_selectors = len(subtractive_selectors) and len(subtractive_selectors[0].strip())
|
||||
|
||||
if is_json and not has_filter_rule:
|
||||
include_filters_rule.append("json:$")
|
||||
has_filter_rule = True
|
||||
|
||||
if is_json:
|
||||
# Sort the JSON so we dont get false alerts when the content is just re-ordered
|
||||
try:
|
||||
self.fetcher.content = json.dumps(json.loads(self.fetcher.content), sort_keys=True)
|
||||
except Exception as e:
|
||||
# Might have just been a snippet, or otherwise bad JSON, continue
|
||||
pass
|
||||
|
||||
if has_filter_rule:
|
||||
for filter in include_filters_rule:
|
||||
if any(prefix in filter for prefix in json_filter_prefixes):
|
||||
stripped_text_from_html += html_tools.extract_json_as_string(content=self.fetcher.content, json_filter=filter)
|
||||
is_html = False
|
||||
|
||||
if is_html or watch.is_source_type_url:
|
||||
|
||||
# CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text
|
||||
self.fetcher.content = html_tools.workarounds_for_obfuscations(self.fetcher.content)
|
||||
html_content = self.fetcher.content
|
||||
|
||||
# If not JSON, and if it's not text/plain..
|
||||
if 'text/plain' in self.fetcher.get_all_headers().get('content-type', '').lower():
|
||||
# Don't run get_text or xpath/css filters on plaintext
|
||||
stripped_text_from_html = html_content
|
||||
# For source URLs, keep raw content
|
||||
stripped_text = html_content
|
||||
elif stream_content_type.is_plaintext:
|
||||
# For plaintext, keep as-is without HTML-to-text conversion
|
||||
stripped_text = html_content
|
||||
else:
|
||||
# Extract text from HTML/RSS content (not generic XML)
|
||||
if stream_content_type.is_html or stream_content_type.is_rss:
|
||||
stripped_text = content_processor.extract_text_from_html(html_content, stream_content_type)
|
||||
else:
|
||||
# Does it have some ld+json price data? used for easier monitoring
|
||||
update_obj['has_ldjson_price_data'] = html_tools.has_ldjson_product_info(self.fetcher.content)
|
||||
|
||||
# Then we assume HTML
|
||||
if has_filter_rule:
|
||||
html_content = ""
|
||||
|
||||
for filter_rule in include_filters_rule:
|
||||
# For HTML/XML we offer xpath as an option, just start a regular xPath "/.."
|
||||
if filter_rule[0] == '/' or filter_rule.startswith('xpath:'):
|
||||
html_content += html_tools.xpath_filter(xpath_filter=filter_rule.replace('xpath:', ''),
|
||||
html_content=self.fetcher.content,
|
||||
append_pretty_line_formatting=not watch.is_source_type_url,
|
||||
is_rss=is_rss)
|
||||
|
||||
elif filter_rule.startswith('xpath1:'):
|
||||
html_content += html_tools.xpath1_filter(xpath_filter=filter_rule.replace('xpath1:', ''),
|
||||
html_content=self.fetcher.content,
|
||||
append_pretty_line_formatting=not watch.is_source_type_url,
|
||||
is_rss=is_rss)
|
||||
else:
|
||||
html_content += html_tools.include_filters(include_filters=filter_rule,
|
||||
html_content=self.fetcher.content,
|
||||
append_pretty_line_formatting=not watch.is_source_type_url)
|
||||
|
||||
if not html_content.strip():
|
||||
raise FilterNotFoundInResponse(msg=include_filters_rule, screenshot=self.fetcher.screenshot, xpath_data=self.fetcher.xpath_data)
|
||||
|
||||
if has_subtractive_selectors:
|
||||
html_content = html_tools.element_removal(subtractive_selectors, html_content)
|
||||
|
||||
if watch.is_source_type_url:
|
||||
stripped_text_from_html = html_content
|
||||
else:
|
||||
# extract text
|
||||
do_anchor = self.datastore.data["settings"]["application"].get("render_anchor_tag_content", False)
|
||||
stripped_text_from_html = html_tools.html_to_text(html_content=html_content,
|
||||
render_anchor_tag_content=do_anchor,
|
||||
is_rss=is_rss) # 1874 activate the <title workaround hack
|
||||
stripped_text = html_content
|
||||
|
||||
# === TEXT TRANSFORMATIONS ===
|
||||
if watch.get('trim_text_whitespace'):
|
||||
stripped_text_from_html = '\n'.join(line.strip() for line in stripped_text_from_html.replace("\n\n", "\n").splitlines())
|
||||
stripped_text = transformer.trim_whitespace(stripped_text)
|
||||
|
||||
# Re #340 - return the content before the 'ignore text' was applied
|
||||
# Also used to calculate/show what was removed
|
||||
text_content_before_ignored_filter = stripped_text_from_html
|
||||
|
||||
# @todo whitespace coming from missing rtrim()?
|
||||
# stripped_text_from_html could be based on their preferences, replace the processed text with only that which they want to know about.
|
||||
# Rewrite's the processing text based on only what diff result they want to see
|
||||
# Save text before ignore filters (for diff calculation)
|
||||
text_content_before_ignored_filter = stripped_text
|
||||
|
||||
# === DIFF FILTERING ===
|
||||
# If user wants specific diff types (added/removed/replaced only)
|
||||
if watch.has_special_diff_filter_options_set() and len(watch.history.keys()):
|
||||
# Now the content comes from the diff-parser and not the returned HTTP traffic, so could be some differences
|
||||
from changedetectionio import diff
|
||||
# needs to not include (added) etc or it may get used twice
|
||||
# Replace the processed text with the preferred result
|
||||
rendered_diff = diff.render_diff(previous_version_file_contents=watch.get_last_fetched_text_before_filters(),
|
||||
newest_version_file_contents=stripped_text_from_html,
|
||||
include_equal=False, # not the same lines
|
||||
include_added=watch.get('filter_text_added', True),
|
||||
include_removed=watch.get('filter_text_removed', True),
|
||||
include_replaced=watch.get('filter_text_replaced', True),
|
||||
line_feed_sep="\n",
|
||||
include_change_type_prefix=False)
|
||||
stripped_text = self._apply_diff_filtering(watch, stripped_text, text_content_before_ignored_filter)
|
||||
if stripped_text is None:
|
||||
# No differences found, but content exists
|
||||
c = ChecksumCalculator.calculate(text_content_before_ignored_filter, ignore_whitespace=True)
|
||||
return False, {'previous_md5': c}, text_content_before_ignored_filter.encode('utf-8')
|
||||
|
||||
watch.save_last_text_fetched_before_filters(text_content_before_ignored_filter.encode('utf-8'))
|
||||
|
||||
if not rendered_diff and stripped_text_from_html:
|
||||
# We had some content, but no differences were found
|
||||
# Store our new file as the MD5 so it will trigger in the future
|
||||
c = hashlib.md5(stripped_text_from_html.translate(TRANSLATE_WHITESPACE_TABLE).encode('utf-8')).hexdigest()
|
||||
return False, {'previous_md5': c}, stripped_text_from_html.encode('utf-8')
|
||||
else:
|
||||
stripped_text_from_html = rendered_diff
|
||||
|
||||
# Treat pages with no renderable text content as a change? No by default
|
||||
# === EMPTY PAGE CHECK ===
|
||||
empty_pages_are_a_change = self.datastore.data['settings']['application'].get('empty_pages_are_a_change', False)
|
||||
if not is_json and not empty_pages_are_a_change and len(stripped_text_from_html.strip()) == 0:
|
||||
raise content_fetchers.exceptions.ReplyWithContentButNoText(url=url,
|
||||
status_code=self.fetcher.get_last_status_code(),
|
||||
screenshot=self.fetcher.screenshot,
|
||||
has_filters=has_filter_rule,
|
||||
html_content=html_content,
|
||||
xpath_data=self.fetcher.xpath_data
|
||||
)
|
||||
|
||||
# We rely on the actual text in the html output.. many sites have random script vars etc,
|
||||
# in the future we'll implement other mechanisms.
|
||||
if not stream_content_type.is_json and not empty_pages_are_a_change and len(stripped_text.strip()) == 0:
|
||||
raise content_fetchers.exceptions.ReplyWithContentButNoText(
|
||||
url=url,
|
||||
status_code=self.fetcher.get_last_status_code(),
|
||||
screenshot=self.fetcher.screenshot,
|
||||
has_filters=filter_config.has_include_filters,
|
||||
html_content=html_content,
|
||||
xpath_data=self.fetcher.xpath_data
|
||||
)
|
||||
|
||||
update_obj["last_check_status"] = self.fetcher.get_last_status_code()
|
||||
|
||||
# 615 Extract text by regex
|
||||
extract_text = watch.get('extract_text', [])
|
||||
extract_text += self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='extract_text')
|
||||
if len(extract_text) > 0:
|
||||
regex_matched_output = []
|
||||
for s_re in extract_text:
|
||||
# incase they specified something in '/.../x'
|
||||
if re.search(PERL_STYLE_REGEX, s_re, re.IGNORECASE):
|
||||
regex = html_tools.perl_style_slash_enclosed_regex_to_options(s_re)
|
||||
result = re.findall(regex, stripped_text_from_html)
|
||||
|
||||
for l in result:
|
||||
if type(l) is tuple:
|
||||
# @todo - some formatter option default (between groups)
|
||||
regex_matched_output += list(l) + ['\n']
|
||||
else:
|
||||
# @todo - some formatter option default (between each ungrouped result)
|
||||
regex_matched_output += [l] + ['\n']
|
||||
else:
|
||||
# Doesnt look like regex, just hunt for plaintext and return that which matches
|
||||
# `stripped_text_from_html` will be bytes, so we must encode s_re also to bytes
|
||||
r = re.compile(re.escape(s_re), re.IGNORECASE)
|
||||
res = r.findall(stripped_text_from_html)
|
||||
if res:
|
||||
for match in res:
|
||||
regex_matched_output += [match] + ['\n']
|
||||
|
||||
##########################################################
|
||||
stripped_text_from_html = ''
|
||||
|
||||
if regex_matched_output:
|
||||
# @todo some formatter for presentation?
|
||||
stripped_text_from_html = ''.join(regex_matched_output)
|
||||
# === REGEX EXTRACTION ===
|
||||
if filter_config.extract_text:
|
||||
extracted = transformer.extract_by_regex(stripped_text, filter_config.extract_text)
|
||||
stripped_text = extracted
|
||||
|
||||
# === MORE TEXT TRANSFORMATIONS ===
|
||||
if watch.get('remove_duplicate_lines'):
|
||||
stripped_text_from_html = '\n'.join(dict.fromkeys(line for line in stripped_text_from_html.replace("\n\n", "\n").splitlines()))
|
||||
|
||||
stripped_text = transformer.remove_duplicate_lines(stripped_text)
|
||||
|
||||
if watch.get('sort_text_alphabetically'):
|
||||
# Note: Because a <p>something</p> will add an extra line feed to signify the paragraph gap
|
||||
# we end up with 'Some text\n\n', sorting will add all those extra \n at the start, so we remove them here.
|
||||
stripped_text_from_html = stripped_text_from_html.replace("\n\n", "\n")
|
||||
stripped_text_from_html = '\n'.join(sorted(stripped_text_from_html.splitlines(), key=lambda x: x.lower()))
|
||||
stripped_text = transformer.sort_alphabetically(stripped_text)
|
||||
|
||||
### CALCULATE MD5
|
||||
# If there's text to ignore
|
||||
text_to_ignore = watch.get('ignore_text', []) + self.datastore.data['settings']['application'].get('global_ignore_text', [])
|
||||
text_to_ignore += self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='ignore_text')
|
||||
# === CHECKSUM CALCULATION ===
|
||||
text_for_checksuming = stripped_text
|
||||
|
||||
text_for_checksuming = stripped_text_from_html
|
||||
if text_to_ignore:
|
||||
text_for_checksuming = html_tools.strip_ignore_text(stripped_text_from_html, text_to_ignore)
|
||||
# Apply ignore_text for checksum calculation
|
||||
if filter_config.ignore_text:
|
||||
text_for_checksuming = html_tools.strip_ignore_text(stripped_text, filter_config.ignore_text)
|
||||
|
||||
# Re #133 - if we should strip whitespaces from triggering the change detected comparison
|
||||
if text_for_checksuming and self.datastore.data['settings']['application'].get('ignore_whitespace', False):
|
||||
fetched_md5 = hashlib.md5(text_for_checksuming.translate(TRANSLATE_WHITESPACE_TABLE).encode('utf-8')).hexdigest()
|
||||
else:
|
||||
fetched_md5 = hashlib.md5(text_for_checksuming.encode('utf-8')).hexdigest()
|
||||
# Optionally remove ignored lines from output
|
||||
strip_ignored_lines = watch.get('strip_ignored_lines')
|
||||
if strip_ignored_lines is None:
|
||||
strip_ignored_lines = self.datastore.data['settings']['application'].get('strip_ignored_lines')
|
||||
if strip_ignored_lines:
|
||||
stripped_text = text_for_checksuming
|
||||
|
||||
############ Blocking rules, after checksum #################
|
||||
# Calculate checksum
|
||||
ignore_whitespace = self.datastore.data['settings']['application'].get('ignore_whitespace', False)
|
||||
fetched_md5 = ChecksumCalculator.calculate(text_for_checksuming, ignore_whitespace=ignore_whitespace)
|
||||
|
||||
# === BLOCKING RULES EVALUATION ===
|
||||
blocked = False
|
||||
trigger_text = watch.get('trigger_text', [])
|
||||
trigger_text += self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='trigger_text')
|
||||
if len(trigger_text):
|
||||
# Assume blocked
|
||||
|
||||
# Check trigger_text
|
||||
if rule_engine.evaluate_trigger_text(stripped_text, filter_config.trigger_text):
|
||||
blocked = True
|
||||
# Filter and trigger works the same, so reuse it
|
||||
# It should return the line numbers that match
|
||||
# Unblock flow if the trigger was found (some text remained after stripped what didnt match)
|
||||
result = html_tools.strip_ignore_text(content=str(stripped_text_from_html),
|
||||
wordlist=trigger_text,
|
||||
mode="line numbers")
|
||||
# Unblock if the trigger was found
|
||||
if result:
|
||||
blocked = False
|
||||
|
||||
text_should_not_be_present = watch.get('text_should_not_be_present', [])
|
||||
text_should_not_be_present += self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='text_should_not_be_present')
|
||||
if len(text_should_not_be_present):
|
||||
# If anything matched, then we should block a change from happening
|
||||
result = html_tools.strip_ignore_text(content=str(stripped_text_from_html),
|
||||
wordlist=text_should_not_be_present,
|
||||
mode="line numbers")
|
||||
if result:
|
||||
blocked = True
|
||||
# Check text_should_not_be_present
|
||||
if rule_engine.evaluate_text_should_not_be_present(stripped_text, filter_config.text_should_not_be_present):
|
||||
blocked = True
|
||||
|
||||
# And check if 'conditions' will let this pass through
|
||||
if watch.get('conditions') and watch.get('conditions_match_logic'):
|
||||
conditions_result = execute_ruleset_against_all_plugins(current_watch_uuid=watch.get('uuid'),
|
||||
application_datastruct=self.datastore.data,
|
||||
ephemeral_data={
|
||||
'text': stripped_text_from_html
|
||||
}
|
||||
)
|
||||
# Check custom conditions
|
||||
if rule_engine.evaluate_conditions(watch, self.datastore, stripped_text):
|
||||
blocked = True
|
||||
|
||||
if not conditions_result.get('result'):
|
||||
# Conditions say "Condition not met" so we block it.
|
||||
blocked = True
|
||||
|
||||
# Looks like something changed, but did it match all the rules?
|
||||
# === CHANGE DETECTION ===
|
||||
if blocked:
|
||||
changed_detected = False
|
||||
else:
|
||||
# The main thing that all this at the moment comes down to :)
|
||||
# Compare checksums
|
||||
if watch.get('previous_md5') != fetched_md5:
|
||||
changed_detected = True
|
||||
|
||||
# Always record the new checksum
|
||||
update_obj["previous_md5"] = fetched_md5
|
||||
|
||||
# On the first run of a site, watch['previous_md5'] will be None, set it the current one.
|
||||
# On first run, initialize previous_md5
|
||||
if not watch.get('previous_md5'):
|
||||
watch['previous_md5'] = fetched_md5
|
||||
|
||||
logger.debug(f"Watch UUID {watch.get('uuid')} content check - Previous MD5: {watch.get('previous_md5')}, Fetched MD5 {fetched_md5}")
|
||||
|
||||
if changed_detected:
|
||||
if watch.get('check_unique_lines', False):
|
||||
ignore_whitespace = self.datastore.data['settings']['application'].get('ignore_whitespace')
|
||||
# === UNIQUE LINES CHECK ===
|
||||
if changed_detected and watch.get('check_unique_lines', False):
|
||||
has_unique_lines = watch.lines_contain_something_unique_compared_to_history(
|
||||
lines=stripped_text.splitlines(),
|
||||
ignore_whitespace=ignore_whitespace
|
||||
)
|
||||
|
||||
has_unique_lines = watch.lines_contain_something_unique_compared_to_history(
|
||||
lines=stripped_text_from_html.splitlines(),
|
||||
ignore_whitespace=ignore_whitespace
|
||||
)
|
||||
if not has_unique_lines:
|
||||
logger.debug(f"check_unique_lines: UUID {watch.get('uuid')} didnt have anything new setting change_detected=False")
|
||||
changed_detected = False
|
||||
else:
|
||||
logger.debug(f"check_unique_lines: UUID {watch.get('uuid')} had unique content")
|
||||
|
||||
# One or more lines? unsure?
|
||||
if not has_unique_lines:
|
||||
logger.debug(f"check_unique_lines: UUID {watch.get('uuid')} didnt have anything new setting change_detected=False")
|
||||
changed_detected = False
|
||||
else:
|
||||
logger.debug(f"check_unique_lines: UUID {watch.get('uuid')} had unique content")
|
||||
# Note: Explicit cleanup is only needed here because text_json_diff handles
|
||||
# large strings (100KB-300KB for RSS/HTML). The other processors work with
|
||||
# small strings and don't need this.
|
||||
#
|
||||
# Python would clean these up automatically, but explicit `del` frees memory
|
||||
# immediately rather than waiting for function return, reducing peak memory usage.
|
||||
del content
|
||||
if 'html_content' in locals() and html_content is not stripped_text:
|
||||
del html_content
|
||||
if 'text_content_before_ignored_filter' in locals() and text_content_before_ignored_filter is not stripped_text:
|
||||
del text_content_before_ignored_filter
|
||||
if 'text_for_checksuming' in locals() and text_for_checksuming is not stripped_text:
|
||||
del text_for_checksuming
|
||||
|
||||
return changed_detected, update_obj, stripped_text
|
||||
|
||||
# stripped_text_from_html - Everything after filters and NO 'ignored' content
|
||||
return changed_detected, update_obj, stripped_text_from_html
|
||||
def _apply_diff_filtering(self, watch, stripped_text, text_before_filter):
|
||||
"""Apply user's diff filtering preferences (show only added/removed/replaced lines)."""
|
||||
from changedetectionio import diff
|
||||
|
||||
rendered_diff = diff.render_diff(
|
||||
previous_version_file_contents=watch.get_last_fetched_text_before_filters(),
|
||||
newest_version_file_contents=stripped_text,
|
||||
include_equal=False,
|
||||
include_added=watch.get('filter_text_added', True),
|
||||
include_removed=watch.get('filter_text_removed', True),
|
||||
include_replaced=watch.get('filter_text_replaced', True),
|
||||
include_change_type_prefix=False
|
||||
)
|
||||
|
||||
watch.save_last_text_fetched_before_filters(text_before_filter.encode('utf-8'))
|
||||
|
||||
if not rendered_diff and stripped_text:
|
||||
# No differences found
|
||||
return None
|
||||
|
||||
return rendered_diff
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
[pytest]
|
||||
addopts = --no-start-live-server --live-server-port=5005
|
||||
addopts = --no-start-live-server --live-server-port=0
|
||||
#testpaths = tests pytest_invenio
|
||||
#live_server_scope = function
|
||||
|
||||
|
||||
435
changedetectionio/queue_handlers.py
Normal file
435
changedetectionio/queue_handlers.py
Normal file
@@ -0,0 +1,435 @@
|
||||
from blinker import signal
|
||||
from loguru import logger
|
||||
from typing import Dict, List, Any, Optional
|
||||
import heapq
|
||||
import queue
|
||||
import threading
|
||||
|
||||
try:
|
||||
import janus
|
||||
except ImportError:
|
||||
logger.critical(f"CRITICAL: janus library is required. Install with: pip install janus")
|
||||
raise
|
||||
|
||||
|
||||
class RecheckPriorityQueue:
|
||||
"""
|
||||
Ultra-reliable priority queue using janus for async/sync bridging.
|
||||
|
||||
CRITICAL DESIGN NOTE: Both sync_q and async_q are required because:
|
||||
- sync_q: Used by Flask routes, ticker threads, and other synchronous code
|
||||
- async_q: Used by async workers (the actual fetchers/processors) and coroutines
|
||||
|
||||
DO NOT REMOVE EITHER INTERFACE - they bridge different execution contexts:
|
||||
- Synchronous code (Flask, threads) cannot use async methods without blocking
|
||||
- Async code cannot use sync methods without blocking the event loop
|
||||
- janus provides the only safe bridge between these two worlds
|
||||
|
||||
Attempting to unify to async-only would require:
|
||||
- Converting all Flask routes to async (major breaking change)
|
||||
- Using asyncio.run() in sync contexts (causes deadlocks)
|
||||
- Thread-pool wrapping (adds complexity and overhead)
|
||||
|
||||
Minimal implementation focused on reliability:
|
||||
- Pure janus for sync/async bridge
|
||||
- Thread-safe priority ordering
|
||||
- Bulletproof error handling with critical logging
|
||||
"""
|
||||
|
||||
def __init__(self, maxsize: int = 0):
|
||||
try:
|
||||
self._janus_queue = janus.Queue(maxsize=maxsize)
|
||||
# BOTH interfaces required - see class docstring for why
|
||||
self.sync_q = self._janus_queue.sync_q # Flask routes, ticker thread
|
||||
self.async_q = self._janus_queue.async_q # Async workers
|
||||
|
||||
# Priority storage - thread-safe
|
||||
self._priority_items = []
|
||||
self._lock = threading.RLock()
|
||||
|
||||
# Signals for UI updates
|
||||
self.queue_length_signal = signal('queue_length')
|
||||
|
||||
logger.debug("RecheckPriorityQueue initialized successfully")
|
||||
except Exception as e:
|
||||
logger.critical(f"CRITICAL: Failed to initialize RecheckPriorityQueue: {str(e)}")
|
||||
raise
|
||||
|
||||
# SYNC INTERFACE (for ticker thread)
|
||||
def put(self, item, block: bool = True, timeout: Optional[float] = None):
|
||||
"""Thread-safe sync put with priority ordering"""
|
||||
try:
|
||||
# Add to priority storage
|
||||
with self._lock:
|
||||
heapq.heappush(self._priority_items, item)
|
||||
|
||||
# Notify via janus sync queue
|
||||
self.sync_q.put(True, block=block, timeout=timeout)
|
||||
|
||||
# Emit signals
|
||||
self._emit_put_signals(item)
|
||||
|
||||
logger.debug(f"Successfully queued item: {self._get_item_uuid(item)}")
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
logger.critical(f"CRITICAL: Failed to put item {self._get_item_uuid(item)}: {str(e)}")
|
||||
# Remove from priority storage if janus put failed
|
||||
try:
|
||||
with self._lock:
|
||||
if item in self._priority_items:
|
||||
self._priority_items.remove(item)
|
||||
heapq.heapify(self._priority_items)
|
||||
except Exception as cleanup_e:
|
||||
logger.critical(f"CRITICAL: Failed to cleanup after put failure: {str(e)}")
|
||||
return False
|
||||
|
||||
def get(self, block: bool = True, timeout: Optional[float] = None):
|
||||
"""Thread-safe sync get with priority ordering"""
|
||||
try:
|
||||
# Wait for notification
|
||||
self.sync_q.get(block=block, timeout=timeout)
|
||||
|
||||
# Get highest priority item
|
||||
with self._lock:
|
||||
if not self._priority_items:
|
||||
logger.critical(f"CRITICAL: Queue notification received but no priority items available")
|
||||
raise Exception("Priority queue inconsistency")
|
||||
item = heapq.heappop(self._priority_items)
|
||||
|
||||
# Emit signals
|
||||
self._emit_get_signals()
|
||||
|
||||
logger.debug(f"Successfully retrieved item: {self._get_item_uuid(item)}")
|
||||
return item
|
||||
|
||||
except Exception as e:
|
||||
logger.critical(f"CRITICAL: Failed to get item from queue: {str(e)}")
|
||||
raise
|
||||
|
||||
# ASYNC INTERFACE (for workers)
|
||||
async def async_put(self, item):
|
||||
"""Pure async put with priority ordering"""
|
||||
try:
|
||||
# Add to priority storage
|
||||
with self._lock:
|
||||
heapq.heappush(self._priority_items, item)
|
||||
|
||||
# Notify via janus async queue
|
||||
await self.async_q.put(True)
|
||||
|
||||
# Emit signals
|
||||
self._emit_put_signals(item)
|
||||
|
||||
logger.debug(f"Successfully async queued item: {self._get_item_uuid(item)}")
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
logger.critical(f"CRITICAL: Failed to async put item {self._get_item_uuid(item)}: {str(e)}")
|
||||
# Remove from priority storage if janus put failed
|
||||
try:
|
||||
with self._lock:
|
||||
if item in self._priority_items:
|
||||
self._priority_items.remove(item)
|
||||
heapq.heapify(self._priority_items)
|
||||
except Exception as cleanup_e:
|
||||
logger.critical(f"CRITICAL: Failed to cleanup after async put failure: {str(e)}")
|
||||
return False
|
||||
|
||||
async def async_get(self):
|
||||
"""Pure async get with priority ordering"""
|
||||
try:
|
||||
# Wait for notification
|
||||
await self.async_q.get()
|
||||
|
||||
# Get highest priority item
|
||||
with self._lock:
|
||||
if not self._priority_items:
|
||||
logger.critical(f"CRITICAL: Async queue notification received but no priority items available")
|
||||
raise Exception("Priority queue inconsistency")
|
||||
item = heapq.heappop(self._priority_items)
|
||||
|
||||
# Emit signals
|
||||
self._emit_get_signals()
|
||||
|
||||
logger.debug(f"Successfully async retrieved item: {self._get_item_uuid(item)}")
|
||||
return item
|
||||
|
||||
except Exception as e:
|
||||
logger.critical(f"CRITICAL: Failed to async get item from queue: {str(e)}")
|
||||
raise
|
||||
|
||||
# UTILITY METHODS
|
||||
def qsize(self) -> int:
|
||||
"""Get current queue size"""
|
||||
try:
|
||||
with self._lock:
|
||||
return len(self._priority_items)
|
||||
except Exception as e:
|
||||
logger.critical(f"CRITICAL: Failed to get queue size: {str(e)}")
|
||||
return 0
|
||||
|
||||
def empty(self) -> bool:
|
||||
"""Check if queue is empty"""
|
||||
return self.qsize() == 0
|
||||
|
||||
def close(self):
|
||||
"""Close the janus queue"""
|
||||
try:
|
||||
self._janus_queue.close()
|
||||
logger.debug("RecheckPriorityQueue closed successfully")
|
||||
except Exception as e:
|
||||
logger.critical(f"CRITICAL: Failed to close RecheckPriorityQueue: {str(e)}")
|
||||
|
||||
# COMPATIBILITY METHODS (from original implementation)
|
||||
@property
|
||||
def queue(self):
|
||||
"""Provide compatibility with original queue access"""
|
||||
try:
|
||||
with self._lock:
|
||||
return list(self._priority_items)
|
||||
except Exception as e:
|
||||
logger.critical(f"CRITICAL: Failed to get queue list: {str(e)}")
|
||||
return []
|
||||
|
||||
def get_uuid_position(self, target_uuid: str) -> Dict[str, Any]:
|
||||
"""Find position of UUID in queue"""
|
||||
try:
|
||||
with self._lock:
|
||||
queue_list = list(self._priority_items)
|
||||
total_items = len(queue_list)
|
||||
|
||||
if total_items == 0:
|
||||
return {'position': None, 'total_items': 0, 'priority': None, 'found': False}
|
||||
|
||||
# Find target item
|
||||
for item in queue_list:
|
||||
if (hasattr(item, 'item') and isinstance(item.item, dict) and
|
||||
item.item.get('uuid') == target_uuid):
|
||||
|
||||
# Count items with higher priority
|
||||
position = sum(1 for other in queue_list if other.priority < item.priority)
|
||||
return {
|
||||
'position': position,
|
||||
'total_items': total_items,
|
||||
'priority': item.priority,
|
||||
'found': True
|
||||
}
|
||||
|
||||
return {'position': None, 'total_items': total_items, 'priority': None, 'found': False}
|
||||
|
||||
except Exception as e:
|
||||
logger.critical(f"CRITICAL: Failed to get UUID position for {target_uuid}: {str(e)}")
|
||||
return {'position': None, 'total_items': 0, 'priority': None, 'found': False}
|
||||
|
||||
def get_all_queued_uuids(self, limit: Optional[int] = None, offset: int = 0) -> Dict[str, Any]:
|
||||
"""Get all queued UUIDs with pagination"""
|
||||
try:
|
||||
with self._lock:
|
||||
queue_list = sorted(self._priority_items) # Sort by priority
|
||||
total_items = len(queue_list)
|
||||
|
||||
if total_items == 0:
|
||||
return {'items': [], 'total_items': 0, 'returned_items': 0, 'has_more': False}
|
||||
|
||||
# Apply pagination
|
||||
end_idx = min(offset + limit, total_items) if limit else total_items
|
||||
items_to_process = queue_list[offset:end_idx]
|
||||
|
||||
result = []
|
||||
for position, item in enumerate(items_to_process, start=offset):
|
||||
if (hasattr(item, 'item') and isinstance(item.item, dict) and
|
||||
'uuid' in item.item):
|
||||
result.append({
|
||||
'uuid': item.item['uuid'],
|
||||
'position': position,
|
||||
'priority': item.priority
|
||||
})
|
||||
|
||||
return {
|
||||
'items': result,
|
||||
'total_items': total_items,
|
||||
'returned_items': len(result),
|
||||
'has_more': (offset + len(result)) < total_items
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
logger.critical(f"CRITICAL: Failed to get all queued UUIDs: {str(e)}")
|
||||
return {'items': [], 'total_items': 0, 'returned_items': 0, 'has_more': False}
|
||||
|
||||
def get_queue_summary(self) -> Dict[str, Any]:
|
||||
"""Get queue summary statistics"""
|
||||
try:
|
||||
with self._lock:
|
||||
queue_list = list(self._priority_items)
|
||||
total_items = len(queue_list)
|
||||
|
||||
if total_items == 0:
|
||||
return {
|
||||
'total_items': 0, 'priority_breakdown': {},
|
||||
'immediate_items': 0, 'clone_items': 0, 'scheduled_items': 0
|
||||
}
|
||||
|
||||
immediate_items = clone_items = scheduled_items = 0
|
||||
priority_counts = {}
|
||||
|
||||
for item in queue_list:
|
||||
priority = item.priority
|
||||
priority_counts[priority] = priority_counts.get(priority, 0) + 1
|
||||
|
||||
if priority == 1:
|
||||
immediate_items += 1
|
||||
elif priority == 5:
|
||||
clone_items += 1
|
||||
elif priority > 100:
|
||||
scheduled_items += 1
|
||||
|
||||
return {
|
||||
'total_items': total_items,
|
||||
'priority_breakdown': priority_counts,
|
||||
'immediate_items': immediate_items,
|
||||
'clone_items': clone_items,
|
||||
'scheduled_items': scheduled_items,
|
||||
'min_priority': min(priority_counts.keys()) if priority_counts else None,
|
||||
'max_priority': max(priority_counts.keys()) if priority_counts else None
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
logger.critical(f"CRITICAL: Failed to get queue summary: {str(e)}")
|
||||
return {'total_items': 0, 'priority_breakdown': {}, 'immediate_items': 0,
|
||||
'clone_items': 0, 'scheduled_items': 0}
|
||||
|
||||
# PRIVATE METHODS
|
||||
def _get_item_uuid(self, item) -> str:
|
||||
"""Safely extract UUID from item for logging"""
|
||||
try:
|
||||
if hasattr(item, 'item') and isinstance(item.item, dict):
|
||||
return item.item.get('uuid', 'unknown')
|
||||
except Exception:
|
||||
pass
|
||||
return 'unknown'
|
||||
|
||||
def _emit_put_signals(self, item):
|
||||
"""Emit signals when item is added"""
|
||||
try:
|
||||
# Watch update signal
|
||||
if hasattr(item, 'item') and isinstance(item.item, dict) and 'uuid' in item.item:
|
||||
watch_check_update = signal('watch_check_update')
|
||||
if watch_check_update:
|
||||
watch_check_update.send(watch_uuid=item.item['uuid'])
|
||||
|
||||
# Queue length signal
|
||||
if self.queue_length_signal:
|
||||
self.queue_length_signal.send(length=self.qsize())
|
||||
|
||||
except Exception as e:
|
||||
logger.critical(f"CRITICAL: Failed to emit put signals: {str(e)}")
|
||||
|
||||
def _emit_get_signals(self):
|
||||
"""Emit signals when item is removed"""
|
||||
try:
|
||||
if self.queue_length_signal:
|
||||
self.queue_length_signal.send(length=self.qsize())
|
||||
except Exception as e:
|
||||
logger.critical(f"CRITICAL: Failed to emit get signals: {str(e)}")
|
||||
|
||||
|
||||
class NotificationQueue:
|
||||
"""
|
||||
Ultra-reliable notification queue using pure janus.
|
||||
|
||||
CRITICAL DESIGN NOTE: Both sync_q and async_q are required because:
|
||||
- sync_q: Used by Flask routes, ticker threads, and other synchronous code
|
||||
- async_q: Used by async workers and coroutines
|
||||
|
||||
DO NOT REMOVE EITHER INTERFACE - they bridge different execution contexts.
|
||||
See RecheckPriorityQueue docstring above for detailed explanation.
|
||||
|
||||
Simple wrapper around janus with bulletproof error handling.
|
||||
"""
|
||||
|
||||
def __init__(self, maxsize: int = 0):
|
||||
try:
|
||||
self._janus_queue = janus.Queue(maxsize=maxsize)
|
||||
# BOTH interfaces required - see class docstring for why
|
||||
self.sync_q = self._janus_queue.sync_q # Flask routes, threads
|
||||
self.async_q = self._janus_queue.async_q # Async workers
|
||||
self.notification_event_signal = signal('notification_event')
|
||||
logger.debug("NotificationQueue initialized successfully")
|
||||
except Exception as e:
|
||||
logger.critical(f"CRITICAL: Failed to initialize NotificationQueue: {str(e)}")
|
||||
raise
|
||||
|
||||
def put(self, item: Dict[str, Any], block: bool = True, timeout: Optional[float] = None):
|
||||
"""Thread-safe sync put with signal emission"""
|
||||
try:
|
||||
self.sync_q.put(item, block=block, timeout=timeout)
|
||||
self._emit_notification_signal(item)
|
||||
logger.debug(f"Successfully queued notification: {item.get('uuid', 'unknown')}")
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.critical(f"CRITICAL: Failed to put notification {item.get('uuid', 'unknown')}: {str(e)}")
|
||||
return False
|
||||
|
||||
async def async_put(self, item: Dict[str, Any]):
|
||||
"""Pure async put with signal emission"""
|
||||
try:
|
||||
await self.async_q.put(item)
|
||||
self._emit_notification_signal(item)
|
||||
logger.debug(f"Successfully async queued notification: {item.get('uuid', 'unknown')}")
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.critical(f"CRITICAL: Failed to async put notification {item.get('uuid', 'unknown')}: {str(e)}")
|
||||
return False
|
||||
|
||||
def get(self, block: bool = True, timeout: Optional[float] = None):
|
||||
"""Thread-safe sync get"""
|
||||
try:
|
||||
return self.sync_q.get(block=block, timeout=timeout)
|
||||
except queue.Empty as e:
|
||||
raise e
|
||||
except Exception as e:
|
||||
logger.critical(f"CRITICAL: Failed to get notification: {str(e)}")
|
||||
raise e
|
||||
|
||||
async def async_get(self):
|
||||
"""Pure async get"""
|
||||
try:
|
||||
return await self.async_q.get()
|
||||
except queue.Empty as e:
|
||||
raise e
|
||||
except Exception as e:
|
||||
logger.critical(f"CRITICAL: Failed to async get notification: {str(e)}")
|
||||
raise e
|
||||
|
||||
def qsize(self) -> int:
|
||||
"""Get current queue size"""
|
||||
try:
|
||||
return self.sync_q.qsize()
|
||||
except Exception as e:
|
||||
logger.critical(f"CRITICAL: Failed to get notification queue size: {str(e)}")
|
||||
return 0
|
||||
|
||||
def empty(self) -> bool:
|
||||
"""Check if queue is empty"""
|
||||
return self.qsize() == 0
|
||||
|
||||
def close(self):
|
||||
"""Close the janus queue"""
|
||||
try:
|
||||
self._janus_queue.close()
|
||||
logger.debug("NotificationQueue closed successfully")
|
||||
except Exception as e:
|
||||
logger.critical(f"CRITICAL: Failed to close NotificationQueue: {str(e)}")
|
||||
|
||||
def _emit_notification_signal(self, item: Dict[str, Any]):
|
||||
"""Emit notification signal"""
|
||||
try:
|
||||
if self.notification_event_signal and isinstance(item, dict):
|
||||
watch_uuid = item.get('uuid')
|
||||
if watch_uuid:
|
||||
self.notification_event_signal.send(watch_uuid=watch_uuid)
|
||||
else:
|
||||
self.notification_event_signal.send()
|
||||
except Exception as e:
|
||||
logger.critical(f"CRITICAL: Failed to emit notification signal: {str(e)}")
|
||||
124
changedetectionio/realtime/README.md
Normal file
124
changedetectionio/realtime/README.md
Normal file
@@ -0,0 +1,124 @@
|
||||
# Real-time Socket.IO Implementation
|
||||
|
||||
This directory contains the Socket.IO implementation for changedetection.io's real-time updates.
|
||||
|
||||
## Architecture Overview
|
||||
|
||||
The real-time system provides live updates to the web interface for:
|
||||
- Watch status changes (checking, completed, errors)
|
||||
- Queue length updates
|
||||
- General statistics updates
|
||||
|
||||
## Current Implementation
|
||||
|
||||
### Socket.IO Configuration
|
||||
- **Async Mode**: `threading` (default) or `gevent` (optional via SOCKETIO_MODE env var)
|
||||
- **Server**: Flask-SocketIO with threading support
|
||||
- **Background Tasks**: Python threading with daemon threads
|
||||
|
||||
### Async Worker Integration
|
||||
- **Workers**: Async workers using asyncio for watch processing
|
||||
- **Queue**: AsyncSignalPriorityQueue for job distribution
|
||||
- **Signals**: Blinker signals for real-time updates between workers and Socket.IO
|
||||
|
||||
### Environment Variables
|
||||
- `SOCKETIO_MODE=threading` (default, recommended)
|
||||
- `SOCKETIO_MODE=gevent` (optional, has cross-platform limitations)
|
||||
|
||||
## Architecture Decision: Why Threading Mode?
|
||||
|
||||
### Previous Issues with Eventlet
|
||||
**Eventlet was completely removed** due to fundamental compatibility issues:
|
||||
|
||||
1. **Monkey Patching Conflicts**: `eventlet.monkey_patch()` globally replaced Python's threading/socket modules, causing conflicts with:
|
||||
- Playwright's synchronous browser automation
|
||||
- Async worker event loops
|
||||
- Various Python libraries expecting real threading
|
||||
|
||||
2. **Python 3.12+ Compatibility**: Eventlet had issues with newer Python versions and asyncio integration
|
||||
|
||||
3. **CVE-2023-29483**: Security vulnerability in eventlet's dnspython dependency
|
||||
|
||||
### Current Solution Benefits
|
||||
✅ **Threading Mode Advantages**:
|
||||
- Full compatibility with async workers and Playwright
|
||||
- No monkey patching - uses standard Python threading
|
||||
- Better Python 3.12+ support
|
||||
- Cross-platform compatibility (Windows, macOS, Linux)
|
||||
- No external async library dependencies
|
||||
- Fast shutdown capabilities
|
||||
|
||||
✅ **Optional Gevent Support**:
|
||||
- Available via `SOCKETIO_MODE=gevent` for high-concurrency scenarios
|
||||
- Cross-platform limitations documented in requirements.txt
|
||||
- Not recommended as default due to Windows socket limits and macOS ARM build issues
|
||||
|
||||
## Socket.IO Mode Configuration
|
||||
|
||||
### Threading Mode (Default)
|
||||
```python
|
||||
# Enabled automatically
|
||||
async_mode = 'threading'
|
||||
socketio = SocketIO(app, async_mode='threading')
|
||||
```
|
||||
|
||||
### Gevent Mode (Optional)
|
||||
```bash
|
||||
# Set environment variable
|
||||
export SOCKETIO_MODE=gevent
|
||||
```
|
||||
|
||||
## Background Tasks
|
||||
|
||||
### Queue Polling
|
||||
- **Threading Mode**: `threading.Thread` with `threading.Event` for shutdown
|
||||
- **Signal Handling**: Blinker signals for watch state changes
|
||||
- **Real-time Updates**: Direct Socket.IO `emit()` calls to connected clients
|
||||
|
||||
### Worker Integration
|
||||
- **Async Workers**: Run in separate asyncio event loop thread
|
||||
- **Communication**: AsyncSignalPriorityQueue bridges async workers and Socket.IO
|
||||
- **Updates**: Real-time updates sent when workers complete tasks
|
||||
|
||||
## Files in This Directory
|
||||
|
||||
- `socket_server.py`: Main Socket.IO initialization and event handling
|
||||
- `events.py`: Watch operation event handlers
|
||||
- `__init__.py`: Module initialization
|
||||
|
||||
## Production Deployment
|
||||
|
||||
### Recommended WSGI Servers
|
||||
For production with Socket.IO threading mode:
|
||||
- **Gunicorn**: `gunicorn --worker-class eventlet changedetection:app` (if using gevent mode)
|
||||
- **uWSGI**: With threading support
|
||||
- **Docker**: Built-in Flask server works well for containerized deployments
|
||||
|
||||
### Performance Considerations
|
||||
- Threading mode: Better memory usage, standard Python threading
|
||||
- Gevent mode: Higher concurrency but platform limitations
|
||||
- Async workers: Separate from Socket.IO, provides scalability
|
||||
|
||||
## Environment Variables
|
||||
|
||||
| Variable | Default | Description |
|
||||
|----------|---------|-------------|
|
||||
| `SOCKETIO_MODE` | `threading` | Socket.IO async mode (`threading` or `gevent`) |
|
||||
| `FETCH_WORKERS` | `10` | Number of async workers for watch processing |
|
||||
| `CHANGEDETECTION_HOST` | `0.0.0.0` | Server bind address |
|
||||
| `CHANGEDETECTION_PORT` | `5000` | Server port |
|
||||
|
||||
## Debugging Tips
|
||||
|
||||
1. **Socket.IO Issues**: Check browser dev tools for WebSocket connection errors
|
||||
2. **Threading Issues**: Monitor with `ps -T` to check thread count
|
||||
3. **Worker Issues**: Use `/worker-health` endpoint to check async worker status
|
||||
4. **Queue Issues**: Use `/queue-status` endpoint to monitor job queue
|
||||
5. **Performance**: Use `/gc-cleanup` endpoint to trigger memory cleanup
|
||||
|
||||
## Migration Notes
|
||||
|
||||
If upgrading from eventlet-based versions:
|
||||
- Remove any `EVENTLET_*` environment variables
|
||||
- No code changes needed - Socket.IO mode is automatically configured
|
||||
- Optional: Set `SOCKETIO_MODE=gevent` if high concurrency is required and platform supports it
|
||||
3
changedetectionio/realtime/__init__.py
Normal file
3
changedetectionio/realtime/__init__.py
Normal file
@@ -0,0 +1,3 @@
|
||||
"""
|
||||
Socket.IO realtime updates module for changedetection.io
|
||||
"""
|
||||
58
changedetectionio/realtime/events.py
Normal file
58
changedetectionio/realtime/events.py
Normal file
@@ -0,0 +1,58 @@
|
||||
from flask_socketio import emit
|
||||
from loguru import logger
|
||||
from blinker import signal
|
||||
|
||||
|
||||
def register_watch_operation_handlers(socketio, datastore):
|
||||
"""Register Socket.IO event handlers for watch operations"""
|
||||
|
||||
@socketio.on('watch_operation')
|
||||
def handle_watch_operation(data):
|
||||
"""Handle watch operations like pause, mute, recheck via Socket.IO"""
|
||||
try:
|
||||
op = data.get('op')
|
||||
uuid = data.get('uuid')
|
||||
|
||||
logger.debug(f"Socket.IO: Received watch operation '{op}' for UUID {uuid}")
|
||||
|
||||
if not op or not uuid:
|
||||
emit('operation_result', {'success': False, 'error': 'Missing operation or UUID'})
|
||||
return
|
||||
|
||||
# Check if watch exists
|
||||
if not datastore.data['watching'].get(uuid):
|
||||
emit('operation_result', {'success': False, 'error': 'Watch not found'})
|
||||
return
|
||||
|
||||
watch = datastore.data['watching'][uuid]
|
||||
|
||||
# Perform the operation
|
||||
if op == 'pause':
|
||||
watch.toggle_pause()
|
||||
logger.info(f"Socket.IO: Toggled pause for watch {uuid}")
|
||||
elif op == 'mute':
|
||||
watch.toggle_mute()
|
||||
logger.info(f"Socket.IO: Toggled mute for watch {uuid}")
|
||||
elif op == 'recheck':
|
||||
# Import here to avoid circular imports
|
||||
from changedetectionio.flask_app import update_q
|
||||
from changedetectionio import queuedWatchMetaData
|
||||
from changedetectionio import worker_handler
|
||||
|
||||
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
logger.info(f"Socket.IO: Queued recheck for watch {uuid}")
|
||||
else:
|
||||
emit('operation_result', {'success': False, 'error': f'Unknown operation: {op}'})
|
||||
return
|
||||
|
||||
# Send signal to update UI
|
||||
watch_check_update = signal('watch_check_update')
|
||||
if watch_check_update:
|
||||
watch_check_update.send(watch_uuid=uuid)
|
||||
|
||||
# Send success response to client
|
||||
emit('operation_result', {'success': True, 'operation': op, 'uuid': uuid})
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Socket.IO error in handle_watch_operation: {str(e)}")
|
||||
emit('operation_result', {'success': False, 'error': str(e)})
|
||||
408
changedetectionio/realtime/socket_server.py
Normal file
408
changedetectionio/realtime/socket_server.py
Normal file
@@ -0,0 +1,408 @@
|
||||
import timeago
|
||||
from flask_socketio import SocketIO
|
||||
|
||||
import time
|
||||
import os
|
||||
from loguru import logger
|
||||
from blinker import signal
|
||||
|
||||
from changedetectionio import strtobool
|
||||
|
||||
|
||||
class SignalHandler:
|
||||
"""A standalone class to receive signals"""
|
||||
|
||||
def __init__(self, socketio_instance, datastore):
|
||||
self.socketio_instance = socketio_instance
|
||||
self.datastore = datastore
|
||||
|
||||
# Connect to the watch_check_update signal
|
||||
from changedetectionio.flask_app import watch_check_update as wcc
|
||||
wcc.connect(self.handle_signal, weak=False)
|
||||
# logger.info("SignalHandler: Connected to signal from direct import")
|
||||
|
||||
# Connect to the queue_length signal
|
||||
queue_length_signal = signal('queue_length')
|
||||
queue_length_signal.connect(self.handle_queue_length, weak=False)
|
||||
# logger.info("SignalHandler: Connected to queue_length signal")
|
||||
|
||||
watch_delete_signal = signal('watch_deleted')
|
||||
watch_delete_signal.connect(self.handle_deleted_signal, weak=False)
|
||||
|
||||
watch_favicon_bumped_signal = signal('watch_favicon_bump')
|
||||
watch_favicon_bumped_signal.connect(self.handle_watch_bumped_favicon_signal, weak=False)
|
||||
|
||||
# Connect to the notification_event signal
|
||||
notification_event_signal = signal('notification_event')
|
||||
notification_event_signal.connect(self.handle_notification_event, weak=False)
|
||||
logger.info("SignalHandler: Connected to notification_event signal")
|
||||
|
||||
# Create and start the queue update thread using standard threading
|
||||
import threading
|
||||
self.polling_emitter_thread = threading.Thread(
|
||||
target=self.polling_emit_running_or_queued_watches_threaded,
|
||||
daemon=True
|
||||
)
|
||||
self.polling_emitter_thread.start()
|
||||
logger.info("Started polling thread using threading (eventlet-free)")
|
||||
|
||||
# Store the thread reference in socketio for clean shutdown
|
||||
self.socketio_instance.polling_emitter_thread = self.polling_emitter_thread
|
||||
|
||||
def handle_signal(self, *args, **kwargs):
|
||||
logger.trace(f"SignalHandler: Signal received with {len(args)} args and {len(kwargs)} kwargs")
|
||||
# Safely extract the watch UUID from kwargs
|
||||
watch_uuid = kwargs.get('watch_uuid')
|
||||
app_context = kwargs.get('app_context')
|
||||
|
||||
if watch_uuid:
|
||||
# Get the watch object from the datastore
|
||||
watch = self.datastore.data['watching'].get(watch_uuid)
|
||||
if watch:
|
||||
if app_context:
|
||||
# note
|
||||
with app_context.app_context():
|
||||
with app_context.test_request_context():
|
||||
# Forward to handle_watch_update with the watch parameter
|
||||
handle_watch_update(self.socketio_instance, watch=watch, datastore=self.datastore)
|
||||
else:
|
||||
handle_watch_update(self.socketio_instance, watch=watch, datastore=self.datastore)
|
||||
|
||||
logger.trace(f"Signal handler processed watch UUID {watch_uuid}")
|
||||
else:
|
||||
logger.warning(f"Watch UUID {watch_uuid} not found in datastore")
|
||||
|
||||
def handle_watch_bumped_favicon_signal(self, *args, **kwargs):
|
||||
watch_uuid = kwargs.get('watch_uuid')
|
||||
if watch_uuid:
|
||||
# Emit the queue size to all connected clients
|
||||
self.socketio_instance.emit("watch_bumped_favicon", {
|
||||
"uuid": watch_uuid,
|
||||
"event_timestamp": time.time()
|
||||
})
|
||||
logger.debug(f"Watch UUID {watch_uuid} got its favicon updated")
|
||||
|
||||
def handle_deleted_signal(self, *args, **kwargs):
|
||||
watch_uuid = kwargs.get('watch_uuid')
|
||||
if watch_uuid:
|
||||
# Emit the queue size to all connected clients
|
||||
self.socketio_instance.emit("watch_deleted", {
|
||||
"uuid": watch_uuid,
|
||||
"event_timestamp": time.time()
|
||||
})
|
||||
logger.debug(f"Watch UUID {watch_uuid} was deleted")
|
||||
|
||||
def handle_queue_length(self, *args, **kwargs):
|
||||
"""Handle queue_length signal and emit to all clients"""
|
||||
try:
|
||||
queue_length = kwargs.get('length', 0)
|
||||
logger.debug(f"SignalHandler: Queue length update received: {queue_length}")
|
||||
|
||||
# Emit the queue size to all connected clients
|
||||
self.socketio_instance.emit("queue_size", {
|
||||
"q_length": queue_length,
|
||||
"event_timestamp": time.time()
|
||||
})
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Socket.IO error in handle_queue_length: {str(e)}")
|
||||
|
||||
def handle_notification_event(self, *args, **kwargs):
|
||||
"""Handle notification_event signal and emit to all clients"""
|
||||
try:
|
||||
watch_uuid = kwargs.get('watch_uuid')
|
||||
logger.debug(f"SignalHandler: Notification event received for watch UUID: {watch_uuid}")
|
||||
|
||||
# Emit the notification event to all connected clients
|
||||
self.socketio_instance.emit("notification_event", {
|
||||
"watch_uuid": watch_uuid,
|
||||
"event_timestamp": time.time()
|
||||
})
|
||||
|
||||
logger.trace(f"Socket.IO: Emitted notification_event for watch UUID {watch_uuid}")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Socket.IO error in handle_notification_event: {str(e)}")
|
||||
|
||||
def polling_emit_running_or_queued_watches_threaded(self):
|
||||
"""Threading version of polling for Windows compatibility"""
|
||||
import time
|
||||
import threading
|
||||
logger.info("Queue update thread started (threading mode)")
|
||||
|
||||
# Import here to avoid circular imports
|
||||
from changedetectionio.flask_app import app
|
||||
from changedetectionio import worker_handler
|
||||
watch_check_update = signal('watch_check_update')
|
||||
|
||||
# Track previous state to avoid unnecessary emissions
|
||||
previous_running_uuids = set()
|
||||
|
||||
# Run until app shutdown - check exit flag more frequently for fast shutdown
|
||||
exit_event = getattr(app.config, 'exit', threading.Event())
|
||||
|
||||
while not exit_event.is_set():
|
||||
try:
|
||||
# Get current running UUIDs from async workers
|
||||
running_uuids = set(worker_handler.get_running_uuids())
|
||||
|
||||
# Only send updates for UUIDs that changed state
|
||||
newly_running = running_uuids - previous_running_uuids
|
||||
no_longer_running = previous_running_uuids - running_uuids
|
||||
|
||||
# Send updates for newly running UUIDs (but exit fast if shutdown requested)
|
||||
for uuid in newly_running:
|
||||
if exit_event.is_set():
|
||||
break
|
||||
logger.trace(f"Threading polling: UUID {uuid} started processing")
|
||||
with app.app_context():
|
||||
watch_check_update.send(app_context=app, watch_uuid=uuid)
|
||||
time.sleep(0.01) # Small yield
|
||||
|
||||
# Send updates for UUIDs that finished processing (but exit fast if shutdown requested)
|
||||
if not exit_event.is_set():
|
||||
for uuid in no_longer_running:
|
||||
if exit_event.is_set():
|
||||
break
|
||||
logger.trace(f"Threading polling: UUID {uuid} finished processing")
|
||||
with app.app_context():
|
||||
watch_check_update.send(app_context=app, watch_uuid=uuid)
|
||||
time.sleep(0.01) # Small yield
|
||||
|
||||
# Update tracking for next iteration
|
||||
previous_running_uuids = running_uuids
|
||||
|
||||
# Sleep between polling cycles, but check exit flag every 0.5 seconds for fast shutdown
|
||||
for _ in range(20): # 20 * 0.5 = 10 seconds total
|
||||
if exit_event.is_set():
|
||||
break
|
||||
time.sleep(0.5)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error in threading polling: {str(e)}")
|
||||
# Even during error recovery, check for exit quickly
|
||||
for _ in range(1): # 1 * 0.5 = 0.5 seconds
|
||||
if exit_event.is_set():
|
||||
break
|
||||
time.sleep(0.5)
|
||||
|
||||
# Check if we're in pytest environment - if so, be more gentle with logging
|
||||
import sys
|
||||
in_pytest = "pytest" in sys.modules or "PYTEST_CURRENT_TEST" in os.environ
|
||||
|
||||
if not in_pytest:
|
||||
logger.info("Queue update thread stopped (threading mode)")
|
||||
|
||||
|
||||
def handle_watch_update(socketio, **kwargs):
|
||||
"""Handle watch update signal from blinker"""
|
||||
try:
|
||||
watch = kwargs.get('watch')
|
||||
datastore = kwargs.get('datastore')
|
||||
|
||||
# Emit the watch update to all connected clients
|
||||
from changedetectionio.flask_app import update_q
|
||||
from changedetectionio.flask_app import _jinja2_filter_datetime
|
||||
from changedetectionio import worker_handler
|
||||
|
||||
# Get list of watches that are currently running
|
||||
running_uuids = worker_handler.get_running_uuids()
|
||||
|
||||
# Get list of watches in the queue
|
||||
queue_list = []
|
||||
for q_item in update_q.queue:
|
||||
if hasattr(q_item, 'item') and 'uuid' in q_item.item:
|
||||
queue_list.append(q_item.item['uuid'])
|
||||
|
||||
# Get the error texts from the watch
|
||||
error_texts = watch.compile_error_texts()
|
||||
# Create a simplified watch data object to send to clients
|
||||
|
||||
watch_data = {
|
||||
'checking_now': True if watch.get('uuid') in running_uuids else False,
|
||||
'error_text': error_texts,
|
||||
'event_timestamp': time.time(),
|
||||
'fetch_time': watch.get('fetch_time'),
|
||||
'has_error': True if error_texts else False,
|
||||
'has_favicon': True if watch.get_favicon_filename() else False,
|
||||
'history_n': watch.history_n,
|
||||
'last_changed_text': timeago.format(int(watch.last_changed), time.time()) if watch.history_n >= 2 and int(watch.last_changed) > 0 else 'Not yet',
|
||||
'last_checked': watch.get('last_checked'),
|
||||
'last_checked_text': _jinja2_filter_datetime(watch),
|
||||
'notification_muted': True if watch.get('notification_muted') else False,
|
||||
'paused': True if watch.get('paused') else False,
|
||||
'queued': True if watch.get('uuid') in queue_list else False,
|
||||
'unviewed': watch.has_unviewed,
|
||||
'uuid': watch.get('uuid'),
|
||||
}
|
||||
|
||||
errored_count = 0
|
||||
for watch_uuid_iter, watch_iter in datastore.data['watching'].items():
|
||||
if watch_iter.get('last_error'):
|
||||
errored_count += 1
|
||||
|
||||
general_stats = {
|
||||
'count_errors': errored_count,
|
||||
'unread_changes_count': datastore.unread_changes_count
|
||||
}
|
||||
|
||||
# Debug what's being emitted
|
||||
# logger.debug(f"Emitting 'watch_update' event for {watch.get('uuid')}, data: {watch_data}")
|
||||
|
||||
# Emit to all clients (no 'broadcast' parameter needed - it's the default behavior)
|
||||
socketio.emit("watch_update", {'watch': watch_data})
|
||||
socketio.emit("general_stats_update", general_stats)
|
||||
|
||||
# Log after successful emit - use watch_data['uuid'] to avoid variable shadowing issues
|
||||
logger.trace(f"Socket.IO: Emitted update for watch {watch_data['uuid']}, Checking now: {watch_data['checking_now']}")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Socket.IO error in handle_watch_update: {str(e)}")
|
||||
|
||||
|
||||
def init_socketio(app, datastore):
|
||||
"""Initialize SocketIO with the main Flask app"""
|
||||
import platform
|
||||
import sys
|
||||
|
||||
# Platform-specific async_mode selection for better stability
|
||||
system = platform.system().lower()
|
||||
python_version = sys.version_info
|
||||
|
||||
# Check for SocketIO mode configuration via environment variable
|
||||
# Default is 'threading' for best cross-platform compatibility
|
||||
socketio_mode = os.getenv('SOCKETIO_MODE', 'threading').lower()
|
||||
|
||||
if socketio_mode == 'gevent':
|
||||
# Use gevent mode (higher concurrency but platform limitations)
|
||||
try:
|
||||
import gevent
|
||||
async_mode = 'gevent'
|
||||
logger.info(f"SOCKETIO_MODE=gevent: Using {async_mode} mode for Socket.IO")
|
||||
except ImportError:
|
||||
async_mode = 'threading'
|
||||
logger.warning(f"SOCKETIO_MODE=gevent but gevent not available, falling back to {async_mode} mode")
|
||||
elif socketio_mode == 'threading':
|
||||
# Use threading mode (default - best compatibility)
|
||||
async_mode = 'threading'
|
||||
logger.info(f"SOCKETIO_MODE=threading: Using {async_mode} mode for Socket.IO")
|
||||
else:
|
||||
# Invalid mode specified, use default
|
||||
async_mode = 'threading'
|
||||
logger.warning(f"Invalid SOCKETIO_MODE='{socketio_mode}', using default {async_mode} mode for Socket.IO")
|
||||
|
||||
# Log platform info for debugging
|
||||
logger.info(f"Platform: {system}, Python: {python_version.major}.{python_version.minor}, Socket.IO mode: {async_mode}")
|
||||
|
||||
# Restrict SocketIO CORS to same origin by default, can be overridden with env var
|
||||
cors_origins = os.environ.get('SOCKETIO_CORS_ORIGINS', None)
|
||||
|
||||
socketio = SocketIO(app,
|
||||
async_mode=async_mode,
|
||||
cors_allowed_origins=cors_origins, # None means same-origin only
|
||||
logger=strtobool(os.getenv('SOCKETIO_LOGGING', 'False')),
|
||||
engineio_logger=strtobool(os.getenv('SOCKETIO_LOGGING', 'False')))
|
||||
|
||||
# Set up event handlers
|
||||
logger.info("Socket.IO: Registering connect event handler")
|
||||
|
||||
@socketio.on('checkbox-operation')
|
||||
def event_checkbox_operations(data):
|
||||
from changedetectionio.blueprint.ui import _handle_operations
|
||||
from changedetectionio import queuedWatchMetaData
|
||||
from changedetectionio import worker_handler
|
||||
from changedetectionio.flask_app import update_q, watch_check_update
|
||||
logger.trace(f"Got checkbox operations event: {data}")
|
||||
|
||||
datastore = socketio.datastore
|
||||
|
||||
_handle_operations(
|
||||
op=data.get('op'),
|
||||
uuids=data.get('uuids'),
|
||||
datastore=datastore,
|
||||
extra_data=data.get('extra_data'),
|
||||
worker_handler=worker_handler,
|
||||
update_q=update_q,
|
||||
queuedWatchMetaData=queuedWatchMetaData,
|
||||
watch_check_update=watch_check_update,
|
||||
emit_flash=False
|
||||
)
|
||||
|
||||
@socketio.on('connect')
|
||||
def handle_connect():
|
||||
"""Handle client connection"""
|
||||
# logger.info("Socket.IO: CONNECT HANDLER CALLED - Starting connection process")
|
||||
from flask import request
|
||||
from flask_login import current_user
|
||||
from changedetectionio.flask_app import update_q
|
||||
|
||||
# Access datastore from socketio
|
||||
datastore = socketio.datastore
|
||||
# logger.info(f"Socket.IO: Current user authenticated: {current_user.is_authenticated if hasattr(current_user, 'is_authenticated') else 'No current_user'}")
|
||||
|
||||
# Check if authentication is required and user is not authenticated
|
||||
has_password_enabled = datastore.data['settings']['application'].get('password') or os.getenv("SALTED_PASS", False)
|
||||
# logger.info(f"Socket.IO: Password enabled: {has_password_enabled}")
|
||||
if has_password_enabled and not current_user.is_authenticated:
|
||||
logger.warning("Socket.IO: Rejecting unauthenticated connection")
|
||||
return False # Reject the connection
|
||||
|
||||
# Send the current queue size to the newly connected client
|
||||
try:
|
||||
queue_size = update_q.qsize()
|
||||
socketio.emit("queue_size", {
|
||||
"q_length": queue_size,
|
||||
"event_timestamp": time.time()
|
||||
}, room=request.sid) # Send only to this client
|
||||
logger.debug(f"Socket.IO: Sent initial queue size {queue_size} to new client")
|
||||
except Exception as e:
|
||||
logger.error(f"Socket.IO error sending initial queue size: {str(e)}")
|
||||
|
||||
logger.info("Socket.IO: Client connected")
|
||||
|
||||
# logger.info("Socket.IO: Registering disconnect event handler")
|
||||
@socketio.on('disconnect')
|
||||
def handle_disconnect():
|
||||
"""Handle client disconnection"""
|
||||
logger.info("Socket.IO: Client disconnected")
|
||||
|
||||
# Create a dedicated signal handler that will receive signals and emit them to clients
|
||||
signal_handler = SignalHandler(socketio, datastore)
|
||||
|
||||
# Register watch operation event handlers
|
||||
from .events import register_watch_operation_handlers
|
||||
register_watch_operation_handlers(socketio, datastore)
|
||||
|
||||
# Store the datastore reference on the socketio object for later use
|
||||
socketio.datastore = datastore
|
||||
|
||||
# No stop event needed for threading mode - threads check app.config.exit directly
|
||||
|
||||
# Add a shutdown method to the socketio object
|
||||
def shutdown():
|
||||
"""Shutdown the SocketIO server fast and aggressively"""
|
||||
try:
|
||||
logger.info("Socket.IO: Fast shutdown initiated...")
|
||||
|
||||
# For threading mode, give the thread a very short time to exit gracefully
|
||||
if hasattr(socketio, 'polling_emitter_thread'):
|
||||
if socketio.polling_emitter_thread.is_alive():
|
||||
logger.info("Socket.IO: Waiting 1 second for polling thread to stop...")
|
||||
socketio.polling_emitter_thread.join(timeout=1.0) # Only 1 second timeout
|
||||
if socketio.polling_emitter_thread.is_alive():
|
||||
logger.info("Socket.IO: Polling thread still running after timeout - continuing with shutdown")
|
||||
else:
|
||||
logger.info("Socket.IO: Polling thread stopped quickly")
|
||||
else:
|
||||
logger.info("Socket.IO: Polling thread already stopped")
|
||||
|
||||
logger.info("Socket.IO: Fast shutdown complete")
|
||||
except Exception as e:
|
||||
logger.error(f"Socket.IO error during shutdown: {str(e)}")
|
||||
|
||||
# Attach the shutdown method to the socketio object
|
||||
socketio.shutdown = shutdown
|
||||
|
||||
logger.info("Socket.IO initialized and attached to main Flask app")
|
||||
logger.info(f"Socket.IO: Registered event handlers: {socketio.handlers if hasattr(socketio, 'handlers') else 'No handlers found'}")
|
||||
return socketio
|
||||
130
changedetectionio/rss_tools.py
Normal file
130
changedetectionio/rss_tools.py
Normal file
@@ -0,0 +1,130 @@
|
||||
"""
|
||||
RSS/Atom feed processing tools for changedetection.io
|
||||
"""
|
||||
|
||||
from loguru import logger
|
||||
import re
|
||||
|
||||
|
||||
def cdata_in_document_to_text(html_content: str, render_anchor_tag_content=False) -> str:
|
||||
"""
|
||||
Process CDATA sections in HTML/XML content - inline replacement.
|
||||
|
||||
Args:
|
||||
html_content: The HTML/XML content to process
|
||||
render_anchor_tag_content: Whether to render anchor tag content
|
||||
|
||||
Returns:
|
||||
Processed HTML/XML content with CDATA sections replaced inline
|
||||
"""
|
||||
from xml.sax.saxutils import escape as xml_escape
|
||||
from .html_tools import html_to_text
|
||||
|
||||
pattern = '<!\[CDATA\[(\s*(?:.(?<!\]\]>)\s*)*)\]\]>'
|
||||
|
||||
def repl(m):
|
||||
text = m.group(1)
|
||||
return xml_escape(html_to_text(html_content=text, render_anchor_tag_content=render_anchor_tag_content)).strip()
|
||||
|
||||
return re.sub(pattern, repl, html_content)
|
||||
|
||||
|
||||
def format_rss_items(rss_content: str, render_anchor_tag_content=False) -> str:
|
||||
"""
|
||||
Format RSS/Atom feed items in a readable text format using feedparser.
|
||||
|
||||
Converts RSS <item> or Atom <entry> elements to formatted text with:
|
||||
- <title> → <h1>Title</h1>
|
||||
- <link> → Link: [url]
|
||||
- <guid> → Guid: [id]
|
||||
- <pubDate> → PubDate: [date]
|
||||
- <description> or <content> → Raw HTML content (CDATA and entities automatically handled)
|
||||
|
||||
Args:
|
||||
rss_content: The RSS/Atom feed content
|
||||
render_anchor_tag_content: Whether to render anchor tag content in descriptions (unused, kept for compatibility)
|
||||
|
||||
Returns:
|
||||
Formatted HTML content ready for html_to_text conversion
|
||||
"""
|
||||
try:
|
||||
import feedparser
|
||||
from xml.sax.saxutils import escape as xml_escape
|
||||
|
||||
# Parse the feed - feedparser handles all RSS/Atom variants, CDATA, entity unescaping, etc.
|
||||
feed = feedparser.parse(rss_content)
|
||||
|
||||
formatted_items = []
|
||||
|
||||
# Determine feed type for appropriate labels when fields are missing
|
||||
# feedparser sets feed.version to things like 'rss20', 'atom10', etc.
|
||||
is_atom = feed.version and 'atom' in feed.version
|
||||
|
||||
for entry in feed.entries:
|
||||
item_parts = []
|
||||
|
||||
# Title - feedparser handles CDATA and entity unescaping automatically
|
||||
if hasattr(entry, 'title') and entry.title:
|
||||
item_parts.append(f'<h1>{xml_escape(entry.title)}</h1>')
|
||||
|
||||
# Link
|
||||
if hasattr(entry, 'link') and entry.link:
|
||||
item_parts.append(f'Link: {xml_escape(entry.link)}<br>')
|
||||
|
||||
# GUID/ID
|
||||
if hasattr(entry, 'id') and entry.id:
|
||||
item_parts.append(f'Guid: {xml_escape(entry.id)}<br>')
|
||||
|
||||
# Date - feedparser normalizes all date field names to 'published'
|
||||
if hasattr(entry, 'published') and entry.published:
|
||||
item_parts.append(f'PubDate: {xml_escape(entry.published)}<br>')
|
||||
|
||||
# Description/Content - feedparser handles CDATA and entity unescaping automatically
|
||||
# Only add "Summary:" label for Atom <summary> tags
|
||||
content = None
|
||||
add_label = False
|
||||
|
||||
if hasattr(entry, 'content') and entry.content:
|
||||
# Atom <content> - no label, just content
|
||||
content = entry.content[0].value if entry.content[0].value else None
|
||||
elif hasattr(entry, 'summary'):
|
||||
# Could be RSS <description> or Atom <summary>
|
||||
# feedparser maps both to entry.summary
|
||||
content = entry.summary if entry.summary else None
|
||||
# Only add "Summary:" label for Atom feeds (which use <summary> tag)
|
||||
if is_atom:
|
||||
add_label = True
|
||||
|
||||
# Add content with or without label
|
||||
if content:
|
||||
if add_label:
|
||||
item_parts.append(f'Summary:<br>{content}')
|
||||
else:
|
||||
item_parts.append(content)
|
||||
else:
|
||||
# No content - just show <none>
|
||||
item_parts.append('<none>')
|
||||
|
||||
# Join all parts of this item
|
||||
if item_parts:
|
||||
formatted_items.append('\n'.join(item_parts))
|
||||
|
||||
# Wrap each item in a div with classes (first, last, item-N)
|
||||
items_html = []
|
||||
total_items = len(formatted_items)
|
||||
for idx, item in enumerate(formatted_items):
|
||||
classes = ['rss-item']
|
||||
if idx == 0:
|
||||
classes.append('first')
|
||||
if idx == total_items - 1:
|
||||
classes.append('last')
|
||||
classes.append(f'item-{idx + 1}')
|
||||
|
||||
class_str = ' '.join(classes)
|
||||
items_html.append(f'<div class="{class_str}">{item}</div>')
|
||||
return '<html><body>\n'+"\n<br><br>".join(items_html)+'\n</body></html>'
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"Error formatting RSS items: {str(e)}")
|
||||
# Fall back to original content
|
||||
return rss_content
|
||||
@@ -11,32 +11,32 @@ set -e
|
||||
|
||||
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
|
||||
|
||||
find tests/test_*py -type f|while read test_name
|
||||
do
|
||||
echo "TEST RUNNING $test_name"
|
||||
# REMOVE_REQUESTS_OLD_SCREENSHOTS disabled so that we can write a screenshot and send it in test_notifications.py without a real browser
|
||||
REMOVE_REQUESTS_OLD_SCREENSHOTS=false pytest $test_name
|
||||
done
|
||||
# REMOVE_REQUESTS_OLD_SCREENSHOTS disabled so that we can write a screenshot and send it in test_notifications.py without a real browser
|
||||
REMOVE_REQUESTS_OLD_SCREENSHOTS=false pytest -n 30 --dist load tests/test_*.py
|
||||
|
||||
#time pytest -n auto --dist loadfile -vv --tb=long tests/test_*.py
|
||||
echo "RUNNING WITH BASE_URL SET"
|
||||
|
||||
# Now re-run some tests with BASE_URL enabled
|
||||
# Re #65 - Ability to include a link back to the installation, in the notification.
|
||||
export BASE_URL="https://really-unique-domain.io"
|
||||
REMOVE_REQUESTS_OLD_SCREENSHOTS=false pytest tests/test_notification.py
|
||||
REMOVE_REQUESTS_OLD_SCREENSHOTS=false pytest -vv --maxfail=1 tests/test_notification.py
|
||||
|
||||
|
||||
# Re-run with HIDE_REFERER set - could affect login
|
||||
export HIDE_REFERER=True
|
||||
pytest tests/test_access_control.py
|
||||
pytest -vv -s --maxfail=1 tests/test_access_control.py
|
||||
|
||||
# Re-run a few tests that will trigger brotli based storage
|
||||
export SNAPSHOT_BROTLI_COMPRESSION_THRESHOLD=5
|
||||
pytest tests/test_access_control.py
|
||||
pytest -vv -s --maxfail=1 tests/test_access_control.py
|
||||
REMOVE_REQUESTS_OLD_SCREENSHOTS=false pytest tests/test_notification.py
|
||||
pytest tests/test_backend.py
|
||||
pytest tests/test_rss.py
|
||||
pytest tests/test_unique_lines.py
|
||||
pytest -vv -s --maxfail=1 tests/test_backend.py
|
||||
pytest -vv -s --maxfail=1 tests/test_rss.py
|
||||
pytest -vv -s --maxfail=1 tests/test_unique_lines.py
|
||||
|
||||
# Try high concurrency
|
||||
FETCH_WORKERS=130 pytest tests/test_history_consistency.py -v -l
|
||||
|
||||
# Check file:// will pickup a file when enabled
|
||||
echo "Hello world" > /tmp/test-file.txt
|
||||
|
||||
@@ -6,6 +6,8 @@
|
||||
|
||||
# enable debug
|
||||
set -x
|
||||
docker network inspect changedet-network >/dev/null 2>&1 || docker network create changedet-network
|
||||
docker run --network changedet-network -d --hostname selenium -p 4444:4444 --rm --shm-size="2g" selenium/standalone-chrome:4
|
||||
|
||||
# A extra browser is configured, but we never chose to use it, so it should NOT show in the logs
|
||||
docker run --rm -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest tests/custom_browser_url/test_custom_browser_url.py::test_request_not_via_custom_browser_url'
|
||||
|
||||
@@ -19,12 +19,13 @@ docker run --network changedet-network -d \
|
||||
-v `pwd`/tests/proxy_list/squid-passwords.txt:/etc/squid3/passwords \
|
||||
ubuntu/squid:4.13-21.10_edge
|
||||
|
||||
|
||||
sleep 5
|
||||
## 2nd test actually choose the preferred proxy from proxies.json
|
||||
# This will force a request via "proxy-two"
|
||||
docker run --network changedet-network \
|
||||
-v `pwd`/tests/proxy_list/proxies.json-example:/app/changedetectionio/test-datastore/proxies.json \
|
||||
-v `pwd`/tests/proxy_list/proxies.json-example:/tmp/proxies.json \
|
||||
test-changedetectionio \
|
||||
bash -c 'cd changedetectionio && pytest tests/proxy_list/test_multiple_proxy.py'
|
||||
bash -c 'cd changedetectionio && pytest -s tests/proxy_list/test_multiple_proxy.py --datastore-path /tmp'
|
||||
|
||||
set +e
|
||||
echo "- Looking for chosen.changedetection.io request in squid-one - it should NOT be here"
|
||||
@@ -48,8 +49,10 @@ fi
|
||||
# Test the UI configurable proxies
|
||||
docker run --network changedet-network \
|
||||
test-changedetectionio \
|
||||
bash -c 'cd changedetectionio && pytest tests/proxy_list/test_select_custom_proxy.py'
|
||||
bash -c 'cd changedetectionio && pytest tests/proxy_list/test_select_custom_proxy.py --datastore-path /tmp'
|
||||
|
||||
# Give squid proxies a moment to flush their logs
|
||||
sleep 2
|
||||
|
||||
# Should see a request for one.changedetection.io in there
|
||||
echo "- Looking for .changedetection.io request in squid-custom"
|
||||
@@ -63,7 +66,10 @@ fi
|
||||
# Test "no-proxy" option
|
||||
docker run --network changedet-network \
|
||||
test-changedetectionio \
|
||||
bash -c 'cd changedetectionio && pytest tests/proxy_list/test_noproxy.py'
|
||||
bash -c 'cd changedetectionio && pytest tests/proxy_list/test_noproxy.py --datastore-path /tmp'
|
||||
|
||||
# Give squid proxies a moment to flush their logs
|
||||
sleep 2
|
||||
|
||||
# We need to handle grep returning 1
|
||||
set +e
|
||||
@@ -80,5 +86,29 @@ for c in $(echo "squid-one squid-two squid-custom"); do
|
||||
fi
|
||||
done
|
||||
|
||||
echo "docker ps output"
|
||||
docker ps
|
||||
|
||||
docker kill squid-one squid-two squid-custom
|
||||
|
||||
# Test that the UI is returning the correct error message when a proxy is not available
|
||||
|
||||
# Requests
|
||||
docker run --network changedet-network \
|
||||
test-changedetectionio \
|
||||
bash -c 'cd changedetectionio && pytest tests/proxy_list/test_proxy_noconnect.py --datastore-path /tmp'
|
||||
|
||||
# Playwright
|
||||
docker run --network changedet-network \
|
||||
test-changedetectionio \
|
||||
bash -c 'cd changedetectionio && PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000 pytest tests/proxy_list/test_proxy_noconnect.py --datastore-path /tmp'
|
||||
|
||||
# Puppeteer fast
|
||||
docker run --network changedet-network \
|
||||
test-changedetectionio \
|
||||
bash -c 'cd changedetectionio && FAST_PUPPETEER_CHROME_FETCHER=1 PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000 pytest tests/proxy_list/test_proxy_noconnect.py --datastore-path /tmp'
|
||||
|
||||
# Selenium
|
||||
docker run --network changedet-network \
|
||||
test-changedetectionio \
|
||||
bash -c 'cd changedetectionio && WEBDRIVER_URL=http://selenium:4444/wd/hub pytest tests/proxy_list/test_proxy_noconnect.py --datastore-path /tmp'
|
||||
|
||||
@@ -5,22 +5,23 @@ set -e
|
||||
# enable debug
|
||||
set -x
|
||||
|
||||
docker network inspect changedet-network >/dev/null 2>&1 || docker network create changedet-network
|
||||
|
||||
# SOCKS5 related - start simple Socks5 proxy server
|
||||
# SOCKSTEST=xyz should show in the logs of this service to confirm it fetched
|
||||
docker run --network changedet-network -d --hostname socks5proxy --rm --name socks5proxy -p 1080:1080 -e PROXY_USER=proxy_user123 -e PROXY_PASSWORD=proxy_pass123 serjs/go-socks5-proxy
|
||||
docker run --network changedet-network -d --hostname socks5proxy-noauth --rm -p 1081:1080 --name socks5proxy-noauth serjs/go-socks5-proxy
|
||||
docker run --network changedet-network -d --hostname socks5proxy-noauth --rm -p 1081:1080 --name socks5proxy-noauth -e REQUIRE_AUTH=false serjs/go-socks5-proxy
|
||||
|
||||
echo "---------------------------------- SOCKS5 -------------------"
|
||||
# SOCKS5 related - test from proxies.json
|
||||
docker run --network changedet-network \
|
||||
-v `pwd`/tests/proxy_socks5/proxies.json-example:/app/changedetectionio/test-datastore/proxies.json \
|
||||
-v `pwd`/tests/proxy_socks5/proxies.json-example:/tmp/proxies.json \
|
||||
--rm \
|
||||
-e "FLASK_SERVER_NAME=cdio" \
|
||||
--hostname cdio \
|
||||
-e "SOCKSTEST=proxiesjson" \
|
||||
test-changedetectionio \
|
||||
bash -c 'cd changedetectionio && pytest --live-server-host=0.0.0.0 --live-server-port=5004 -s tests/proxy_socks5/test_socks5_proxy_sources.py'
|
||||
bash -c 'cd changedetectionio && pytest --live-server-host=0.0.0.0 --live-server-port=5004 -s tests/proxy_socks5/test_socks5_proxy_sources.py --datastore-path /tmp'
|
||||
|
||||
# SOCKS5 related - by manually entering in UI
|
||||
docker run --network changedet-network \
|
||||
@@ -29,18 +30,18 @@ docker run --network changedet-network \
|
||||
--hostname cdio \
|
||||
-e "SOCKSTEST=manual" \
|
||||
test-changedetectionio \
|
||||
bash -c 'cd changedetectionio && pytest --live-server-host=0.0.0.0 --live-server-port=5004 -s tests/proxy_socks5/test_socks5_proxy.py'
|
||||
bash -c 'cd changedetectionio && pytest --live-server-host=0.0.0.0 --live-server-port=5004 -s tests/proxy_socks5/test_socks5_proxy.py --datastore-path /tmp'
|
||||
|
||||
# SOCKS5 related - test from proxies.json via playwright - NOTE- PLAYWRIGHT DOESNT SUPPORT AUTHENTICATING PROXY
|
||||
docker run --network changedet-network \
|
||||
-e "SOCKSTEST=manual-playwright" \
|
||||
--hostname cdio \
|
||||
-e "FLASK_SERVER_NAME=cdio" \
|
||||
-v `pwd`/tests/proxy_socks5/proxies.json-example-noauth:/app/changedetectionio/test-datastore/proxies.json \
|
||||
-v `pwd`/tests/proxy_socks5/proxies.json-example-noauth:/tmp/proxies.json \
|
||||
-e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" \
|
||||
--rm \
|
||||
test-changedetectionio \
|
||||
bash -c 'cd changedetectionio && pytest --live-server-host=0.0.0.0 --live-server-port=5004 -s tests/proxy_socks5/test_socks5_proxy_sources.py'
|
||||
bash -c 'cd changedetectionio && pytest --live-server-host=0.0.0.0 --live-server-port=5004 -s tests/proxy_socks5/test_socks5_proxy_sources.py --datastore-path /tmp'
|
||||
|
||||
echo "socks5 server logs"
|
||||
docker logs socks5proxy
|
||||
|
||||
@@ -1,18 +0,0 @@
|
||||
"""
|
||||
Safe Jinja2 render with max payload sizes
|
||||
|
||||
See https://jinja.palletsprojects.com/en/3.1.x/sandbox/#security-considerations
|
||||
"""
|
||||
|
||||
import jinja2.sandbox
|
||||
import typing as t
|
||||
import os
|
||||
|
||||
JINJA2_MAX_RETURN_PAYLOAD_SIZE = 1024 * int(os.getenv("JINJA2_MAX_RETURN_PAYLOAD_SIZE_KB", 1024 * 10))
|
||||
|
||||
|
||||
def render(template_str, **args: t.Any) -> str:
|
||||
jinja2_env = jinja2.sandbox.ImmutableSandboxedEnvironment(extensions=['jinja2_time.TimeExtension'])
|
||||
output = jinja2_env.from_string(template_str).render(args)
|
||||
return output[:JINJA2_MAX_RETURN_PAYLOAD_SIZE]
|
||||
|
||||
@@ -1,7 +1,5 @@
|
||||
$(document).ready(function () {
|
||||
var a = document.getElementById("a");
|
||||
var b = document.getElementById("b");
|
||||
var result = document.getElementById("result");
|
||||
|
||||
var inputs;
|
||||
|
||||
$('#jump-next-diff').click(function () {
|
||||
@@ -23,93 +21,8 @@ $(document).ready(function () {
|
||||
});
|
||||
|
||||
function changed() {
|
||||
// https://github.com/kpdecker/jsdiff/issues/389
|
||||
// I would love to use `{ignoreWhitespace: true}` here but it breaks the formatting
|
||||
options = {
|
||||
ignoreWhitespace: document.getElementById("ignoreWhitespace").checked,
|
||||
};
|
||||
|
||||
var diff = Diff[window.diffType](a.textContent, b.textContent, options);
|
||||
var fragment = document.createDocumentFragment();
|
||||
for (var i = 0; i < diff.length; i++) {
|
||||
if (diff[i].added && diff[i + 1] && diff[i + 1].removed) {
|
||||
var swap = diff[i];
|
||||
diff[i] = diff[i + 1];
|
||||
diff[i + 1] = swap;
|
||||
}
|
||||
|
||||
var node;
|
||||
if (diff[i].removed) {
|
||||
node = document.createElement("del");
|
||||
node.classList.add("change");
|
||||
const wrapper = node.appendChild(document.createElement("span"));
|
||||
wrapper.appendChild(document.createTextNode(diff[i].value));
|
||||
} else if (diff[i].added) {
|
||||
node = document.createElement("ins");
|
||||
node.classList.add("change");
|
||||
const wrapper = node.appendChild(document.createElement("span"));
|
||||
wrapper.appendChild(document.createTextNode(diff[i].value));
|
||||
} else {
|
||||
node = document.createTextNode(diff[i].value);
|
||||
}
|
||||
fragment.appendChild(node);
|
||||
}
|
||||
|
||||
result.textContent = "";
|
||||
result.appendChild(fragment);
|
||||
|
||||
// For nice mouse-over hover/title information
|
||||
const removed_current_option = $('#diff-version option:selected')
|
||||
if (removed_current_option) {
|
||||
$('del').each(function () {
|
||||
$(this).prop('title', 'Removed '+removed_current_option[0].label);
|
||||
});
|
||||
}
|
||||
const inserted_current_option = $('#current-version option:selected')
|
||||
if (removed_current_option) {
|
||||
$('ins').each(function () {
|
||||
$(this).prop('title', 'Inserted '+inserted_current_option[0].label);
|
||||
});
|
||||
}
|
||||
// Set the list of possible differences to jump to
|
||||
inputs = document.querySelectorAll('#diff-ui .change')
|
||||
// Set the "current" diff pointer
|
||||
inputs.current = 0;
|
||||
// Goto diff
|
||||
$('#jump-next-diff').click();
|
||||
//$('#jump-next-diff').click();
|
||||
}
|
||||
|
||||
|
||||
onDiffTypeChange(
|
||||
document.querySelector('#settings [name="diff_type"]:checked'),
|
||||
);
|
||||
changed();
|
||||
|
||||
a.onpaste = a.onchange = b.onpaste = b.onchange = changed;
|
||||
|
||||
if ("oninput" in a) {
|
||||
a.oninput = b.oninput = changed;
|
||||
} else {
|
||||
a.onkeyup = b.onkeyup = changed;
|
||||
}
|
||||
|
||||
function onDiffTypeChange(radio) {
|
||||
window.diffType = radio.value;
|
||||
// Not necessary
|
||||
// document.title = "Diff " + radio.value.slice(4);
|
||||
}
|
||||
|
||||
var radio = document.getElementsByName("diff_type");
|
||||
for (var i = 0; i < radio.length; i++) {
|
||||
radio[i].onchange = function (e) {
|
||||
onDiffTypeChange(e.target);
|
||||
changed();
|
||||
};
|
||||
}
|
||||
|
||||
document.getElementById("ignoreWhitespace").onchange = function (e) {
|
||||
changed();
|
||||
};
|
||||
|
||||
});
|
||||
|
||||
|
||||
38
changedetectionio/static/js/diff.min.js
vendored
38
changedetectionio/static/js/diff.min.js
vendored
File diff suppressed because one or more lines are too long
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user