mirror of
https://github.com/dgtlmoon/changedetection.io.git
synced 2026-03-25 13:17:57 +00:00
Compare commits
31 Commits
browser-se
...
i18n-tweak
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
2a3d3b89d5 | ||
|
|
f3174743e9 | ||
|
|
6e205b736b | ||
|
|
44c615963e | ||
|
|
63271d58e1 | ||
|
|
59d42a8398 | ||
|
|
895368144f | ||
|
|
9096407fcb | ||
|
|
df8f86ccbf | ||
|
|
40dc3fef7e | ||
|
|
5f4998960d | ||
|
|
7a515c4202 | ||
|
|
48e21226a1 | ||
|
|
cdf34bf614 | ||
|
|
a94560190f | ||
|
|
fefaf40514 | ||
|
|
6f66c39628 | ||
|
|
eb0f83b45b | ||
|
|
f2284f7a9b | ||
|
|
4b0ad525f3 | ||
|
|
a748a43224 | ||
|
|
acfcaf42d4 | ||
|
|
6158bb48b8 | ||
|
|
d4fc1a3b6e | ||
|
|
f39b5e5a46 | ||
|
|
30ba603956 | ||
|
|
3147c5a3e2 | ||
|
|
f599efacab | ||
|
|
d7dbc50d70 | ||
|
|
51bb358ea7 | ||
|
|
fe4df1d41f |
33
.github/nginx-reverse-proxy-test.conf
vendored
33
.github/nginx-reverse-proxy-test.conf
vendored
@@ -1,33 +0,0 @@
|
||||
server {
|
||||
listen 80;
|
||||
server_name localhost;
|
||||
|
||||
# Test basic reverse proxy to changedetection.io
|
||||
location / {
|
||||
proxy_pass http://changedet-app:5000;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
|
||||
# WebSocket support
|
||||
proxy_http_version 1.1;
|
||||
proxy_set_header Upgrade $http_upgrade;
|
||||
proxy_set_header Connection "upgrade";
|
||||
}
|
||||
|
||||
# Test subpath deployment with X-Forwarded-Prefix
|
||||
location /changedet-sub/ {
|
||||
proxy_pass http://changedet-app:5000/;
|
||||
proxy_set_header X-Forwarded-Prefix /changedet-sub;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
|
||||
# WebSocket support
|
||||
proxy_http_version 1.1;
|
||||
proxy_set_header Upgrade $http_upgrade;
|
||||
proxy_set_header Connection "upgrade";
|
||||
}
|
||||
}
|
||||
54
.github/workflows/containers.yml
vendored
54
.github/workflows/containers.yml
vendored
@@ -15,6 +15,7 @@ on:
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
- dev
|
||||
|
||||
jobs:
|
||||
metadata:
|
||||
@@ -66,62 +67,43 @@ jobs:
|
||||
echo ${{ github.ref }} > changedetectionio/tag.txt
|
||||
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v4
|
||||
uses: docker/setup-qemu-action@v3
|
||||
with:
|
||||
image: tonistiigi/binfmt:latest
|
||||
platforms: all
|
||||
|
||||
- name: Login to GitHub Container Registry
|
||||
uses: docker/login-action@v4
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
registry: ghcr.io
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Login to Docker Hub Container Registry
|
||||
uses: docker/login-action@v4
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.DOCKER_HUB_USERNAME }}
|
||||
password: ${{ secrets.DOCKER_HUB_ACCESS_TOKEN }}
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
id: buildx
|
||||
uses: docker/setup-buildx-action@v4
|
||||
uses: docker/setup-buildx-action@v3
|
||||
with:
|
||||
install: true
|
||||
version: latest
|
||||
driver-opts: image=moby/buildkit:master
|
||||
|
||||
# master branch -> :dev container tag
|
||||
- name: Docker meta :dev
|
||||
if: ${{ github.ref == 'refs/heads/master' && github.event_name != 'release' }}
|
||||
uses: docker/metadata-action@v6
|
||||
id: meta_dev
|
||||
with:
|
||||
images: |
|
||||
${{ secrets.DOCKER_HUB_USERNAME }}/changedetection.io
|
||||
ghcr.io/${{ github.repository }}
|
||||
tags: |
|
||||
type=raw,value=dev
|
||||
labels: |
|
||||
org.opencontainers.image.created=${{ github.event.release.published_at }}
|
||||
org.opencontainers.image.description=Website, webpage change detection, monitoring and notifications.
|
||||
org.opencontainers.image.documentation=https://changedetection.io
|
||||
org.opencontainers.image.revision=${{ github.sha }}
|
||||
org.opencontainers.image.source=https://github.com/dgtlmoon/changedetection.io
|
||||
org.opencontainers.image.title=changedetection.io
|
||||
org.opencontainers.image.url=https://changedetection.io
|
||||
|
||||
# dev branch -> :dev container tag
|
||||
- name: Build and push :dev
|
||||
id: docker_build
|
||||
if: ${{ github.ref == 'refs/heads/master' && github.event_name != 'release' }}
|
||||
uses: docker/build-push-action@v7
|
||||
if: ${{ github.ref == 'refs/heads/dev' }}
|
||||
uses: docker/build-push-action@v6
|
||||
with:
|
||||
context: ./
|
||||
file: ./Dockerfile
|
||||
push: true
|
||||
tags: ${{ steps.meta_dev.outputs.tags }}
|
||||
labels: ${{ steps.meta_dev.outputs.labels }}
|
||||
tags: |
|
||||
${{ secrets.DOCKER_HUB_USERNAME }}/changedetection.io:dev,ghcr.io/${{ github.repository }}:dev
|
||||
platforms: linux/amd64,linux/arm64,linux/arm/v7,linux/arm/v8
|
||||
cache-from: type=gha
|
||||
cache-to: type=gha,mode=max
|
||||
@@ -136,10 +118,10 @@ jobs:
|
||||
echo "Release tag: ${{ github.event.release.tag_name }}"
|
||||
echo "Github ref: ${{ github.ref }}"
|
||||
echo "Github ref name: ${{ github.ref_name }}"
|
||||
|
||||
|
||||
- name: Docker meta :tag
|
||||
if: github.event_name == 'release' && startsWith(github.event.release.tag_name, '0.')
|
||||
uses: docker/metadata-action@v6
|
||||
uses: docker/metadata-action@v5
|
||||
id: meta
|
||||
with:
|
||||
images: |
|
||||
@@ -150,26 +132,16 @@ jobs:
|
||||
type=semver,pattern={{major}}.{{minor}},value=${{ github.event.release.tag_name }}
|
||||
type=semver,pattern={{major}},value=${{ github.event.release.tag_name }}
|
||||
type=raw,value=latest
|
||||
labels: |
|
||||
org.opencontainers.image.created=${{ github.event.release.published_at }}
|
||||
org.opencontainers.image.description=Website, webpage change detection, monitoring and notifications.
|
||||
org.opencontainers.image.documentation=https://changedetection.io
|
||||
org.opencontainers.image.revision=${{ github.sha }}
|
||||
org.opencontainers.image.source=https://github.com/dgtlmoon/changedetection.io
|
||||
org.opencontainers.image.title=changedetection.io
|
||||
org.opencontainers.image.url=https://changedetection.io
|
||||
org.opencontainers.image.version=${{ github.event.release.tag_name }}
|
||||
|
||||
- name: Build and push :tag
|
||||
id: docker_build_tag_release
|
||||
if: github.event_name == 'release' && startsWith(github.event.release.tag_name, '0.')
|
||||
uses: docker/build-push-action@v7
|
||||
uses: docker/build-push-action@v6
|
||||
with:
|
||||
context: ./
|
||||
file: ./Dockerfile
|
||||
push: true
|
||||
tags: ${{ steps.meta.outputs.tags }}
|
||||
labels: ${{ steps.meta.outputs.labels }}
|
||||
platforms: linux/amd64,linux/arm64,linux/arm/v7,linux/arm/v8
|
||||
cache-from: type=gha
|
||||
cache-to: type=gha,mode=max
|
||||
|
||||
10
.github/workflows/pypi-release.yml
vendored
10
.github/workflows/pypi-release.yml
vendored
@@ -21,7 +21,7 @@ jobs:
|
||||
- name: Build a binary wheel and a source tarball
|
||||
run: python3 -m build
|
||||
- name: Store the distribution packages
|
||||
uses: actions/upload-artifact@v7
|
||||
uses: actions/upload-artifact@v6
|
||||
with:
|
||||
name: python-package-distributions
|
||||
path: dist/
|
||||
@@ -34,7 +34,7 @@ jobs:
|
||||
- build
|
||||
steps:
|
||||
- name: Download all the dists
|
||||
uses: actions/download-artifact@v8
|
||||
uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: python-package-distributions
|
||||
path: dist/
|
||||
@@ -61,8 +61,8 @@ jobs:
|
||||
|
||||
# --- API test ---
|
||||
# This also means that the docs/api-spec.yml was shipped and could be read
|
||||
test -f /tmp/changedetection.json
|
||||
API_KEY=$(jq -r '.. | .api_access_token? // empty' /tmp/changedetection.json)
|
||||
test -f /tmp/url-watches.json
|
||||
API_KEY=$(jq -r '.. | .api_access_token? // empty' /tmp/url-watches.json)
|
||||
echo Test API KEY is $API_KEY
|
||||
curl -X POST "http://127.0.0.1:10000/api/v1/watch" \
|
||||
-H "x-api-key: ${API_KEY}" \
|
||||
@@ -93,7 +93,7 @@ jobs:
|
||||
|
||||
steps:
|
||||
- name: Download all the dists
|
||||
uses: actions/download-artifact@v8
|
||||
uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: python-package-distributions
|
||||
path: dist/
|
||||
|
||||
6
.github/workflows/test-container-build.yml
vendored
6
.github/workflows/test-container-build.yml
vendored
@@ -60,14 +60,14 @@ jobs:
|
||||
|
||||
# Just test that the build works, some libraries won't compile on ARM/rPi etc
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v4
|
||||
uses: docker/setup-qemu-action@v3
|
||||
with:
|
||||
image: tonistiigi/binfmt:latest
|
||||
platforms: all
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
id: buildx
|
||||
uses: docker/setup-buildx-action@v4
|
||||
uses: docker/setup-buildx-action@v3
|
||||
with:
|
||||
install: true
|
||||
version: latest
|
||||
@@ -75,7 +75,7 @@ jobs:
|
||||
|
||||
- name: Test that the docker containers can build (${{ matrix.platform }} - ${{ matrix.dockerfile }})
|
||||
id: docker_build
|
||||
uses: docker/build-push-action@v7
|
||||
uses: docker/build-push-action@v6
|
||||
# https://github.com/docker/build-push-action#customizing
|
||||
with:
|
||||
context: ./
|
||||
|
||||
11
.github/workflows/test-only.yml
vendored
11
.github/workflows/test-only.yml
vendored
@@ -52,13 +52,4 @@ jobs:
|
||||
uses: ./.github/workflows/test-stack-reusable-workflow.yml
|
||||
with:
|
||||
python-version: '3.13'
|
||||
skip-pypuppeteer: true
|
||||
|
||||
|
||||
test-application-3-14:
|
||||
#if: github.event_name == 'push' && github.ref == 'refs/heads/master'
|
||||
needs: lint-code
|
||||
uses: ./.github/workflows/test-stack-reusable-workflow.yml
|
||||
with:
|
||||
python-version: '3.14'
|
||||
skip-pypuppeteer: false
|
||||
skip-pypuppeteer: true
|
||||
424
.github/workflows/test-stack-reusable-workflow.yml
vendored
424
.github/workflows/test-stack-reusable-workflow.yml
vendored
@@ -37,29 +37,10 @@ jobs:
|
||||
${{ runner.os }}-pip-py${{ env.PYTHON_VERSION }}-
|
||||
${{ runner.os }}-pip-
|
||||
|
||||
- name: Get current date for cache key
|
||||
id: date
|
||||
run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v4
|
||||
|
||||
- name: Build changedetection.io container for testing under Python ${{ env.PYTHON_VERSION }}
|
||||
uses: docker/build-push-action@v7
|
||||
with:
|
||||
context: ./
|
||||
file: ./Dockerfile
|
||||
build-args: |
|
||||
PYTHON_VERSION=${{ env.PYTHON_VERSION }}
|
||||
LOGGER_LEVEL=TRACE
|
||||
tags: test-changedetectionio
|
||||
load: true
|
||||
cache-from: type=gha,scope=build-${{ github.ref_name }}-py${{ env.PYTHON_VERSION }}-${{ hashFiles('requirements.txt', 'Dockerfile') }}-${{ steps.date.outputs.date }}
|
||||
cache-to: type=gha,mode=max,scope=build-${{ github.ref_name }}-py${{ env.PYTHON_VERSION }}-${{ hashFiles('requirements.txt', 'Dockerfile') }}-${{ steps.date.outputs.date }}
|
||||
|
||||
- name: Verify build
|
||||
run: |
|
||||
echo "---- Built for Python ${{ env.PYTHON_VERSION }} -----"
|
||||
echo "---- Building for Python ${{ env.PYTHON_VERSION }} -----"
|
||||
docker build --build-arg PYTHON_VERSION=${{ env.PYTHON_VERSION }} --build-arg LOGGER_LEVEL=TRACE -t test-changedetectionio .
|
||||
docker run test-changedetectionio bash -c 'pip list'
|
||||
|
||||
- name: We should be Python ${{ env.PYTHON_VERSION }} ...
|
||||
@@ -71,7 +52,7 @@ jobs:
|
||||
docker save test-changedetectionio -o /tmp/test-changedetectionio.tar
|
||||
|
||||
- name: Upload Docker image artifact
|
||||
uses: actions/upload-artifact@v7
|
||||
uses: actions/upload-artifact@v6
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp/test-changedetectionio.tar
|
||||
@@ -88,7 +69,7 @@ jobs:
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v8
|
||||
uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
@@ -103,7 +84,6 @@ jobs:
|
||||
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_watch_model'
|
||||
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_jinja2_security'
|
||||
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_semver'
|
||||
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_html_to_text'
|
||||
|
||||
# Basic pytest tests with ancillary services
|
||||
basic-tests:
|
||||
@@ -116,7 +96,7 @@ jobs:
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v8
|
||||
uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
@@ -130,32 +110,6 @@ jobs:
|
||||
docker network inspect changedet-network >/dev/null 2>&1 || docker network create changedet-network
|
||||
docker run --name test-cdio-basic-tests --network changedet-network test-changedetectionio bash -c 'cd changedetectionio && ./run_basic_tests.sh'
|
||||
|
||||
- name: Test CLI options
|
||||
run: |
|
||||
docker network inspect changedet-network >/dev/null 2>&1 || docker network create changedet-network
|
||||
docker run --name test-cdio-cli-opts --network changedet-network test-changedetectionio bash -c 'changedetectionio/test_cli_opts.sh' &> cli-opts-output.txt
|
||||
echo "=== CLI Options Test Output ==="
|
||||
cat cli-opts-output.txt
|
||||
|
||||
- name: CLI Memory Test
|
||||
run: |
|
||||
echo "=== Checking CLI batch mode memory usage ==="
|
||||
# Extract RSS memory value from output
|
||||
RSS_MB=$(grep -oP "Memory consumption before worker shutdown: RSS=\K[\d.]+" cli-opts-output.txt | head -1 || echo "0")
|
||||
echo "RSS Memory: ${RSS_MB} MB"
|
||||
|
||||
# Check if RSS is less than 100MB
|
||||
if [ -n "$RSS_MB" ]; then
|
||||
if (( $(echo "$RSS_MB < 100" | bc -l) )); then
|
||||
echo "✓ Memory usage is acceptable: ${RSS_MB} MB < 100 MB"
|
||||
else
|
||||
echo "✗ Memory usage too high: ${RSS_MB} MB >= 100 MB"
|
||||
exit 1
|
||||
fi
|
||||
else
|
||||
echo "⚠ Could not extract memory usage, skipping check"
|
||||
fi
|
||||
|
||||
- name: Extract memory report and logs
|
||||
if: always()
|
||||
uses: ./.github/actions/extract-memory-report
|
||||
@@ -165,18 +119,11 @@ jobs:
|
||||
|
||||
- name: Store test artifacts
|
||||
if: always()
|
||||
uses: actions/upload-artifact@v7
|
||||
uses: actions/upload-artifact@v6
|
||||
with:
|
||||
name: test-cdio-basic-tests-output-py${{ env.PYTHON_VERSION }}
|
||||
path: output-logs
|
||||
|
||||
- name: Store CLI test output
|
||||
if: always()
|
||||
uses: actions/upload-artifact@v7
|
||||
with:
|
||||
name: test-cdio-cli-opts-output-py${{ env.PYTHON_VERSION }}
|
||||
path: cli-opts-output.txt
|
||||
|
||||
# Playwright tests
|
||||
playwright-tests:
|
||||
runs-on: ubuntu-latest
|
||||
@@ -188,7 +135,7 @@ jobs:
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v8
|
||||
uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
@@ -230,7 +177,7 @@ jobs:
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v8
|
||||
uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
@@ -270,7 +217,7 @@ jobs:
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v8
|
||||
uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
@@ -306,7 +253,7 @@ jobs:
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v8
|
||||
uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
@@ -324,175 +271,6 @@ jobs:
|
||||
run: |
|
||||
docker run --rm --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest tests/smtp/test_notification_smtp.py'
|
||||
|
||||
nginx-reverse-proxy:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
timeout-minutes: 10
|
||||
env:
|
||||
PYTHON_VERSION: ${{ inputs.python-version }}
|
||||
steps:
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v8
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
|
||||
- name: Load Docker image
|
||||
run: |
|
||||
docker load -i /tmp/test-changedetectionio.tar
|
||||
|
||||
- name: Spin up services
|
||||
run: |
|
||||
docker network create changedet-network
|
||||
|
||||
# Start changedetection.io container with X-Forwarded headers support
|
||||
docker run --name changedet-app --hostname changedet-app --network changedet-network \
|
||||
-e USE_X_SETTINGS=true \
|
||||
-d test-changedetectionio
|
||||
sleep 3
|
||||
|
||||
- name: Start nginx reverse proxy
|
||||
run: |
|
||||
# Start nginx with our test configuration
|
||||
docker run --name nginx-proxy --network changedet-network -d -p 8080:80 --rm \
|
||||
-v ${{ github.workspace }}/.github/nginx-reverse-proxy-test.conf:/etc/nginx/conf.d/default.conf:ro \
|
||||
nginx:alpine
|
||||
sleep 2
|
||||
|
||||
- name: Test reverse proxy - root path
|
||||
run: |
|
||||
echo "=== Testing nginx reverse proxy at root path ==="
|
||||
curl --retry-connrefused --retry 6 -s http://localhost:8080/ > /tmp/nginx-test-root.html
|
||||
|
||||
# Check for changedetection.io UI elements
|
||||
if grep -q "checkbox-uuid" /tmp/nginx-test-root.html; then
|
||||
echo "✓ Found checkbox-uuid in response"
|
||||
else
|
||||
echo "ERROR: checkbox-uuid not found in response"
|
||||
cat /tmp/nginx-test-root.html
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Check for watchlist content
|
||||
if grep -q -i "watch" /tmp/nginx-test-root.html; then
|
||||
echo "✓ Found watch/watchlist content in response"
|
||||
else
|
||||
echo "ERROR: watchlist content not found"
|
||||
cat /tmp/nginx-test-root.html
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "✓ Root path reverse proxy working correctly"
|
||||
|
||||
- name: Test reverse proxy - subpath with X-Forwarded-Prefix
|
||||
run: |
|
||||
echo "=== Testing nginx reverse proxy at subpath /changedet-sub/ ==="
|
||||
curl --retry-connrefused --retry 6 -s http://localhost:8080/changedet-sub/ > /tmp/nginx-test-subpath.html
|
||||
|
||||
# Check for changedetection.io UI elements
|
||||
if grep -q "checkbox-uuid" /tmp/nginx-test-subpath.html; then
|
||||
echo "✓ Found checkbox-uuid in subpath response"
|
||||
else
|
||||
echo "ERROR: checkbox-uuid not found in subpath response"
|
||||
cat /tmp/nginx-test-subpath.html
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "✓ Subpath reverse proxy working correctly"
|
||||
|
||||
- name: Test API through reverse proxy subpath
|
||||
run: |
|
||||
echo "=== Testing API endpoints through nginx subpath /changedet-sub/ ==="
|
||||
|
||||
# Extract API key from the changedetection.io datastore
|
||||
API_KEY=$(docker exec changedet-app cat /datastore/changedetection.json | grep -o '"api_access_token": *"[^"]*"' | cut -d'"' -f4)
|
||||
|
||||
if [ -z "$API_KEY" ]; then
|
||||
echo "ERROR: Could not extract API key from datastore"
|
||||
docker exec changedet-app cat /datastore/changedetection.json
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "✓ Extracted API key: ${API_KEY:0:8}..."
|
||||
|
||||
# Create a watch via API through nginx proxy subpath
|
||||
echo "Creating watch via POST to /changedet-sub/api/v1/watch"
|
||||
RESPONSE=$(curl -s -w "\n%{http_code}" -X POST "http://localhost:8080/changedet-sub/api/v1/watch" \
|
||||
-H "x-api-key: ${API_KEY}" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"url": "https://example.com/test-nginx-proxy",
|
||||
"tag": "nginx-test"
|
||||
}')
|
||||
|
||||
HTTP_CODE=$(echo "$RESPONSE" | tail -n1)
|
||||
BODY=$(echo "$RESPONSE" | head -n-1)
|
||||
|
||||
if [ "$HTTP_CODE" != "201" ]; then
|
||||
echo "ERROR: Expected HTTP 201, got $HTTP_CODE"
|
||||
echo "Response: $BODY"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "✓ Watch created successfully (HTTP 201)"
|
||||
|
||||
# Extract the watch UUID from response
|
||||
WATCH_UUID=$(echo "$BODY" | grep -o '"uuid": *"[^"]*"' | cut -d'"' -f4)
|
||||
echo "✓ Watch UUID: $WATCH_UUID"
|
||||
|
||||
# Update the watch via PUT through nginx proxy subpath
|
||||
echo "Updating watch via PUT to /changedet-sub/api/v1/watch/${WATCH_UUID}"
|
||||
RESPONSE=$(curl -s -w "\n%{http_code}" -X PUT "http://localhost:8080/changedet-sub/api/v1/watch/${WATCH_UUID}" \
|
||||
-H "x-api-key: ${API_KEY}" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"paused": true
|
||||
}')
|
||||
|
||||
HTTP_CODE=$(echo "$RESPONSE" | tail -n1)
|
||||
BODY=$(echo "$RESPONSE" | head -n-1)
|
||||
|
||||
if [ "$HTTP_CODE" != "200" ]; then
|
||||
echo "ERROR: Expected HTTP 200, got $HTTP_CODE"
|
||||
echo "Response: $BODY"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if echo "$BODY" | grep -q 'OK'; then
|
||||
echo "✓ Watch updated successfully (HTTP 200, response: OK)"
|
||||
else
|
||||
echo "ERROR: Expected response 'OK', got: $BODY"
|
||||
echo "Response: $BODY"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Verify the watch is paused via GET
|
||||
echo "Verifying watch is paused via GET"
|
||||
RESPONSE=$(curl -s "http://localhost:8080/changedet-sub/api/v1/watch/${WATCH_UUID}" \
|
||||
-H "x-api-key: ${API_KEY}")
|
||||
|
||||
if echo "$RESPONSE" | grep -q '"paused": *true'; then
|
||||
echo "✓ Watch is paused as expected"
|
||||
else
|
||||
echo "ERROR: Watch paused state not confirmed"
|
||||
echo "Response: $RESPONSE"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "✓ API tests through nginx subpath completed successfully"
|
||||
|
||||
- name: Cleanup nginx test
|
||||
if: always()
|
||||
run: |
|
||||
docker logs nginx-proxy || true
|
||||
docker logs changedet-app || true
|
||||
docker stop nginx-proxy changedet-app || true
|
||||
docker rm nginx-proxy changedet-app || true
|
||||
|
||||
|
||||
|
||||
# Proxy tests
|
||||
proxy-tests:
|
||||
runs-on: ubuntu-latest
|
||||
@@ -504,7 +282,7 @@ jobs:
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v8
|
||||
uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
@@ -544,7 +322,7 @@ jobs:
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v8
|
||||
uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
@@ -564,29 +342,6 @@ jobs:
|
||||
cd changedetectionio
|
||||
./run_custom_browser_url_tests.sh
|
||||
|
||||
processor-plugin-tests:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
timeout-minutes: 20
|
||||
env:
|
||||
PYTHON_VERSION: ${{ inputs.python-version }}
|
||||
steps:
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v8
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
|
||||
- name: Load Docker image
|
||||
run: |
|
||||
docker load -i /tmp/test-changedetectionio.tar
|
||||
|
||||
- name: Basic processor plugin registration and checks
|
||||
run: |
|
||||
docker run -e EXTRA_PACKAGES=changedetection.io-osint-processor test-changedetectionio bash -c 'cd changedetectionio;pytest -vvv -s tests/plugins/test_processor.py::test_check_plugin_processor'
|
||||
|
||||
# Container startup tests
|
||||
container-tests:
|
||||
runs-on: ubuntu-latest
|
||||
@@ -598,7 +353,7 @@ jobs:
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v8
|
||||
uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
@@ -643,7 +398,7 @@ jobs:
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v8
|
||||
uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
@@ -685,154 +440,3 @@ jobs:
|
||||
exit 1
|
||||
fi
|
||||
docker rm sig-test
|
||||
|
||||
# Upgrade path test
|
||||
upgrade-path-test:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
timeout-minutes: 25
|
||||
env:
|
||||
PYTHON_VERSION: ${{ inputs.python-version }}
|
||||
steps:
|
||||
- uses: actions/checkout@v6
|
||||
with:
|
||||
fetch-depth: 0 # Fetch all history and tags for upgrade testing
|
||||
|
||||
- name: Set up Python ${{ env.PYTHON_VERSION }}
|
||||
uses: actions/setup-python@v6
|
||||
with:
|
||||
python-version: ${{ env.PYTHON_VERSION }}
|
||||
|
||||
- name: Check upgrade works without error
|
||||
run: |
|
||||
echo "=== Testing upgrade path from 0.49.1 to ${{ github.ref_name }} (${{ github.sha }}) ==="
|
||||
sudo apt-get update && sudo apt-get install -y --no-install-recommends \
|
||||
g++ \
|
||||
gcc \
|
||||
libc-dev \
|
||||
libffi-dev \
|
||||
libjpeg-dev \
|
||||
libssl-dev \
|
||||
libxslt-dev \
|
||||
make \
|
||||
patch \
|
||||
pkg-config \
|
||||
zlib1g-dev
|
||||
|
||||
# Checkout old version and create datastore
|
||||
git checkout 0.49.1
|
||||
python3 -m venv .venv
|
||||
source .venv/bin/activate
|
||||
pip install -r requirements.txt
|
||||
pip install 'pyOpenSSL>=23.2.0'
|
||||
|
||||
echo "=== Running version 0.49.1 to create datastore ==="
|
||||
ALLOW_IANA_RESTRICTED_ADDRESSES=true python3 ./changedetection.py -C -d /tmp/data &
|
||||
APP_PID=$!
|
||||
|
||||
# Wait for app to be ready
|
||||
echo "Waiting for 0.49.1 to be ready..."
|
||||
sleep 6
|
||||
|
||||
# Extract API key from datastore (0.49.1 uses url-watches.json)
|
||||
API_KEY=$(jq -r '.settings.application.api_access_token // empty' /tmp/data/url-watches.json)
|
||||
echo "API Key: ${API_KEY:0:8}..."
|
||||
|
||||
# Create a watch with tag "github-group-test" via API
|
||||
echo "Creating test watch with tag via API..."
|
||||
curl -X POST "http://127.0.0.1:5000/api/v1/watch" \
|
||||
-H "x-api-key: ${API_KEY}" \
|
||||
-H "Content-Type: application/json" \
|
||||
--show-error --fail \
|
||||
--retry 6 --retry-delay 1 --retry-connrefused \
|
||||
-d '{
|
||||
"url": "https://example.com/upgrade-test",
|
||||
"tag": "github-group-test"
|
||||
}'
|
||||
|
||||
echo "✓ Created watch with tag 'github-group-test'"
|
||||
|
||||
# Create a specific test URL watch
|
||||
echo "Creating test URL watch via API..."
|
||||
curl -X POST "http://127.0.0.1:5000/api/v1/watch" \
|
||||
-H "x-api-key: ${API_KEY}" \
|
||||
-H "Content-Type: application/json" \
|
||||
--show-error --fail \
|
||||
-d '{
|
||||
"url": "http://localhost/test.txt"
|
||||
}'
|
||||
|
||||
echo "✓ Created watch for 'http://localhost/test.txt' in version 0.49.1"
|
||||
|
||||
# Stop the old version gracefully
|
||||
kill $APP_PID
|
||||
wait $APP_PID || true
|
||||
echo "✓ Version 0.49.1 stopped"
|
||||
|
||||
# Upgrade to current version (use commit SHA since we're in detached HEAD)
|
||||
echo "Upgrading to commit ${{ github.sha }}"
|
||||
git checkout ${{ github.sha }}
|
||||
pip install -r requirements.txt
|
||||
|
||||
echo "=== Running current version (commit ${{ github.sha }}) with old datastore (testing mode) ==="
|
||||
ALLOW_IANA_RESTRICTED_ADDRESSES=true TESTING_SHUTDOWN_AFTER_DATASTORE_LOAD=1 python3 ./changedetection.py -d /tmp/data > /tmp/upgrade-test.log 2>&1
|
||||
|
||||
echo "=== Upgrade test output ==="
|
||||
cat /tmp/upgrade-test.log
|
||||
echo "✓ Datastore upgraded successfully"
|
||||
|
||||
# Now start the current version normally to verify the tag survived
|
||||
echo "=== Starting current version to verify tag exists after upgrade ==="
|
||||
ALLOW_IANA_RESTRICTED_ADDRESSES=true timeout 20 python3 ./changedetection.py -d /tmp/data > /tmp/ui-test.log 2>&1 &
|
||||
APP_PID=$!
|
||||
|
||||
# Wait for app to be ready and fetch UI
|
||||
echo "Waiting for current version to be ready..."
|
||||
sleep 5
|
||||
curl --retry 6 --retry-delay 1 --retry-connrefused --silent http://127.0.0.1:5000 > /tmp/ui-output.html
|
||||
|
||||
# Verify tag exists in UI
|
||||
if grep -q "github-group-test" /tmp/ui-output.html; then
|
||||
echo "✓ Tag 'github-group-test' found in UI after upgrade"
|
||||
else
|
||||
echo "ERROR: Tag 'github-group-test' not found in UI after upgrade"
|
||||
echo "=== UI Output ==="
|
||||
cat /tmp/ui-output.html
|
||||
echo "=== App Log ==="
|
||||
cat /tmp/ui-test.log
|
||||
kill $APP_PID || true
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Verify test URL exists in UI
|
||||
if grep -q "http://localhost/test.txt" /tmp/ui-output.html; then
|
||||
echo "✓ Watch URL 'http://localhost/test.txt' found in UI after upgrade"
|
||||
else
|
||||
echo "ERROR: Watch URL 'http://localhost/test.txt' not found in UI after upgrade"
|
||||
echo "=== UI Output ==="
|
||||
cat /tmp/ui-output.html
|
||||
echo "=== App Log ==="
|
||||
cat /tmp/ui-test.log
|
||||
kill $APP_PID || true
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Cleanup
|
||||
kill $APP_PID || true
|
||||
wait $APP_PID || true
|
||||
|
||||
echo ""
|
||||
echo "✓✓✓ Upgrade test passed: 0.49.1 → ${{ github.ref_name }} ✓✓✓"
|
||||
echo " - Commit: ${{ github.sha }}"
|
||||
echo " - Datastore migrated successfully"
|
||||
echo " - Tag 'github-group-test' survived upgrade"
|
||||
echo " - Watch URL 'http://localhost/test.txt' survived upgrade"
|
||||
|
||||
echo "✓ Upgrade test passed: 0.49.1 → ${{ github.ref_name }}"
|
||||
|
||||
- name: Upload upgrade test logs
|
||||
if: always()
|
||||
uses: actions/upload-artifact@v7
|
||||
with:
|
||||
name: upgrade-test-logs-py${{ env.PYTHON_VERSION }}
|
||||
path: /tmp/upgrade-test.log
|
||||
|
||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -29,4 +29,3 @@ test-datastore/
|
||||
|
||||
# Memory consumption log
|
||||
test-memory.log
|
||||
tests/logs/
|
||||
|
||||
15
Dockerfile
15
Dockerfile
@@ -78,12 +78,6 @@ RUN --mount=type=cache,id=pip,sharing=locked,target=/tmp/pip-cache \
|
||||
# Final image stage
|
||||
FROM python:${PYTHON_VERSION}-slim-bookworm
|
||||
LABEL org.opencontainers.image.source="https://github.com/dgtlmoon/changedetection.io"
|
||||
LABEL org.opencontainers.image.url="https://changedetection.io"
|
||||
LABEL org.opencontainers.image.documentation="https://changedetection.io/tutorials"
|
||||
LABEL org.opencontainers.image.title="changedetection.io"
|
||||
LABEL org.opencontainers.image.description="Self-hosted web page change monitoring and notification service"
|
||||
LABEL org.opencontainers.image.licenses="Apache-2.0"
|
||||
LABEL org.opencontainers.image.vendor="changedetection.io"
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
libxslt1.1 \
|
||||
@@ -138,15 +132,6 @@ ENV LOGGER_LEVEL="$LOGGER_LEVEL"
|
||||
ENV LC_ALL=en_US.UTF-8
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Copy and set up entrypoint script for installing extra packages
|
||||
COPY docker-entrypoint.sh /docker-entrypoint.sh
|
||||
RUN chmod +x /docker-entrypoint.sh
|
||||
|
||||
# Set entrypoint to handle EXTRA_PACKAGES env var
|
||||
ENTRYPOINT ["/docker-entrypoint.sh"]
|
||||
|
||||
# Default command (can be overridden in docker-compose.yml)
|
||||
CMD ["python", "./changedetection.py", "-d", "/datastore"]
|
||||
|
||||
|
||||
|
||||
@@ -9,15 +9,12 @@ recursive-include changedetectionio/notification *
|
||||
recursive-include changedetectionio/processors *
|
||||
recursive-include changedetectionio/realtime *
|
||||
recursive-include changedetectionio/static *
|
||||
recursive-include changedetectionio/store *
|
||||
recursive-include changedetectionio/templates *
|
||||
recursive-include changedetectionio/tests *
|
||||
recursive-include changedetectionio/translations *
|
||||
recursive-include changedetectionio/widgets *
|
||||
prune changedetectionio/static/package-lock.json
|
||||
prune changedetectionio/static/styles/node_modules
|
||||
prune changedetectionio/static/styles/package-lock.json
|
||||
include changedetectionio/favicon_utils.py
|
||||
include changedetection.py
|
||||
include requirements.txt
|
||||
include README-pip.md
|
||||
|
||||
@@ -183,9 +183,6 @@ docker compose pull && docker compose up -d
|
||||
|
||||
See the wiki for more information https://github.com/dgtlmoon/changedetection.io/wiki
|
||||
|
||||
## Different browser viewport sizes (mobile, desktop etc)
|
||||
|
||||
If you are using the recommended `sockpuppetbrowser` (which is in the docker-compose.yml as a setting to be uncommented) you can easily set different viewport sizes for your web page change detection, [see more information here about setting up different viewport sizes](https://github.com/dgtlmoon/sockpuppetbrowser?tab=readme-ov-file#setting-viewport-size).
|
||||
|
||||
## Filters
|
||||
|
||||
|
||||
@@ -1,5 +0,0 @@
|
||||
[python: **.py]
|
||||
keywords = _:1,_l:1,gettext:1
|
||||
|
||||
[jinja2: **/templates/**.html]
|
||||
encoding = utf-8
|
||||
@@ -2,24 +2,23 @@
|
||||
|
||||
# Read more https://github.com/dgtlmoon/changedetection.io/wiki
|
||||
# Semver means never use .01, or 00. Should be .1.
|
||||
__version__ = '0.54.6'
|
||||
__version__ = '0.51.4'
|
||||
|
||||
from changedetectionio.strtobool import strtobool
|
||||
from json.decoder import JSONDecodeError
|
||||
|
||||
from loguru import logger
|
||||
import getopt
|
||||
import logging
|
||||
import os
|
||||
import getopt
|
||||
import platform
|
||||
import signal
|
||||
import threading
|
||||
import time
|
||||
|
||||
import sys
|
||||
|
||||
# Eventlet completely removed - using threading mode for SocketIO
|
||||
# This provides better Python 3.12+ compatibility and eliminates eventlet/asyncio conflicts
|
||||
# Note: store and changedetection_app are imported inside main() to avoid
|
||||
# initialization before argument parsing (allows --help to work without loading everything)
|
||||
from changedetectionio import store
|
||||
from changedetectionio.flask_app import changedetection_app
|
||||
from loguru import logger
|
||||
|
||||
# ==============================================================================
|
||||
# Multiprocessing Configuration - CRITICAL for Thread Safety
|
||||
@@ -42,10 +41,9 @@ import time
|
||||
#
|
||||
# IMPLEMENTATION:
|
||||
# 1. Explicit contexts everywhere (primary protection):
|
||||
# - Watch.py: ctx = multiprocessing.get_context('spawn')
|
||||
# - playwright.py: ctx = multiprocessing.get_context('spawn')
|
||||
# - puppeteer.py: ctx = multiprocessing.get_context('spawn')
|
||||
# - isolated_opencv.py: ctx = multiprocessing.get_context('spawn')
|
||||
# - isolated_libvips.py: ctx = multiprocessing.get_context('spawn')
|
||||
#
|
||||
# 2. Global default (defense-in-depth, below):
|
||||
# - Safety net if future code forgets explicit context
|
||||
@@ -61,22 +59,8 @@ import time
|
||||
# ==============================================================================
|
||||
|
||||
import multiprocessing
|
||||
import os
|
||||
import sys
|
||||
|
||||
# Limit glibc malloc arena count to prevent RSS growth from concurrent requests.
|
||||
# Default: glibc creates up to 8×CPU_cores arenas. Each concurrent thread/connection
|
||||
# can trigger a new arena, and freed memory stays mapped in those arenas as RSS forever.
|
||||
# With MALLOC_ARENA_MAX=2, at most 2 arenas are used; freed pages return to the OS faster.
|
||||
# Must be set before worker threads start; env var is read lazily by glibc on first arena creation.
|
||||
if 'MALLOC_ARENA_MAX' not in os.environ:
|
||||
os.environ['MALLOC_ARENA_MAX'] = '2'
|
||||
try:
|
||||
import ctypes as _ctypes
|
||||
_ctypes.CDLL('libc.so.6').mallopt(-8, 2) # M_ARENA_MAX = -8
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Set spawn as global default (safety net - all our code uses explicit contexts anyway)
|
||||
# Skip in tests to avoid breaking pytest-flask's LiveServer fixture (uses unpicklable local functions)
|
||||
if 'pytest' not in sys.modules:
|
||||
@@ -98,26 +82,15 @@ def get_version():
|
||||
def sigshutdown_handler(_signo, _stack_frame):
|
||||
name = signal.Signals(_signo).name
|
||||
logger.critical(f'Shutdown: Got Signal - {name} ({_signo}), Fast shutdown initiated')
|
||||
|
||||
|
||||
# Set exit flag immediately to stop all loops
|
||||
app.config.exit.set()
|
||||
datastore.stop_thread = True
|
||||
|
||||
# Log memory consumption before shutting down workers (cross-platform)
|
||||
try:
|
||||
import psutil
|
||||
process = psutil.Process()
|
||||
mem_info = process.memory_info()
|
||||
rss_mb = mem_info.rss / 1024 / 1024
|
||||
vms_mb = mem_info.vms / 1024 / 1024
|
||||
logger.info(f"Memory consumption before worker shutdown: RSS={rss_mb:,.2f} MB, VMS={vms_mb:,.2f} MB")
|
||||
except Exception as e:
|
||||
logger.warning(f"Could not retrieve memory stats: {str(e)}")
|
||||
|
||||
|
||||
# Shutdown workers and queues immediately
|
||||
try:
|
||||
from changedetectionio import worker_pool
|
||||
worker_pool.shutdown_workers()
|
||||
from changedetectionio import worker_handler
|
||||
worker_handler.shutdown_workers()
|
||||
except Exception as e:
|
||||
logger.error(f"Error shutting down workers: {str(e)}")
|
||||
|
||||
@@ -126,9 +99,9 @@ def sigshutdown_handler(_signo, _stack_frame):
|
||||
from changedetectionio.flask_app import update_q, notification_q
|
||||
update_q.close()
|
||||
notification_q.close()
|
||||
logger.debug("Queues closed successfully")
|
||||
logger.debug("Janus queues closed successfully")
|
||||
except Exception as e:
|
||||
logger.critical(f"CRITICAL: Failed to close queues: {e}")
|
||||
logger.critical(f"CRITICAL: Failed to close janus queues: {e}")
|
||||
|
||||
# Shutdown socketio server fast
|
||||
from changedetectionio.flask_app import socketio_server
|
||||
@@ -138,80 +111,31 @@ def sigshutdown_handler(_signo, _stack_frame):
|
||||
except Exception as e:
|
||||
logger.error(f"Error shutting down Socket.IO server: {str(e)}")
|
||||
|
||||
# With immediate persistence, all data is already saved
|
||||
logger.success('All data already persisted (immediate commits enabled).')
|
||||
|
||||
# Save data quickly
|
||||
try:
|
||||
datastore.sync_to_json()
|
||||
logger.success('Fast sync to disk complete.')
|
||||
except Exception as e:
|
||||
logger.error(f"Error syncing to disk: {str(e)}")
|
||||
|
||||
sys.exit()
|
||||
|
||||
def print_help():
    """Write the command line usage summary to standard output, one entry per line."""
    # Each tuple entry becomes exactly one printed line; '' yields a blank separator line.
    usage_lines = (
        'Usage: changedetection.py [options]',
        '',
        'Standard options:',
        ' -s SSL enable',
        ' -h HOST Listen host (default: 0.0.0.0)',
        ' -p PORT Listen port (default: 5000)',
        ' -d PATH Datastore path',
        ' -l LEVEL Log level (TRACE, DEBUG, INFO, SUCCESS, WARNING, ERROR, CRITICAL)',
        ' -c Cleanup unused snapshots',
        ' -C Create datastore directory if it doesn\'t exist',
        ' -P true/false Set all watches paused (true) or active (false)',
        '',
        'Add URLs on startup:',
        ' -u URL Add URL to watch (can be used multiple times)',
        ' -u0 \'JSON\' Set options for first -u URL (e.g. \'{"processor":"text_json_diff"}\')',
        ' -u1 \'JSON\' Set options for second -u URL (0-indexed)',
        ' -u2 \'JSON\' Set options for third -u URL, etc.',
        ' Available options: processor, fetch_backend, headers, method, etc.',
        ' See model/Watch.py for all available options',
        '',
        'Recheck on startup:',
        ' -r all Queue all watches for recheck on startup',
        ' -r UUID,... Queue specific watches (comma-separated UUIDs)',
        ' -r all N Queue all watches, wait for completion, repeat N times',
        ' -r UUID,... N Queue specific watches, wait for completion, repeat N times',
        '',
        'Batch mode:',
        ' -b Run in batch mode (process queue then exit)',
        ' Useful for CI/CD, cron jobs, or one-time checks',
        ' NOTE: Batch mode checks if Flask is running and aborts if port is in use',
        ' Use -p PORT to specify a different port if needed',
        '',
    )
    for usage_line in usage_lines:
        print(usage_line)
|
||||
|
||||
def main():
|
||||
global datastore
|
||||
global app
|
||||
|
||||
# Early help/version check before any initialization
|
||||
if '--help' in sys.argv or '-help' in sys.argv:
|
||||
print_help()
|
||||
sys.exit(0)
|
||||
|
||||
if '--version' in sys.argv or '-v' in sys.argv:
|
||||
print(f'changedetection.io {__version__}')
|
||||
sys.exit(0)
|
||||
|
||||
# Import heavy modules after help/version checks to keep startup fast for those flags
|
||||
from changedetectionio import store
|
||||
from changedetectionio.flask_app import changedetection_app
|
||||
|
||||
datastore_path = None
|
||||
do_cleanup = False
|
||||
# Optional URL to watch since start
|
||||
default_url = None
|
||||
# Set a default logger level
|
||||
logger_level = 'DEBUG'
|
||||
include_default_watches = True
|
||||
all_paused = None # None means don't change, True/False to set
|
||||
|
||||
host = os.environ.get("LISTEN_HOST", "0.0.0.0").strip()
|
||||
port = int(os.environ.get('PORT', 5000))
|
||||
ssl_mode = False
|
||||
|
||||
# Lists for multiple URLs and their options
|
||||
urls_to_add = []
|
||||
url_options = {} # Key: index (0-based), Value: dict of options
|
||||
recheck_watches = None # None, 'all', or list of UUIDs
|
||||
recheck_repeat_count = 1 # Number of times to repeat recheck cycle
|
||||
batch_mode = False # Run once then exit when queue is empty
|
||||
|
||||
# On Windows, create and use a default path.
|
||||
if os.name == 'nt':
|
||||
datastore_path = os.path.expandvars(r'%APPDATA%\changedetection.io')
|
||||
@@ -220,68 +144,10 @@ def main():
|
||||
# Must be absolute so that send_from_directory doesn't try to make it relative to backend/
|
||||
datastore_path = os.path.join(os.getcwd(), "../datastore")
|
||||
|
||||
# Pre-process arguments to extract -u, -u<N>, and -r options before getopt
|
||||
# This allows unlimited -u0, -u1, -u2, ... options without predefining them
|
||||
cleaned_argv = ['changedetection.py'] # Start with program name
|
||||
i = 1
|
||||
while i < len(sys.argv):
|
||||
arg = sys.argv[i]
|
||||
|
||||
# Handle -u (add URL)
|
||||
if arg == '-u' and i + 1 < len(sys.argv):
|
||||
urls_to_add.append(sys.argv[i + 1])
|
||||
i += 2
|
||||
continue
|
||||
|
||||
# Handle -u<N> (set options for URL at index N)
|
||||
if arg.startswith('-u') and len(arg) > 2 and arg[2:].isdigit():
|
||||
idx = int(arg[2:])
|
||||
if i + 1 < len(sys.argv):
|
||||
try:
|
||||
import json
|
||||
url_options[idx] = json.loads(sys.argv[i + 1])
|
||||
except json.JSONDecodeError as e:
|
||||
print(f'Error: Invalid JSON for {arg}: {sys.argv[i + 1]}')
|
||||
print(f'JSON decode error: {e}')
|
||||
sys.exit(2)
|
||||
i += 2
|
||||
continue
|
||||
|
||||
# Handle -r (recheck watches)
|
||||
if arg == '-r' and i + 1 < len(sys.argv):
|
||||
recheck_arg = sys.argv[i + 1]
|
||||
if recheck_arg.lower() == 'all':
|
||||
recheck_watches = 'all'
|
||||
else:
|
||||
# Parse comma-separated list of UUIDs
|
||||
recheck_watches = [uuid.strip() for uuid in recheck_arg.split(',') if uuid.strip()]
|
||||
|
||||
# Check for optional repeat count as third argument
|
||||
if i + 2 < len(sys.argv) and sys.argv[i + 2].isdigit():
|
||||
recheck_repeat_count = int(sys.argv[i + 2])
|
||||
if recheck_repeat_count < 1:
|
||||
print(f'Error: Repeat count must be at least 1, got {recheck_repeat_count}')
|
||||
sys.exit(2)
|
||||
i += 3
|
||||
else:
|
||||
i += 2
|
||||
continue
|
||||
|
||||
# Handle -b (batch mode - run once and exit)
|
||||
if arg == '-b':
|
||||
batch_mode = True
|
||||
i += 1
|
||||
continue
|
||||
|
||||
# Keep other arguments for getopt
|
||||
cleaned_argv.append(arg)
|
||||
i += 1
|
||||
|
||||
try:
|
||||
opts, args = getopt.getopt(cleaned_argv[1:], "6Csd:h:p:l:P:", "port")
|
||||
except getopt.GetoptError as e:
|
||||
print_help()
|
||||
print(f'Error: {e}')
|
||||
opts, args = getopt.getopt(sys.argv[1:], "6Ccsd:h:p:l:u:", "port")
|
||||
except getopt.GetoptError:
|
||||
print('backend.py -s SSL enable -h [host] -p [port] -d [datastore path] -u [default URL to watch] -l [debug level - TRACE, DEBUG(default), INFO, SUCCESS, WARNING, ERROR, CRITICAL]')
|
||||
sys.exit(2)
|
||||
|
||||
create_datastore_dir = False
|
||||
@@ -306,6 +172,14 @@ def main():
|
||||
if opt == '-d':
|
||||
datastore_path = arg
|
||||
|
||||
if opt == '-u':
|
||||
default_url = arg
|
||||
include_default_watches = False
|
||||
|
||||
# Cleanup (remove text files that aren't in the index)
|
||||
if opt == '-c':
|
||||
do_cleanup = True
|
||||
|
||||
# Create the datadir if it doesn't exist
|
||||
if opt == '-C':
|
||||
create_datastore_dir = True
|
||||
@@ -313,18 +187,6 @@ def main():
|
||||
if opt == '-l':
|
||||
logger_level = int(arg) if arg.isdigit() else arg.upper()
|
||||
|
||||
if opt == '-P':
|
||||
try:
|
||||
all_paused = bool(strtobool(arg))
|
||||
except ValueError:
|
||||
print(f'Error: Invalid value for -P option: {arg}')
|
||||
print('Expected: true, false, yes, no, 1, or 0')
|
||||
sys.exit(2)
|
||||
|
||||
# If URLs are provided, don't include default watches
|
||||
if urls_to_add:
|
||||
include_default_watches = False
|
||||
|
||||
|
||||
logger.success(f"changedetection.io version {get_version()} starting.")
|
||||
# Launch using SocketIO run method for proper integration (if enabled)
|
||||
@@ -361,16 +223,11 @@ def main():
|
||||
logging.getLogger('pyppeteer.connection.Connection').setLevel(logging.WARNING)
|
||||
|
||||
# isnt there some @thingy to attach to each route to tell it, that this route needs a datastore
|
||||
app_config = {
|
||||
'datastore_path': datastore_path,
|
||||
'batch_mode': batch_mode,
|
||||
'recheck_watches': recheck_watches,
|
||||
'recheck_repeat_count': recheck_repeat_count
|
||||
}
|
||||
app_config = {'datastore_path': datastore_path}
|
||||
|
||||
if not os.path.isdir(app_config['datastore_path']):
|
||||
if create_datastore_dir:
|
||||
os.makedirs(app_config['datastore_path'], exist_ok=True)
|
||||
os.mkdir(app_config['datastore_path'])
|
||||
else:
|
||||
logger.critical(
|
||||
f"ERROR: Directory path for the datastore '{app_config['datastore_path']}'"
|
||||
@@ -385,219 +242,17 @@ def main():
|
||||
# Don't start if the JSON DB looks corrupt
|
||||
logger.critical(f"ERROR: JSON DB or Proxy List JSON at '{app_config['datastore_path']}' appears to be corrupt, aborting.")
|
||||
logger.critical(str(e))
|
||||
sys.exit(1)
|
||||
|
||||
# Testing mode: Exit cleanly after datastore initialization (for CI/CD upgrade tests)
|
||||
if os.environ.get('TESTING_SHUTDOWN_AFTER_DATASTORE_LOAD'):
|
||||
logger.success(f"TESTING MODE: Datastore loaded successfully from {app_config['datastore_path']}")
|
||||
logger.success(f"TESTING MODE: Schema version: {datastore.data['settings']['application'].get('schema_version', 'unknown')}")
|
||||
logger.success(f"TESTING MODE: Loaded {len(datastore.data['watching'])} watches")
|
||||
logger.success("TESTING MODE: Exiting cleanly (TESTING_SHUTDOWN_AFTER_DATASTORE_LOAD is set)")
|
||||
sys.exit(0)
|
||||
|
||||
# Apply all_paused setting if specified via CLI
|
||||
if all_paused is not None:
|
||||
datastore.data['settings']['application']['all_paused'] = all_paused
|
||||
logger.info(f"Setting all watches paused: {all_paused}")
|
||||
return
|
||||
|
||||
# Inject datastore into plugins that need access to settings
|
||||
from changedetectionio.pluggy_interface import inject_datastore_into_plugins
|
||||
inject_datastore_into_plugins(datastore)
|
||||
|
||||
# Step 1: Add URLs with their options (if provided via -u flags)
|
||||
added_watch_uuids = []
|
||||
if urls_to_add:
|
||||
logger.info(f"Adding {len(urls_to_add)} URL(s) from command line")
|
||||
for idx, url in enumerate(urls_to_add):
|
||||
extras = url_options.get(idx, {})
|
||||
if extras:
|
||||
logger.debug(f"Adding watch {idx}: {url} with options: {extras}")
|
||||
else:
|
||||
logger.debug(f"Adding watch {idx}: {url}")
|
||||
|
||||
new_uuid = datastore.add_watch(url=url, extras=extras)
|
||||
if new_uuid:
|
||||
added_watch_uuids.append(new_uuid)
|
||||
logger.success(f"Added watch: {url} (UUID: {new_uuid})")
|
||||
else:
|
||||
logger.error(f"Failed to add watch: {url}")
|
||||
if default_url:
|
||||
datastore.add_watch(url = default_url)
|
||||
|
||||
app = changedetection_app(app_config, datastore)
|
||||
|
||||
# Step 2: Queue newly added watches (if -u was provided in batch mode)
|
||||
# This must happen AFTER app initialization so update_q is available
|
||||
if batch_mode and added_watch_uuids:
|
||||
from changedetectionio.flask_app import update_q
|
||||
from changedetectionio import queuedWatchMetaData, worker_pool
|
||||
|
||||
logger.info(f"Batch mode: Queuing {len(added_watch_uuids)} newly added watches")
|
||||
for watch_uuid in added_watch_uuids:
|
||||
try:
|
||||
worker_pool.queue_item_async_safe(
|
||||
update_q,
|
||||
queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid})
|
||||
)
|
||||
logger.debug(f"Queued newly added watch: {watch_uuid}")
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to queue watch {watch_uuid}: {e}")
|
||||
|
||||
# Step 3: Queue watches for recheck (if -r was provided)
|
||||
# This must happen AFTER app initialization so update_q is available
|
||||
if recheck_watches is not None:
|
||||
from changedetectionio.flask_app import update_q
|
||||
from changedetectionio import queuedWatchMetaData, worker_pool
|
||||
|
||||
watches_to_queue = []
|
||||
if recheck_watches == 'all':
|
||||
# Queue all watches, excluding those already queued in batch mode
|
||||
all_watches = list(datastore.data['watching'].keys())
|
||||
if batch_mode and added_watch_uuids:
|
||||
# Exclude newly added watches that were already queued in batch mode
|
||||
watches_to_queue = [uuid for uuid in all_watches if uuid not in added_watch_uuids]
|
||||
logger.info(f"Queuing {len(watches_to_queue)} existing watches for recheck ({len(added_watch_uuids)} newly added watches already queued)")
|
||||
else:
|
||||
watches_to_queue = all_watches
|
||||
logger.info(f"Queuing all {len(watches_to_queue)} watches for recheck")
|
||||
else:
|
||||
# Queue specific UUIDs
|
||||
watches_to_queue = recheck_watches
|
||||
logger.info(f"Queuing {len(watches_to_queue)} specific watches for recheck")
|
||||
|
||||
queued_count = 0
|
||||
for watch_uuid in watches_to_queue:
|
||||
if watch_uuid in datastore.data['watching']:
|
||||
try:
|
||||
worker_pool.queue_item_async_safe(
|
||||
update_q,
|
||||
queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid})
|
||||
)
|
||||
queued_count += 1
|
||||
logger.debug(f"Queued watch for recheck: {watch_uuid}")
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to queue watch {watch_uuid}: {e}")
|
||||
else:
|
||||
logger.warning(f"Watch UUID not found in datastore: {watch_uuid}")
|
||||
|
||||
logger.success(f"Successfully queued {queued_count} watches for recheck")
|
||||
|
||||
# Step 4: Setup batch mode monitor (if -b was provided)
|
||||
if batch_mode:
|
||||
from changedetectionio.flask_app import update_q
|
||||
|
||||
# Safety check: Ensure Flask app is not already running on this port
|
||||
# Batch mode should never run alongside the web server
|
||||
import socket
|
||||
test_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
|
||||
|
||||
try:
|
||||
# Try to bind to the configured host:port (no SO_REUSEADDR - strict check)
|
||||
test_socket.bind((host, port))
|
||||
test_socket.close()
|
||||
logger.debug(f"Batch mode: Port {port} is available (Flask app not running)")
|
||||
except OSError as e:
|
||||
test_socket.close()
|
||||
# errno 98 = EADDRINUSE (Linux)
|
||||
# errno 48 = EADDRINUSE (macOS)
|
||||
# errno 10048 = WSAEADDRINUSE (Windows)
|
||||
if e.errno in (48, 98, 10048) or "Address already in use" in str(e) or "already in use" in str(e).lower():
|
||||
logger.critical(f"ERROR: Batch mode cannot run - port {port} is already in use")
|
||||
logger.critical(f"The Flask web server appears to be running on {host}:{port}")
|
||||
logger.critical(f"Batch mode is designed for standalone operation (CI/CD, cron jobs, etc.)")
|
||||
logger.critical(f"Please either stop the Flask web server, or use a different port with -p PORT")
|
||||
sys.exit(1)
|
||||
else:
|
||||
# Some other socket error - log but continue (might be network configuration issue)
|
||||
logger.warning(f"Port availability check failed with unexpected error: {e}")
|
||||
logger.warning(f"Continuing with batch mode anyway - be aware of potential conflicts")
|
||||
|
||||
def queue_watches_for_recheck(datastore, iteration):
    """Put the watches selected for recheck onto update_q and report how many were queued.

    Args:
        datastore: Object whose data['watching'] mapping lists the known watch UUIDs.
        iteration: Iteration number used in the log messages; newly added watches
            are only left out of an 'all' recheck when this equals 1.

    Returns:
        Number of watches that were queued without raising.
    """
    if recheck_watches == 'all':
        targets = list(datastore.data['watching'].keys())
        if batch_mode and added_watch_uuids and iteration == 1:
            # Only exclude newly added watches on first iteration
            targets = [candidate for candidate in targets if candidate not in added_watch_uuids]
        logger.info(f"Batch mode (iteration {iteration}): Queuing all {len(targets)} watches")
    elif recheck_watches:
        targets = recheck_watches
        logger.info(f"Batch mode (iteration {iteration}): Queuing {len(targets)} specific watches")
    else:
        # Nothing was requested for recheck
        targets = []

    queued_count = 0
    for watch_uuid in targets:
        if watch_uuid not in datastore.data['watching']:
            logger.warning(f"Watch UUID not found in datastore: {watch_uuid}")
            continue
        try:
            queue_entry = queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid})
            worker_pool.queue_item_async_safe(update_q, queue_entry)
            queued_count += 1
        except Exception as e:
            # A single failed enqueue must not stop the remaining watches from being queued
            logger.error(f"Failed to queue watch {watch_uuid}: {e}")

    logger.success(f"Batch mode (iteration {iteration}): Successfully queued {queued_count} watches")
    return queued_count
|
||||
|
||||
def batch_mode_monitor():
    """Monitor queue and workers, shutdown or repeat when work is complete.

    Waits for the queued checks of each iteration to finish, re-queues the
    recheck targets between iterations, and sends SIGTERM to the current
    process once the last iteration is done (or when an unexpected error occurs).
    """
    import os
    import signal
    import time
    # Import locally: the enclosing function only imports worker_pool inside its
    # optional "-u" / "-r" branches, so in a plain "-b" run the enclosing name is
    # unbound and every call below would raise NameError into the except handler.
    from changedetectionio import worker_pool

    # Track iterations if repeat mode is enabled
    current_iteration = 1
    total_iterations = recheck_repeat_count if recheck_watches and recheck_repeat_count > 1 else 1

    if total_iterations > 1:
        logger.info(f"Batch mode: Will repeat recheck {total_iterations} times")
    else:
        logger.info("Batch mode: Waiting for all queued items to complete...")

    # Wait a bit for workers to start processing
    time.sleep(3)

    try:
        while current_iteration <= total_iterations:
            logger.info(f"Batch mode: Waiting for iteration {current_iteration}/{total_iterations} to complete...")

            # Use the shared wait_for_all_checks function
            completed = worker_pool.wait_for_all_checks(update_q, timeout=300)

            if not completed:
                logger.warning(f"Batch mode: Iteration {current_iteration} timed out after 300 seconds")

            logger.success(f"Batch mode: Iteration {current_iteration}/{total_iterations} completed")

            # Check if we need to repeat
            if current_iteration < total_iterations:
                logger.info(f"Batch mode: Starting iteration {current_iteration + 1}...")
                current_iteration += 1

                # Re-queue watches for next iteration
                queue_watches_for_recheck(datastore, current_iteration)

                # Brief pause before continuing
                time.sleep(2)
            else:
                # All iterations complete
                logger.success(f"Batch mode: All {total_iterations} iterations completed, initiating shutdown")
                # Trigger shutdown
                os.kill(os.getpid(), signal.SIGTERM)
                return

    except Exception as e:
        logger.error(f"Batch mode monitor error: {e}")
        logger.error("Initiating emergency shutdown")
        os.kill(os.getpid(), signal.SIGTERM)
|
||||
|
||||
# Start monitor in background thread
|
||||
monitor_thread = threading.Thread(target=batch_mode_monitor, daemon=True, name="BatchModeMonitor")
|
||||
monitor_thread.start()
|
||||
logger.info("Batch mode enabled: Will exit after all queued items are processed")
|
||||
|
||||
# Get the SocketIO instance from the Flask app (created in flask_app.py)
|
||||
from changedetectionio.flask_app import socketio_server
|
||||
global socketio
|
||||
@@ -619,17 +274,19 @@ def main():
|
||||
else:
|
||||
logger.info("SIGUSR1 handler only registered on Linux, skipped.")
|
||||
|
||||
# Go into cleanup mode
|
||||
if do_cleanup:
|
||||
datastore.remove_unused_snapshots()
|
||||
|
||||
app.config['datastore_path'] = datastore_path
|
||||
|
||||
|
||||
@app.context_processor
|
||||
def inject_template_globals():
|
||||
return dict(right_sticky="v"+__version__,
|
||||
return dict(right_sticky="v{}".format(datastore.data['version_tag']),
|
||||
new_version_available=app.config['NEW_VERSION_AVAILABLE'],
|
||||
has_password=datastore.data['settings']['application']['password'] != False,
|
||||
socket_io_enabled=datastore.data['settings']['application'].get('ui', {}).get('socket_io_enabled', True),
|
||||
all_paused=datastore.data['settings']['application'].get('all_paused', False),
|
||||
all_muted=datastore.data['settings']['application'].get('all_muted', False)
|
||||
socket_io_enabled=datastore.data['settings']['application']['ui'].get('socket_io_enabled', True)
|
||||
)
|
||||
|
||||
# Monitored websites will not receive a Referer header when a user clicks on an outgoing link.
|
||||
@@ -651,43 +308,23 @@ def main():
|
||||
if os.getenv('USE_X_SETTINGS'):
|
||||
logger.info("USE_X_SETTINGS is ENABLED")
|
||||
from werkzeug.middleware.proxy_fix import ProxyFix
|
||||
app.wsgi_app = ProxyFix(
|
||||
app.wsgi_app,
|
||||
x_for=1, # X-Forwarded-For (client IP)
|
||||
x_proto=1, # X-Forwarded-Proto (http/https)
|
||||
x_host=1, # X-Forwarded-Host (original host)
|
||||
x_port=1, # X-Forwarded-Port (original port)
|
||||
x_prefix=1 # X-Forwarded-Prefix (URL prefix)
|
||||
)
|
||||
app.wsgi_app = ProxyFix(app.wsgi_app, x_prefix=1, x_host=1)
|
||||
|
||||
|
||||
# In batch mode, skip starting the HTTP server - just keep workers running
|
||||
if batch_mode:
|
||||
logger.info("Batch mode: Skipping HTTP server startup, workers will process queue")
|
||||
logger.info("Batch mode: Main thread will wait for shutdown signal")
|
||||
# Keep main thread alive until batch monitor triggers shutdown
|
||||
try:
|
||||
while True:
|
||||
time.sleep(1)
|
||||
except KeyboardInterrupt:
|
||||
logger.info("Batch mode: Keyboard interrupt received")
|
||||
pass
|
||||
else:
|
||||
# Normal mode: Start HTTP server
|
||||
# SocketIO instance is already initialized in flask_app.py
|
||||
if socketio_server:
|
||||
if ssl_mode:
|
||||
logger.success(f"SSL mode enabled, attempting to start with '{ssl_cert_file}' and '{ssl_privkey_file}' in {os.getcwd()}")
|
||||
socketio.run(app, host=host, port=int(port), debug=False,
|
||||
ssl_context=(ssl_cert_file, ssl_privkey_file), allow_unsafe_werkzeug=True)
|
||||
else:
|
||||
socketio.run(app, host=host, port=int(port), debug=False, allow_unsafe_werkzeug=True)
|
||||
# SocketIO instance is already initialized in flask_app.py
|
||||
if socketio_server:
|
||||
if ssl_mode:
|
||||
logger.success(f"SSL mode enabled, attempting to start with '{ssl_cert_file}' and '{ssl_privkey_file}' in {os.getcwd()}")
|
||||
socketio.run(app, host=host, port=int(port), debug=False,
|
||||
ssl_context=(ssl_cert_file, ssl_privkey_file), allow_unsafe_werkzeug=True)
|
||||
else:
|
||||
# Run Flask app without Socket.IO if disabled
|
||||
logger.info("Starting Flask app without Socket.IO server")
|
||||
if ssl_mode:
|
||||
logger.success(f"SSL mode enabled, attempting to start with '{ssl_cert_file}' and '{ssl_privkey_file}' in {os.getcwd()}")
|
||||
app.run(host=host, port=int(port), debug=False,
|
||||
ssl_context=(ssl_cert_file, ssl_privkey_file))
|
||||
else:
|
||||
app.run(host=host, port=int(port), debug=False)
|
||||
socketio.run(app, host=host, port=int(port), debug=False, allow_unsafe_werkzeug=True)
|
||||
else:
|
||||
# Run Flask app without Socket.IO if disabled
|
||||
logger.info("Starting Flask app without Socket.IO server")
|
||||
if ssl_mode:
|
||||
logger.success(f"SSL mode enabled, attempting to start with '{ssl_cert_file}' and '{ssl_privkey_file}' in {os.getcwd()}")
|
||||
app.run(host=host, port=int(port), debug=False,
|
||||
ssl_context=(ssl_cert_file, ssl_privkey_file))
|
||||
else:
|
||||
app.run(host=host, port=int(port), debug=False)
|
||||
|
||||
@@ -4,10 +4,6 @@ from flask import request
|
||||
from functools import wraps
|
||||
from . import auth, validate_openapi_request
|
||||
from ..validate_url import is_safe_valid_url
|
||||
import json
|
||||
|
||||
# Number of URLs above which import switches to background processing
|
||||
IMPORT_SWITCH_TO_BACKGROUND_THRESHOLD = 20
|
||||
|
||||
|
||||
def default_content_type(content_type='text/plain'):
|
||||
@@ -23,76 +19,6 @@ def default_content_type(content_type='text/plain'):
|
||||
return decorator
|
||||
|
||||
|
||||
def convert_query_param_to_type(value, schema_property):
    """
    Convert a query parameter string to the appropriate type based on schema definition.

    Args:
        value: String value from query parameter
        schema_property: Schema property definition with 'type' or 'anyOf' field

    Returns:
        Converted value in the appropriate type. Unknown or missing types
        return the string unchanged.

    Raises:
        ValueError: If an 'object' field is not valid JSON or decodes to
            something other than a JSON object (or null), or if an
            'integer'/'number' field cannot be parsed.

    Supports both OpenAPI 3.1 formats:
    - type: [string, 'null'] (array format)
    - anyOf: [{type: string}, {type: null}] (anyOf format)
    """
    prop_type = schema_property.get('type')

    if isinstance(prop_type, list):
        # OpenAPI 3.1 type arrays: use the first non-null type, or None if there is none
        prop_type = next((t for t in prop_type if t != 'null'), None)
    elif 'anyOf' in schema_property:
        # anyOf schemas: use the first option that declares a non-null type
        prop_type = next(
            (
                option.get('type')
                for option in schema_property['anyOf']
                if option.get('type') and option.get('type') != 'null'
            ),
            None,
        )

    # Handle array type (e.g., notification_urls)
    if prop_type == 'array':
        # Support both comma-separated and JSON array format
        if value.startswith('['):
            try:
                return json.loads(value)
            except json.JSONDecodeError:
                # Not valid JSON after all - fall back to comma-separated parsing
                pass
        return [v.strip() for v in value.split(',')]

    # Handle object type (e.g., time_between_check, headers)
    if prop_type == 'object':
        try:
            parsed = json.loads(value)
        except json.JSONDecodeError as exc:
            raise ValueError(f"Invalid JSON object for field: {value}") from exc
        # Valid JSON that is a list/number/string is still not an object; JSON null
        # is passed through as before so nullable object fields keep working.
        if parsed is not None and not isinstance(parsed, dict):
            raise ValueError(f"Invalid JSON object for field: {value}")
        return parsed

    # Handle boolean type
    if prop_type == 'boolean':
        return strtobool(value)

    # Handle integer type
    if prop_type == 'integer':
        return int(value)

    # Handle number type (float)
    if prop_type == 'number':
        return float(value)

    # Default: return as string
    return value
|
||||
|
||||
|
||||
class Import(Resource):
|
||||
def __init__(self, **kwargs):
|
||||
# datastore is a black box dependency
|
||||
@@ -102,135 +28,40 @@ class Import(Resource):
|
||||
@default_content_type('text/plain') #3547 #3542
|
||||
@validate_openapi_request('importWatches')
|
||||
def post(self):
|
||||
"""Import a list of watched URLs with optional watch configuration."""
|
||||
from . import get_watch_schema_properties
|
||||
# Special parameters that are NOT watch configuration
|
||||
special_params = {'tag', 'tag_uuids', 'dedupe', 'proxy'}
|
||||
"""Import a list of watched URLs."""
|
||||
|
||||
extras = {}
|
||||
|
||||
# Handle special 'proxy' parameter
|
||||
if request.args.get('proxy'):
|
||||
plist = self.datastore.proxy_list
|
||||
if not request.args.get('proxy') in plist:
|
||||
proxy_list_str = ', '.join(plist) if plist else 'none configured'
|
||||
return f"Invalid proxy choice, currently supported proxies are '{proxy_list_str}'", 400
|
||||
return "Invalid proxy choice, currently supported proxies are '{}'".format(', '.join(plist)), 400
|
||||
else:
|
||||
extras['proxy'] = request.args.get('proxy')
|
||||
|
||||
# Handle special 'dedupe' parameter
|
||||
dedupe = strtobool(request.args.get('dedupe', 'true'))
|
||||
|
||||
# Handle special 'tag' and 'tag_uuids' parameters
|
||||
tags = request.args.get('tag')
|
||||
tag_uuids = request.args.get('tag_uuids')
|
||||
|
||||
if tag_uuids:
|
||||
tag_uuids = tag_uuids.split(',')
|
||||
|
||||
# Extract ALL other query parameters as watch configuration
|
||||
# Get schema from OpenAPI spec (replaces old schema_create_watch)
|
||||
schema_properties = get_watch_schema_properties()
|
||||
for param_name, param_value in request.args.items():
|
||||
# Skip special parameters
|
||||
if param_name in special_params:
|
||||
continue
|
||||
|
||||
# Skip if not in schema (unknown parameter)
|
||||
if param_name not in schema_properties:
|
||||
return f"Unknown watch configuration parameter: {param_name}", 400
|
||||
|
||||
# Convert to appropriate type based on schema
|
||||
try:
|
||||
converted_value = convert_query_param_to_type(param_value, schema_properties[param_name])
|
||||
extras[param_name] = converted_value
|
||||
except (ValueError, json.JSONDecodeError) as e:
|
||||
return f"Invalid value for parameter '{param_name}': {str(e)}", 400
|
||||
|
||||
# Validate processor if provided
|
||||
if 'processor' in extras:
|
||||
from changedetectionio.processors import available_processors
|
||||
available = [p[0] for p in available_processors()]
|
||||
if extras['processor'] not in available:
|
||||
return f"Invalid processor '{extras['processor']}'. Available processors: {', '.join(available)}", 400
|
||||
|
||||
# Validate fetch_backend if provided (legacy API compat — still accepted, stored as-is)
|
||||
if 'fetch_backend' in extras:
|
||||
from changedetectionio.content_fetchers import available_fetchers
|
||||
available = [f[0] for f in available_fetchers()]
|
||||
is_valid = (
|
||||
extras['fetch_backend'] == 'system' or
|
||||
extras['fetch_backend'] in available or
|
||||
extras['fetch_backend'].startswith('extra_browser_')
|
||||
)
|
||||
if not is_valid:
|
||||
return f"Invalid fetch_backend '{extras['fetch_backend']}'. Available: system, {', '.join(available)}", 400
|
||||
|
||||
# Validate browser_profile if provided
|
||||
if 'browser_profile' in extras:
|
||||
from changedetectionio.model.browser_profile import get_builtin_profiles, RESERVED_MACHINE_NAMES
|
||||
store_profiles = self.datastore.data['settings']['application'].get('browser_profiles', {})
|
||||
known = set(get_builtin_profiles().keys()) | set(store_profiles.keys()) | {'system', None}
|
||||
if extras['browser_profile'] not in known:
|
||||
return f"Invalid browser_profile '{extras['browser_profile']}'. Available: {', '.join(str(k) for k in known)}", 400
|
||||
|
||||
# Validate notification_urls if provided
|
||||
if 'notification_urls' in extras:
|
||||
from wtforms import ValidationError
|
||||
from changedetectionio.api.Notifications import validate_notification_urls
|
||||
try:
|
||||
validate_notification_urls(extras['notification_urls'])
|
||||
except ValidationError as e:
|
||||
return f"Invalid notification_urls: {str(e)}", 400
|
||||
|
||||
urls = request.get_data().decode('utf8').splitlines()
|
||||
# Clean and validate URLs upfront
|
||||
urls_to_import = []
|
||||
added = []
|
||||
for url in urls:
|
||||
url = url.strip()
|
||||
if not len(url):
|
||||
continue
|
||||
|
||||
# Validate URL
|
||||
# If hosts that only contain alphanumerics are allowed ("localhost" for example)
|
||||
if not is_safe_valid_url(url):
|
||||
return f"Invalid or unsupported URL - {url}", 400
|
||||
|
||||
# Check for duplicates if dedupe is enabled
|
||||
if dedupe and self.datastore.url_exists(url):
|
||||
continue
|
||||
|
||||
urls_to_import.append(url)
|
||||
new_uuid = self.datastore.add_watch(url=url, extras=extras, tag=tags, tag_uuids=tag_uuids)
|
||||
added.append(new_uuid)
|
||||
|
||||
# For small imports, process synchronously for immediate feedback
|
||||
if len(urls_to_import) < IMPORT_SWITCH_TO_BACKGROUND_THRESHOLD:
|
||||
added = []
|
||||
for url in urls_to_import:
|
||||
new_uuid = self.datastore.add_watch(url=url, extras=extras, tag=tags, tag_uuids=tag_uuids)
|
||||
added.append(new_uuid)
|
||||
return added, 200
|
||||
|
||||
# For large imports (>= 20), process in background thread
|
||||
else:
|
||||
import threading
|
||||
from loguru import logger
|
||||
|
||||
def import_watches_background():
|
||||
"""Background thread to import watches - discarded after completion."""
|
||||
try:
|
||||
added_count = 0
|
||||
for url in urls_to_import:
|
||||
try:
|
||||
self.datastore.add_watch(url=url, extras=extras, tag=tags, tag_uuids=tag_uuids)
|
||||
added_count += 1
|
||||
except Exception as e:
|
||||
logger.error(f"Error importing URL {url}: {e}")
|
||||
|
||||
logger.info(f"Background import complete: {added_count} watches created")
|
||||
except Exception as e:
|
||||
logger.error(f"Error in background import: {e}")
|
||||
|
||||
# Start background thread and return immediately
|
||||
thread = threading.Thread(target=import_watches_background, daemon=True, name="ImportWatches-Background")
|
||||
thread.start()
|
||||
|
||||
return {'status': f'Importing {len(urls_to_import)} URLs in background', 'count': len(urls_to_import)}, 202
|
||||
return added
|
||||
@@ -1,6 +1,8 @@
|
||||
from flask_expects_json import expects_json
|
||||
from flask_restful import Resource, abort
|
||||
from flask import request
|
||||
from . import auth, validate_openapi_request
|
||||
from . import schema_create_notification_urls, schema_delete_notification_urls
|
||||
|
||||
class Notifications(Resource):
|
||||
def __init__(self, **kwargs):
|
||||
@@ -20,6 +22,7 @@ class Notifications(Resource):
|
||||
|
||||
@auth.check_token
|
||||
@validate_openapi_request('addNotifications')
|
||||
@expects_json(schema_create_notification_urls)
|
||||
def post(self):
|
||||
"""Create Notification URLs."""
|
||||
|
||||
@@ -47,6 +50,7 @@ class Notifications(Resource):
|
||||
|
||||
@auth.check_token
|
||||
@validate_openapi_request('replaceNotifications')
|
||||
@expects_json(schema_create_notification_urls)
|
||||
def put(self):
|
||||
"""Replace Notification URLs."""
|
||||
json_data = request.get_json()
|
||||
@@ -63,12 +67,13 @@ class Notifications(Resource):
|
||||
|
||||
clean_urls = [url.strip() for url in notification_urls if isinstance(url, str)]
|
||||
self.datastore.data['settings']['application']['notification_urls'] = clean_urls
|
||||
self.datastore.commit()
|
||||
self.datastore.needs_write = True
|
||||
|
||||
return {'notification_urls': clean_urls}, 200
|
||||
|
||||
@auth.check_token
|
||||
@validate_openapi_request('deleteNotifications')
|
||||
@expects_json(schema_delete_notification_urls)
|
||||
def delete(self):
|
||||
"""Delete Notification URLs."""
|
||||
|
||||
@@ -90,7 +95,7 @@ class Notifications(Resource):
|
||||
abort(400, message="No matching notification URLs found.")
|
||||
|
||||
self.datastore.data['settings']['application']['notification_urls'] = notification_urls
|
||||
self.datastore.commit()
|
||||
self.datastore.needs_write = True
|
||||
|
||||
return 'OK', 204
|
||||
|
||||
|
||||
@@ -1,21 +0,0 @@
|
||||
import functools
|
||||
from flask import make_response
|
||||
from flask_restful import Resource
|
||||
|
||||
|
||||
@functools.cache
|
||||
def _get_spec_yaml():
|
||||
"""Build and cache the merged spec as a YAML string (only serialized once per process)."""
|
||||
import yaml
|
||||
from changedetectionio.api import build_merged_spec_dict
|
||||
return yaml.dump(build_merged_spec_dict(), default_flow_style=False, allow_unicode=True)
|
||||
|
||||
|
||||
class Spec(Resource):
|
||||
def get(self):
|
||||
"""Return the merged OpenAPI spec including all registered processor extensions."""
|
||||
return make_response(
|
||||
_get_spec_yaml(),
|
||||
200,
|
||||
{'Content-Type': 'application/yaml'}
|
||||
)
|
||||
@@ -1,13 +1,13 @@
|
||||
from changedetectionio import queuedWatchMetaData
|
||||
from changedetectionio import worker_pool
|
||||
from changedetectionio import worker_handler
|
||||
from flask_expects_json import expects_json
|
||||
from flask_restful import abort, Resource
|
||||
from loguru import logger
|
||||
|
||||
import threading
|
||||
from flask import request
|
||||
from . import auth
|
||||
|
||||
from . import validate_openapi_request
|
||||
# Import schemas from __init__.py
|
||||
from . import schema_tag, schema_create_tag, schema_update_tag, validate_openapi_request
|
||||
|
||||
|
||||
class Tag(Resource):
|
||||
@@ -17,78 +17,38 @@ class Tag(Resource):
|
||||
self.update_q = kwargs['update_q']
|
||||
|
||||
# Get information about a single tag
|
||||
# curl http://localhost:5000/api/v1/tag/<uuid_str:uuid>
|
||||
# curl http://localhost:5000/api/v1/tag/<string:uuid>
|
||||
@auth.check_token
|
||||
@validate_openapi_request('getTag')
|
||||
def get(self, uuid):
|
||||
"""Get data for a single tag/group, toggle notification muting, or recheck all."""
|
||||
tag = self.datastore.data['settings']['application']['tags'].get(uuid)
|
||||
from copy import deepcopy
|
||||
tag = deepcopy(self.datastore.data['settings']['application']['tags'].get(uuid))
|
||||
if not tag:
|
||||
abort(404, message=f'No tag exists with the UUID of {uuid}')
|
||||
|
||||
if request.args.get('recheck'):
|
||||
# Recheck all watches with this tag, including muted
|
||||
# First collect watches to queue
|
||||
watches_to_queue = []
|
||||
# Recheck all, including muted
|
||||
# Get most overdue first
|
||||
i=0
|
||||
for k in sorted(self.datastore.data['watching'].items(), key=lambda item: item[1].get('last_checked', 0)):
|
||||
watch_uuid = k[0]
|
||||
watch = k[1]
|
||||
if not watch['paused'] and tag['uuid'] in watch['tags']:
|
||||
watches_to_queue.append(watch_uuid)
|
||||
if not watch['paused'] and tag['uuid'] not in watch['tags']:
|
||||
continue
|
||||
worker_handler.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid}))
|
||||
i+=1
|
||||
|
||||
# If less than 20 watches, queue synchronously for immediate feedback
|
||||
if len(watches_to_queue) < 20:
|
||||
for watch_uuid in watches_to_queue:
|
||||
worker_pool.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid}))
|
||||
return {'status': f'OK, queued {len(watches_to_queue)} watches for rechecking'}, 200
|
||||
else:
|
||||
# 20+ watches - queue in background thread to avoid blocking API response
|
||||
def queue_watches_background():
|
||||
"""Background thread to queue watches - discarded after completion."""
|
||||
try:
|
||||
for watch_uuid in watches_to_queue:
|
||||
worker_pool.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid}))
|
||||
logger.info(f"Background queueing complete for tag {tag['uuid']}: {len(watches_to_queue)} watches queued")
|
||||
except Exception as e:
|
||||
logger.error(f"Error in background queueing for tag {tag['uuid']}: {e}")
|
||||
|
||||
# Start background thread and return immediately
|
||||
thread = threading.Thread(target=queue_watches_background, daemon=True, name=f"QueueTag-{tag['uuid'][:8]}")
|
||||
thread.start()
|
||||
|
||||
return {'status': f'OK, queueing {len(watches_to_queue)} watches in background'}, 202
|
||||
return f"OK, {i} watches queued", 200
|
||||
|
||||
if request.args.get('muted', '') == 'muted':
|
||||
tag['notification_muted'] = True
|
||||
tag.commit()
|
||||
self.datastore.data['settings']['application']['tags'][uuid]['notification_muted'] = True
|
||||
return "OK", 200
|
||||
elif request.args.get('muted', '') == 'unmuted':
|
||||
tag['notification_muted'] = False
|
||||
tag.commit()
|
||||
self.datastore.data['settings']['application']['tags'][uuid]['notification_muted'] = False
|
||||
return "OK", 200
|
||||
|
||||
# Filter out Watch-specific runtime fields that don't apply to Tags (yet)
|
||||
# TODO: Future enhancement - aggregate these values from all Watches that have this tag:
|
||||
# - check_count: sum of all watches' check_count
|
||||
# - last_checked: most recent last_checked from all watches
|
||||
# - last_changed: most recent last_changed from all watches
|
||||
# - consecutive_filter_failures: count of watches with failures
|
||||
# - etc.
|
||||
# These come from watch_base inheritance but currently have no meaningful value for Tags
|
||||
watch_only_fields = {
|
||||
'browser_steps_last_error_step', 'check_count', 'consecutive_filter_failures',
|
||||
'content-type', 'fetch_time', 'last_changed', 'last_checked', 'last_error',
|
||||
'last_notification_error', 'last_viewed', 'notification_alert_count',
|
||||
'page_title', 'previous_md5', 'remote_server_reply'
|
||||
}
|
||||
|
||||
# Create clean tag dict without Watch-specific fields
|
||||
clean_tag = {k: v for k, v in tag.items() if k not in watch_only_fields}
|
||||
|
||||
# fetch_backend is a legacy field superseded by browser_profile — omit from API response
|
||||
clean_tag.pop('fetch_backend', None)
|
||||
|
||||
return clean_tag
|
||||
return tag
|
||||
|
||||
@auth.check_token
|
||||
@validate_openapi_request('deleteTag')
|
||||
@@ -99,94 +59,41 @@ class Tag(Resource):
|
||||
|
||||
# Delete the tag, and any tag reference
|
||||
del self.datastore.data['settings']['application']['tags'][uuid]
|
||||
|
||||
|
||||
# Remove tag from all watches
|
||||
for watch_uuid, watch in self.datastore.data['watching'].items():
|
||||
if watch.get('tags') and uuid in watch['tags']:
|
||||
watch['tags'].remove(uuid)
|
||||
watch.commit()
|
||||
|
||||
return 'OK', 204
|
||||
|
||||
@auth.check_token
|
||||
@validate_openapi_request('updateTag')
|
||||
@expects_json(schema_update_tag)
|
||||
def put(self, uuid):
|
||||
"""Update tag information."""
|
||||
tag = self.datastore.data['settings']['application']['tags'].get(uuid)
|
||||
if not tag:
|
||||
abort(404, message='No tag exists with the UUID of {}'.format(uuid))
|
||||
|
||||
# Make a mutable copy of request.json for modification
|
||||
json_data = dict(request.json)
|
||||
|
||||
# Validate notification_urls if provided
|
||||
if 'notification_urls' in json_data:
|
||||
from wtforms import ValidationError
|
||||
from changedetectionio.api.Notifications import validate_notification_urls
|
||||
try:
|
||||
notification_urls = json_data.get('notification_urls', [])
|
||||
validate_notification_urls(notification_urls)
|
||||
except ValidationError as e:
|
||||
return str(e), 400
|
||||
|
||||
# Filter out readOnly fields (extracted from OpenAPI spec Tag schema)
|
||||
# These are system-managed fields that should never be user-settable
|
||||
from . import get_readonly_tag_fields
|
||||
readonly_fields = get_readonly_tag_fields()
|
||||
|
||||
# Tag model inherits from watch_base but has no @property attributes of its own
|
||||
# So we only need to filter readOnly fields
|
||||
for field in readonly_fields:
|
||||
json_data.pop(field, None)
|
||||
|
||||
# Validate remaining fields - reject truly unknown fields
|
||||
# Get valid fields from Tag schema
|
||||
from . import get_tag_schema_properties
|
||||
valid_fields = set(get_tag_schema_properties().keys())
|
||||
|
||||
# Check for unknown fields
|
||||
unknown_fields = set(json_data.keys()) - valid_fields
|
||||
if unknown_fields:
|
||||
return f"Unknown field(s): {', '.join(sorted(unknown_fields))}", 400
|
||||
|
||||
tag.update(json_data)
|
||||
tag.commit()
|
||||
|
||||
# Clear checksums for all watches using this tag to force reprocessing
|
||||
# Tag changes affect inherited configuration
|
||||
cleared_count = self.datastore.clear_checksums_for_tag(uuid)
|
||||
logger.info(f"Tag {uuid} updated via API, cleared {cleared_count} watch checksums")
|
||||
tag.update(request.json)
|
||||
self.datastore.needs_write_urgent = True
|
||||
|
||||
return "OK", 200
|
||||
|
||||
|
||||
@auth.check_token
|
||||
@validate_openapi_request('createTag')
|
||||
# Only cares for {'title': 'xxxx'}
|
||||
def post(self):
|
||||
"""Create a single tag/group."""
|
||||
|
||||
json_data = request.get_json()
|
||||
title = json_data.get("title",'').strip()
|
||||
|
||||
# Validate that only valid fields are provided
|
||||
# Get valid fields from Tag schema
|
||||
from . import get_tag_schema_properties
|
||||
valid_fields = set(get_tag_schema_properties().keys())
|
||||
|
||||
# Check for unknown fields
|
||||
unknown_fields = set(json_data.keys()) - valid_fields
|
||||
if unknown_fields:
|
||||
return f"Unknown field(s): {', '.join(sorted(unknown_fields))}", 400
|
||||
|
||||
new_uuid = self.datastore.add_tag(title=title)
|
||||
if new_uuid:
|
||||
# Apply any extra fields (e.g. processor_config_restock_diff) beyond just title
|
||||
extra = {k: v for k, v in json_data.items() if k != 'title'}
|
||||
if extra:
|
||||
tag = self.datastore.data['settings']['application']['tags'].get(new_uuid)
|
||||
if tag:
|
||||
tag.update(extra)
|
||||
tag.commit()
|
||||
return {'uuid': new_uuid}, 201
|
||||
else:
|
||||
return "Invalid or unsupported tag", 400
|
||||
|
||||
@@ -1,18 +1,18 @@
|
||||
import os
|
||||
import threading
|
||||
|
||||
from changedetectionio.validate_url import is_safe_valid_url
|
||||
from changedetectionio.favicon_utils import get_favicon_mime_type
|
||||
|
||||
from . import auth
|
||||
from changedetectionio import queuedWatchMetaData, strtobool
|
||||
from changedetectionio import worker_pool
|
||||
from changedetectionio import worker_handler
|
||||
from flask import request, make_response, send_from_directory
|
||||
from flask_expects_json import expects_json
|
||||
from flask_restful import abort, Resource
|
||||
from loguru import logger
|
||||
import copy
|
||||
|
||||
from . import validate_openapi_request, get_readonly_watch_fields
|
||||
# Import schemas from __init__.py
|
||||
from . import schema, schema_create_watch, schema_update_watch, validate_openapi_request
|
||||
from ..notification import valid_notification_formats
|
||||
from ..notification.handler import newline_re
|
||||
|
||||
@@ -57,56 +57,41 @@ class Watch(Resource):
|
||||
self.update_q = kwargs['update_q']
|
||||
|
||||
# Get information about a single watch, excluding the history list (can be large)
|
||||
# curl http://localhost:5000/api/v1/watch/<uuid_str:uuid>
|
||||
# curl http://localhost:5000/api/v1/watch/<string:uuid>
|
||||
# @todo - version2 - ?muted and ?paused should be able to be called together, return the watch struct not "OK"
|
||||
# ?recheck=true
|
||||
@auth.check_token
|
||||
@validate_openapi_request('getWatch')
|
||||
def get(self, uuid):
|
||||
"""Get information about a single watch, recheck, pause, or mute."""
|
||||
# Get watch reference first (for pause/mute operations)
|
||||
watch_obj = self.datastore.data['watching'].get(uuid)
|
||||
if not watch_obj:
|
||||
from copy import deepcopy
|
||||
watch = deepcopy(self.datastore.data['watching'].get(uuid))
|
||||
if not watch:
|
||||
abort(404, message='No watch exists with the UUID of {}'.format(uuid))
|
||||
|
||||
# Create a dict copy for JSON response (with lock for thread safety)
|
||||
# This is much faster than deepcopy and doesn't copy the datastore reference
|
||||
# WARNING: dict() is a SHALLOW copy - nested dicts are shared with original!
|
||||
# Only safe because we only ADD scalar properties (line 97-101), never modify nested dicts
|
||||
# If you need to modify nested dicts, use: from copy import deepcopy; watch = deepcopy(dict(watch_obj))
|
||||
with self.datastore.lock:
|
||||
watch = dict(watch_obj)
|
||||
|
||||
if request.args.get('recheck'):
|
||||
worker_pool.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
worker_handler.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
return "OK", 200
|
||||
if request.args.get('paused', '') == 'paused':
|
||||
watch_obj.pause()
|
||||
watch_obj.commit()
|
||||
self.datastore.data['watching'].get(uuid).pause()
|
||||
return "OK", 200
|
||||
elif request.args.get('paused', '') == 'unpaused':
|
||||
watch_obj.unpause()
|
||||
watch_obj.commit()
|
||||
self.datastore.data['watching'].get(uuid).unpause()
|
||||
return "OK", 200
|
||||
if request.args.get('muted', '') == 'muted':
|
||||
watch_obj.mute()
|
||||
watch_obj.commit()
|
||||
self.datastore.data['watching'].get(uuid).mute()
|
||||
return "OK", 200
|
||||
elif request.args.get('muted', '') == 'unmuted':
|
||||
watch_obj.unmute()
|
||||
watch_obj.commit()
|
||||
self.datastore.data['watching'].get(uuid).unmute()
|
||||
return "OK", 200
|
||||
|
||||
# Return without history, get that via another API call
|
||||
# Properties are not returned as a JSON, so add the required props manually
|
||||
watch['history_n'] = watch_obj.history_n
|
||||
watch['history_n'] = watch.history_n
|
||||
# attr .last_changed will check for the last written text snapshot on change
|
||||
watch['last_changed'] = watch_obj.last_changed
|
||||
watch['viewed'] = watch_obj.viewed
|
||||
watch['link'] = watch_obj.link,
|
||||
|
||||
# fetch_backend is a legacy field superseded by browser_profile — omit from API response
|
||||
watch.pop('fetch_backend', None)
|
||||
watch['last_changed'] = watch.last_changed
|
||||
watch['viewed'] = watch.viewed
|
||||
watch['link'] = watch.link,
|
||||
|
||||
return watch
|
||||
|
||||
@@ -122,6 +107,7 @@ class Watch(Resource):
|
||||
|
||||
@auth.check_token
|
||||
@validate_openapi_request('updateWatch')
|
||||
@expects_json(schema_update_watch)
|
||||
def put(self, uuid):
|
||||
"""Update watch information."""
|
||||
watch = self.datastore.data['watching'].get(uuid)
|
||||
@@ -130,86 +116,72 @@ class Watch(Resource):
|
||||
|
||||
if request.json.get('proxy'):
|
||||
plist = self.datastore.proxy_list
|
||||
if not plist or request.json.get('proxy') not in plist:
|
||||
proxy_list_str = ', '.join(plist) if plist else 'none configured'
|
||||
return f"Invalid proxy choice, currently supported proxies are '{proxy_list_str}'", 400
|
||||
if not request.json.get('proxy') in plist:
|
||||
return "Invalid proxy choice, currently supported proxies are '{}'".format(', '.join(plist)), 400
|
||||
|
||||
# Validate time_between_check when not using defaults
|
||||
validation_error = validate_time_between_check_required(request.json)
|
||||
if validation_error:
|
||||
return validation_error, 400
|
||||
|
||||
# Validate notification_urls if provided
|
||||
if 'notification_urls' in request.json:
|
||||
from wtforms import ValidationError
|
||||
from changedetectionio.api.Notifications import validate_notification_urls
|
||||
try:
|
||||
notification_urls = request.json.get('notification_urls', [])
|
||||
validate_notification_urls(notification_urls)
|
||||
except ValidationError as e:
|
||||
return str(e), 400
|
||||
|
||||
# XSS etc protection - validate URL if it's being updated
|
||||
if 'url' in request.json:
|
||||
new_url = request.json.get('url')
|
||||
|
||||
# URL must be a non-empty string
|
||||
if new_url is None:
|
||||
return "URL cannot be null", 400
|
||||
|
||||
if not isinstance(new_url, str):
|
||||
return "URL must be a string", 400
|
||||
|
||||
if not new_url.strip():
|
||||
return "URL cannot be empty or whitespace only", 400
|
||||
|
||||
if not is_safe_valid_url(new_url.strip()):
|
||||
return "Invalid or unsupported URL format. URL must use http://, https://, or ftp:// protocol", 400
|
||||
# XSS etc protection
|
||||
if request.json.get('url') and not is_safe_valid_url(request.json.get('url')):
|
||||
return "Invalid URL", 400
|
||||
|
||||
# Handle processor-config-* fields separately (save to JSON, not datastore)
|
||||
from changedetectionio import processors
|
||||
processor_config_data = {}
|
||||
regular_data = {}
|
||||
|
||||
# Make a mutable copy of request.json for modification
|
||||
json_data = dict(request.json)
|
||||
|
||||
# Extract and remove processor config fields from json_data
|
||||
processor_config_data = processors.extract_processor_config_from_form_data(json_data)
|
||||
|
||||
# Filter out readOnly fields (extracted from OpenAPI spec Watch schema)
|
||||
# These are system-managed fields that should never be user-settable
|
||||
readonly_fields = get_readonly_watch_fields()
|
||||
|
||||
# Also filter out @property attributes (computed/derived values from the model)
|
||||
# These are not stored and should be ignored in PUT requests
|
||||
from changedetectionio.model.Watch import model as WatchModel
|
||||
property_fields = WatchModel.get_property_names()
|
||||
|
||||
# Combine both sets of fields to ignore
|
||||
fields_to_ignore = readonly_fields | property_fields
|
||||
|
||||
# Remove all ignored fields from update data
|
||||
for field in fields_to_ignore:
|
||||
json_data.pop(field, None)
|
||||
|
||||
# Validate remaining fields - reject truly unknown fields
|
||||
# Get valid fields from WatchBase schema
|
||||
from . import get_watch_schema_properties
|
||||
valid_fields = set(get_watch_schema_properties().keys())
|
||||
|
||||
# Also allow last_viewed (explicitly defined in UpdateWatch schema)
|
||||
valid_fields.add('last_viewed')
|
||||
|
||||
# Check for unknown fields
|
||||
unknown_fields = set(json_data.keys()) - valid_fields
|
||||
if unknown_fields:
|
||||
return f"Unknown field(s): {', '.join(sorted(unknown_fields))}", 400
|
||||
for key, value in request.json.items():
|
||||
if key.startswith('processor_config_'):
|
||||
config_key = key.replace('processor_config_', '')
|
||||
if value: # Only save non-empty values
|
||||
processor_config_data[config_key] = value
|
||||
else:
|
||||
regular_data[key] = value
|
||||
|
||||
# Update watch with regular (non-processor-config) fields
|
||||
watch.update(json_data)
|
||||
watch.commit()
|
||||
watch.update(regular_data)
|
||||
|
||||
# Save processor config to JSON file
|
||||
processors.save_processor_config(self.datastore, uuid, processor_config_data)
|
||||
# Save processor config to JSON file if any config data exists
|
||||
if processor_config_data:
|
||||
try:
|
||||
processor_name = request.json.get('processor', watch.get('processor'))
|
||||
if processor_name:
|
||||
# Create a processor instance to access config methods
|
||||
from changedetectionio.processors import difference_detection_processor
|
||||
processor_instance = difference_detection_processor(self.datastore, uuid)
|
||||
# Use processor name as filename so each processor keeps its own config
|
||||
config_filename = f'{processor_name}.json'
|
||||
processor_instance.update_extra_watch_config(config_filename, processor_config_data)
|
||||
logger.debug(f"API: Saved processor config to {config_filename}: {processor_config_data}")
|
||||
|
||||
# Call optional edit_hook if processor has one
|
||||
try:
|
||||
import importlib
|
||||
edit_hook_module_name = f'changedetectionio.processors.{processor_name}.edit_hook'
|
||||
|
||||
try:
|
||||
edit_hook = importlib.import_module(edit_hook_module_name)
|
||||
logger.debug(f"API: Found edit_hook module for {processor_name}")
|
||||
|
||||
if hasattr(edit_hook, 'on_config_save'):
|
||||
logger.info(f"API: Calling edit_hook.on_config_save for {processor_name}")
|
||||
# Call hook and get updated config
|
||||
updated_config = edit_hook.on_config_save(watch, processor_config_data, self.datastore)
|
||||
# Save updated config back to file
|
||||
processor_instance.update_extra_watch_config(config_filename, updated_config)
|
||||
logger.info(f"API: Edit hook updated config: {updated_config}")
|
||||
else:
|
||||
logger.debug(f"API: Edit hook module found but no on_config_save function")
|
||||
except ModuleNotFoundError:
|
||||
logger.debug(f"API: No edit_hook module for processor {processor_name} (this is normal)")
|
||||
except Exception as hook_error:
|
||||
logger.error(f"API: Edit hook error (non-fatal): {hook_error}", exc_info=True)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"API: Failed to save processor config: {e}")
|
||||
|
||||
return "OK", 200
|
||||
|
||||
@@ -220,7 +192,7 @@ class WatchHistory(Resource):
|
||||
self.datastore = kwargs['datastore']
|
||||
|
||||
# Get a list of available history for a watch by UUID
|
||||
# curl http://localhost:5000/api/v1/watch/<uuid_str:uuid>/history
|
||||
# curl http://localhost:5000/api/v1/watch/<string:uuid>/history
|
||||
@auth.check_token
|
||||
@validate_openapi_request('getWatchHistory')
|
||||
def get(self, uuid):
|
||||
@@ -250,10 +222,6 @@ class WatchSingleHistory(Resource):
|
||||
if timestamp == 'latest':
|
||||
timestamp = list(watch.history.keys())[-1]
|
||||
|
||||
# Validate that the timestamp exists in history
|
||||
if timestamp not in watch.history:
|
||||
abort(404, message=f"No history snapshot found for timestamp '{timestamp}'")
|
||||
|
||||
if request.args.get('html'):
|
||||
content = watch.get_fetched_html(timestamp)
|
||||
if content:
|
||||
@@ -334,28 +302,18 @@ class WatchHistoryDiff(Resource):
|
||||
from_version_file_contents = watch.get_history_snapshot(from_timestamp)
|
||||
to_version_file_contents = watch.get_history_snapshot(to_timestamp)
|
||||
|
||||
# Get diff preferences from query parameters (matching UI preferences in DIFF_PREFERENCES_CONFIG)
|
||||
# Support both 'type' (UI parameter) and 'word_diff' (API parameter) for backward compatibility
|
||||
diff_type = request.args.get('type', 'diffLines')
|
||||
if diff_type == 'diffWords':
|
||||
word_diff = True
|
||||
# Get diff preferences (using defaults similar to the existing code)
|
||||
diff_prefs = {
|
||||
'diff_ignoreWhitespace': False,
|
||||
'diff_changesOnly': True
|
||||
}
|
||||
|
||||
# Get boolean diff preferences with defaults from DIFF_PREFERENCES_CONFIG
|
||||
changes_only = strtobool(request.args.get('changesOnly', 'false'))
|
||||
ignore_whitespace = strtobool(request.args.get('ignoreWhitespace', 'false'))
|
||||
include_removed = strtobool(request.args.get('removed', 'true'))
|
||||
include_added = strtobool(request.args.get('added', 'true'))
|
||||
include_replaced = strtobool(request.args.get('replaced', 'true'))
|
||||
|
||||
# Generate the diff with all preferences
|
||||
# Generate the diff
|
||||
content = diff.render_diff(
|
||||
previous_version_file_contents=from_version_file_contents,
|
||||
newest_version_file_contents=to_version_file_contents,
|
||||
ignore_junk=ignore_whitespace,
|
||||
include_equal=not changes_only,
|
||||
include_removed=include_removed,
|
||||
include_added=include_added,
|
||||
include_replaced=include_replaced,
|
||||
ignore_junk=diff_prefs.get('diff_ignoreWhitespace'),
|
||||
include_equal=not diff_prefs.get('diff_changesOnly'),
|
||||
word_diff=word_diff,
|
||||
)
|
||||
|
||||
@@ -402,11 +360,18 @@ class WatchFavicon(Resource):
|
||||
|
||||
favicon_filename = watch.get_favicon_filename()
|
||||
if favicon_filename:
|
||||
# Use cached MIME type detection
|
||||
filepath = os.path.join(watch.data_dir, favicon_filename)
|
||||
mime = get_favicon_mime_type(filepath)
|
||||
try:
|
||||
import magic
|
||||
mime = magic.from_file(
|
||||
os.path.join(watch.watch_data_dir, favicon_filename),
|
||||
mime=True
|
||||
)
|
||||
except ImportError:
|
||||
# Fallback, no python-magic
|
||||
import mimetypes
|
||||
mime, encoding = mimetypes.guess_type(favicon_filename)
|
||||
|
||||
response = make_response(send_from_directory(watch.data_dir, favicon_filename))
|
||||
response = make_response(send_from_directory(watch.watch_data_dir, favicon_filename))
|
||||
response.headers['Content-type'] = mime
|
||||
response.headers['Cache-Control'] = 'max-age=300, must-revalidate' # Cache for 5 minutes, then revalidate
|
||||
return response
|
||||
@@ -422,6 +387,7 @@ class CreateWatch(Resource):
|
||||
|
||||
@auth.check_token
|
||||
@validate_openapi_request('createWatch')
|
||||
@expects_json(schema_create_watch)
|
||||
def post(self):
|
||||
"""Create a single watch."""
|
||||
|
||||
@@ -433,33 +399,16 @@ class CreateWatch(Resource):
|
||||
|
||||
if json_data.get('proxy'):
|
||||
plist = self.datastore.proxy_list
|
||||
if not plist or json_data.get('proxy') not in plist:
|
||||
proxy_list_str = ', '.join(plist) if plist else 'none configured'
|
||||
return f"Invalid proxy choice, currently supported proxies are '{proxy_list_str}'", 400
|
||||
if not json_data.get('proxy') in plist:
|
||||
return "Invalid proxy choice, currently supported proxies are '{}'".format(', '.join(plist)), 400
|
||||
|
||||
# Validate time_between_check when not using defaults
|
||||
validation_error = validate_time_between_check_required(json_data)
|
||||
if validation_error:
|
||||
return validation_error, 400
|
||||
|
||||
# Validate notification_urls if provided
|
||||
if 'notification_urls' in json_data:
|
||||
from wtforms import ValidationError
|
||||
from changedetectionio.api.Notifications import validate_notification_urls
|
||||
try:
|
||||
notification_urls = json_data.get('notification_urls', [])
|
||||
validate_notification_urls(notification_urls)
|
||||
except ValidationError as e:
|
||||
return str(e), 400
|
||||
|
||||
# Handle processor-config-* fields separately (save to JSON, not watch)
|
||||
from changedetectionio import processors
|
||||
|
||||
extras = copy.deepcopy(json_data)
|
||||
|
||||
# Extract and remove processor config fields from extras
|
||||
processor_config_data = processors.extract_processor_config_from_form_data(extras)
|
||||
|
||||
# Because we renamed 'tag' to 'tags' but don't want to change the API (can do this in v2 of the API)
|
||||
tags = None
|
||||
if extras.get('tag'):
|
||||
@@ -469,25 +418,10 @@ class CreateWatch(Resource):
|
||||
del extras['url']
|
||||
|
||||
new_uuid = self.datastore.add_watch(url=url, extras=extras, tag=tags)
|
||||
|
||||
# Save processor config to separate JSON file
|
||||
if new_uuid and processor_config_data:
|
||||
processors.save_processor_config(self.datastore, new_uuid, processor_config_data)
|
||||
if new_uuid:
|
||||
# Dont queue because the scheduler will check that it hasnt been checked before anyway
|
||||
# worker_pool.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid}))
|
||||
worker_handler.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid}))
|
||||
return {'uuid': new_uuid}, 201
|
||||
else:
|
||||
# Check if it was a limit issue
|
||||
page_watch_limit = os.getenv('PAGE_WATCH_LIMIT')
|
||||
if page_watch_limit:
|
||||
try:
|
||||
page_watch_limit = int(page_watch_limit)
|
||||
current_watch_count = len(self.datastore.data['watching'])
|
||||
if current_watch_count >= page_watch_limit:
|
||||
return f"Watch limit reached ({current_watch_count}/{page_watch_limit} watches). Cannot add more watches.", 429
|
||||
except ValueError:
|
||||
pass
|
||||
return "Invalid or unsupported URL", 400
|
||||
|
||||
@auth.check_token
|
||||
@@ -509,65 +443,14 @@ class CreateWatch(Resource):
|
||||
'last_error': watch['last_error'],
|
||||
'link': watch.link,
|
||||
'page_title': watch['page_title'],
|
||||
'tags': [*tags], # Unpack dict keys to list (can't use list() since variable named 'list')
|
||||
'title': watch['title'],
|
||||
'url': watch['url'],
|
||||
'viewed': watch.viewed
|
||||
}
|
||||
|
||||
if request.args.get('recheck_all'):
|
||||
# Collect all watches to queue
|
||||
watches_to_queue = self.datastore.data['watching'].keys()
|
||||
for uuid in self.datastore.data['watching'].keys():
|
||||
worker_handler.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
return {'status': "OK"}, 200
|
||||
|
||||
# If less than 20 watches, queue synchronously for immediate feedback
|
||||
if len(watches_to_queue) < 20:
|
||||
# Get already queued/running UUIDs once (efficient)
|
||||
queued_uuids = set(self.update_q.get_queued_uuids())
|
||||
running_uuids = set(worker_pool.get_running_uuids())
|
||||
|
||||
# Filter out watches that are already queued or running
|
||||
watches_to_queue_filtered = [
|
||||
uuid for uuid in watches_to_queue
|
||||
if uuid not in queued_uuids and uuid not in running_uuids
|
||||
]
|
||||
|
||||
# Queue only the filtered watches
|
||||
for uuid in watches_to_queue_filtered:
|
||||
worker_pool.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
|
||||
# Provide feedback about skipped watches
|
||||
skipped_count = len(watches_to_queue) - len(watches_to_queue_filtered)
|
||||
if skipped_count > 0:
|
||||
return {'status': f'OK, queued {len(watches_to_queue_filtered)} watches for rechecking ({skipped_count} already queued or running)'}, 200
|
||||
else:
|
||||
return {'status': f'OK, queued {len(watches_to_queue_filtered)} watches for rechecking'}, 200
|
||||
else:
|
||||
# 20+ watches - queue in background thread to avoid blocking API response
|
||||
# Capture queued/running state before background thread
|
||||
queued_uuids = set(self.update_q.get_queued_uuids())
|
||||
running_uuids = set(worker_pool.get_running_uuids())
|
||||
|
||||
def queue_all_watches_background():
|
||||
"""Background thread to queue all watches - discarded after completion."""
|
||||
try:
|
||||
queued_count = 0
|
||||
skipped_count = 0
|
||||
for uuid in watches_to_queue:
|
||||
# Check if already queued or running (state captured at start)
|
||||
if uuid not in queued_uuids and uuid not in running_uuids:
|
||||
worker_pool.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
queued_count += 1
|
||||
else:
|
||||
skipped_count += 1
|
||||
|
||||
logger.info(f"Background queueing complete: {queued_count} watches queued, {skipped_count} skipped (already queued/running)")
|
||||
except Exception as e:
|
||||
logger.error(f"Error in background queueing all watches: {e}")
|
||||
|
||||
# Start background thread and return immediately
|
||||
thread = threading.Thread(target=queue_all_watches_background, daemon=True, name="QueueAllWatches-Background")
|
||||
thread.start()
|
||||
|
||||
return {'status': f'OK, queueing {len(watches_to_queue)} watches in background'}, 202
|
||||
|
||||
return list, 200
|
||||
return list, 200
|
||||
@@ -1,137 +1,50 @@
|
||||
import copy
|
||||
import functools
|
||||
from flask import request, abort
|
||||
from loguru import logger
|
||||
from . import api_schema
|
||||
from ..model import watch_base
|
||||
|
||||
@functools.cache
|
||||
def build_merged_spec_dict():
|
||||
"""
|
||||
Load the base OpenAPI spec and merge in any per-processor api.yaml extensions.
|
||||
# Build a JSON Schema atleast partially based on our Watch model
|
||||
watch_base_config = watch_base()
|
||||
schema = api_schema.build_watch_json_schema(watch_base_config)
|
||||
|
||||
Each processor can provide an api.yaml file alongside its __init__.py that defines
|
||||
additional schemas (e.g., processor_config_restock_diff). These are merged into
|
||||
WatchBase.properties so the spec accurately reflects what the API accepts.
|
||||
schema_create_watch = copy.deepcopy(schema)
|
||||
schema_create_watch['required'] = ['url']
|
||||
del schema_create_watch['properties']['last_viewed']
|
||||
|
||||
Plugin processors (via pluggy) are also supported - they just need an api.yaml
|
||||
next to their processor module.
|
||||
schema_update_watch = copy.deepcopy(schema)
|
||||
schema_update_watch['additionalProperties'] = False
|
||||
|
||||
Returns the merged dict (cached - do not mutate the returned value).
|
||||
"""
|
||||
import os
|
||||
import yaml
|
||||
|
||||
spec_path = os.path.join(os.path.dirname(__file__), '../../docs/api-spec.yaml')
|
||||
if not os.path.exists(spec_path):
|
||||
spec_path = os.path.join(os.path.dirname(__file__), '../docs/api-spec.yaml')
|
||||
|
||||
with open(spec_path, 'r', encoding='utf-8') as f:
|
||||
spec_dict = yaml.safe_load(f)
|
||||
|
||||
try:
|
||||
from changedetectionio.processors import find_processors, get_parent_module
|
||||
for module, proc_name in find_processors():
|
||||
parent = get_parent_module(module)
|
||||
if not parent or not hasattr(parent, '__file__'):
|
||||
continue
|
||||
api_yaml_path = os.path.join(os.path.dirname(parent.__file__), 'api.yaml')
|
||||
if not os.path.exists(api_yaml_path):
|
||||
continue
|
||||
with open(api_yaml_path, 'r', encoding='utf-8') as f:
|
||||
proc_spec = yaml.safe_load(f)
|
||||
# Merge schemas
|
||||
proc_schemas = proc_spec.get('components', {}).get('schemas', {})
|
||||
spec_dict['components']['schemas'].update(proc_schemas)
|
||||
# Inject processor_config_{name} into WatchBase if the schema is defined
|
||||
schema_key = f'processor_config_{proc_name}'
|
||||
if schema_key in proc_schemas:
|
||||
spec_dict['components']['schemas']['WatchBase']['properties'][schema_key] = {
|
||||
'$ref': f'#/components/schemas/{schema_key}'
|
||||
}
|
||||
# Append x-code-samples from processor paths into existing path operations
|
||||
for path, path_item in proc_spec.get('paths', {}).items():
|
||||
if path not in spec_dict.get('paths', {}):
|
||||
continue
|
||||
for method, operation in path_item.items():
|
||||
if method not in spec_dict['paths'][path]:
|
||||
continue
|
||||
if 'x-code-samples' in operation:
|
||||
existing = spec_dict['paths'][path][method].get('x-code-samples', [])
|
||||
spec_dict['paths'][path][method]['x-code-samples'] = existing + operation['x-code-samples']
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to merge processor API specs: {e}")
|
||||
|
||||
return spec_dict
|
||||
# Tag schema is also based on watch_base since Tag inherits from it
|
||||
schema_tag = copy.deepcopy(schema)
|
||||
schema_create_tag = copy.deepcopy(schema_tag)
|
||||
schema_create_tag['required'] = ['title']
|
||||
schema_update_tag = copy.deepcopy(schema_tag)
|
||||
schema_update_tag['additionalProperties'] = False
|
||||
|
||||
schema_notification_urls = copy.deepcopy(schema)
|
||||
schema_create_notification_urls = copy.deepcopy(schema_notification_urls)
|
||||
schema_create_notification_urls['required'] = ['notification_urls']
|
||||
schema_delete_notification_urls = copy.deepcopy(schema_notification_urls)
|
||||
schema_delete_notification_urls['required'] = ['notification_urls']
|
||||
|
||||
@functools.cache
|
||||
def get_openapi_spec():
|
||||
"""Lazy load OpenAPI spec and dependencies only when validation is needed."""
|
||||
import os
|
||||
import yaml # Lazy import - only loaded when API validation is actually used
|
||||
from openapi_core import OpenAPI # Lazy import - saves ~10.7 MB on startup
|
||||
return OpenAPI.from_dict(build_merged_spec_dict())
|
||||
|
||||
@functools.cache
|
||||
def get_openapi_schema_dict():
|
||||
"""
|
||||
Get the raw OpenAPI spec dictionary for schema access.
|
||||
spec_path = os.path.join(os.path.dirname(__file__), '../../docs/api-spec.yaml')
|
||||
if not os.path.exists(spec_path):
|
||||
# Possibly for pip3 packages
|
||||
spec_path = os.path.join(os.path.dirname(__file__), '../docs/api-spec.yaml')
|
||||
|
||||
Used by Import endpoint to validate and convert query parameters.
|
||||
Returns the merged YAML dict (not the OpenAPI object).
|
||||
"""
|
||||
return build_merged_spec_dict()
|
||||
|
||||
@functools.cache
|
||||
def _resolve_schema_properties(schema_name):
|
||||
"""
|
||||
Generic helper to resolve schema properties, including allOf inheritance.
|
||||
|
||||
Args:
|
||||
schema_name: Name of the schema (e.g., 'WatchBase', 'Watch', 'Tag')
|
||||
|
||||
Returns:
|
||||
dict: All properties including inherited ones from $ref schemas
|
||||
"""
|
||||
spec_dict = get_openapi_schema_dict()
|
||||
schema = spec_dict['components']['schemas'].get(schema_name, {})
|
||||
|
||||
properties = {}
|
||||
|
||||
# Handle allOf (schema inheritance)
|
||||
if 'allOf' in schema:
|
||||
for item in schema['allOf']:
|
||||
# Resolve $ref to parent schema
|
||||
if '$ref' in item:
|
||||
ref_path = item['$ref'].split('/')[-1]
|
||||
ref_schema = spec_dict['components']['schemas'].get(ref_path, {})
|
||||
properties.update(ref_schema.get('properties', {}))
|
||||
# Add schema-specific properties
|
||||
if 'properties' in item:
|
||||
properties.update(item['properties'])
|
||||
else:
|
||||
# Direct properties (no inheritance)
|
||||
properties = schema.get('properties', {})
|
||||
|
||||
return properties
|
||||
|
||||
|
||||
@functools.cache
|
||||
def get_watch_schema_properties():
|
||||
"""
|
||||
Extract watch schema properties from OpenAPI spec for Import endpoint.
|
||||
|
||||
Returns WatchBase properties (all writable Watch fields).
|
||||
"""
|
||||
return _resolve_schema_properties('WatchBase')
|
||||
|
||||
# Import readonly field utilities from shared module (avoids circular dependencies with model layer)
|
||||
from changedetectionio.model.schema_utils import get_readonly_watch_fields, get_readonly_tag_fields
|
||||
|
||||
@functools.cache
|
||||
def get_tag_schema_properties():
|
||||
"""
|
||||
Extract Tag schema properties from OpenAPI spec.
|
||||
|
||||
Returns WatchBase properties + Tag-specific properties (overrides_watch).
|
||||
"""
|
||||
return _resolve_schema_properties('Tag')
|
||||
with open(spec_path, 'r', encoding='utf-8') as f:
|
||||
spec_dict = yaml.safe_load(f)
|
||||
_openapi_spec = OpenAPI.from_dict(spec_dict)
|
||||
return _openapi_spec
|
||||
|
||||
def validate_openapi_request(operation_id):
|
||||
"""Decorator to validate incoming requests against OpenAPI spec."""
|
||||
@@ -144,7 +57,6 @@ def validate_openapi_request(operation_id):
|
||||
if request.method.upper() != 'GET':
|
||||
# Lazy import - only loaded when actually validating a request
|
||||
from openapi_core.contrib.flask import FlaskOpenAPIRequest
|
||||
from openapi_core.templating.paths.exceptions import ServerNotFound, PathNotFound, PathError
|
||||
|
||||
spec = get_openapi_spec()
|
||||
openapi_request = FlaskOpenAPIRequest(request)
|
||||
@@ -152,29 +64,8 @@ def validate_openapi_request(operation_id):
|
||||
if result.errors:
|
||||
error_details = []
|
||||
for error in result.errors:
|
||||
# Skip path/server validation errors for reverse proxy compatibility
|
||||
# Flask routing already validates that endpoints exist (returns 404 if not).
|
||||
# OpenAPI validation here is primarily for request body schema validation.
|
||||
# When behind nginx/reverse proxy, URLs may have path prefixes that don't
|
||||
# match the OpenAPI server definitions, causing false positives.
|
||||
if isinstance(error, PathError):
|
||||
logger.debug(f"API Call - Skipping path/server validation (delegated to Flask): {error}")
|
||||
continue
|
||||
|
||||
error_str = str(error)
|
||||
# Extract detailed schema errors from __cause__
|
||||
if hasattr(error, '__cause__') and hasattr(error.__cause__, 'schema_errors'):
|
||||
for schema_error in error.__cause__.schema_errors:
|
||||
field = '.'.join(str(p) for p in schema_error.path) if schema_error.path else 'body'
|
||||
msg = schema_error.message if hasattr(schema_error, 'message') else str(schema_error)
|
||||
error_details.append(f"{field}: {msg}")
|
||||
else:
|
||||
error_details.append(error_str)
|
||||
|
||||
# Only raise if we have actual validation errors (not path/server issues)
|
||||
if error_details:
|
||||
logger.error(f"API Call - Validation failed: {'; '.join(error_details)}")
|
||||
raise BadRequest(f"Validation failed: {'; '.join(error_details)}")
|
||||
error_details.append(str(error))
|
||||
raise BadRequest(f"OpenAPI validation failed: {error_details}")
|
||||
except BadRequest:
|
||||
# Re-raise BadRequest exceptions (validation failures)
|
||||
raise
|
||||
@@ -191,6 +82,5 @@ from .Watch import Watch, WatchHistory, WatchSingleHistory, WatchHistoryDiff, Cr
|
||||
from .Tags import Tags, Tag
|
||||
from .Import import Import
|
||||
from .SystemInfo import SystemInfo
|
||||
from .Spec import Spec
|
||||
from .Notifications import Notifications
|
||||
|
||||
|
||||
162
changedetectionio/api/api_schema.py
Normal file
162
changedetectionio/api/api_schema.py
Normal file
@@ -0,0 +1,162 @@
|
||||
# Responsible for building the storage dict into a set of rules ("JSON Schema") acceptable via the API
|
||||
# Probably other ways to solve this when the backend switches to some ORM
|
||||
from changedetectionio.notification import valid_notification_formats
|
||||
|
||||
|
||||
def build_time_between_check_json_schema():
    """Build the JSON Schema fragment describing a ``time_between_check`` object.

    Returns:
        dict: An ``object`` schema that rejects unknown keys and exposes one
        property per time unit (weeks, days, hours, minutes, seconds), each of
        which accepts either an integer or null. A fresh dict is built on
        every call.
    """
    time_units = ('weeks', 'days', 'hours', 'minutes', 'seconds')

    # Build a separate "integer or null" rule per unit so the nested
    # dicts/lists are never shared between properties.
    unit_rules = {
        unit: {
            "anyOf": [
                {
                    "type": "integer"
                },
                {
                    "type": "null"
                }
            ]
        }
        for unit in time_units
    }

    return {
        "type": "object",
        "additionalProperties": False,
        "properties": unit_rules
    }
|
||||
|
||||
def build_watch_json_schema(d):
    """Derive a JSON Schema for the watch API from a dict of default values.

    Each key of ``d`` whose default is None, a list, a bool or a str gets an
    ``anyOf`` rule matching that type; values of any other type produce no
    property here. A number of properties are then widened, replaced or
    removed by hand below.

    Args:
        d: Mapping of field name to default value (iterated with ``.items()``).
           NOTE(review): the hand-written overrides index into the generated
           properties, so ``d`` must contain those keys with a None/list/bool/
           str default, otherwise a KeyError is raised - confirm against the
           watch model.

    Returns:
        dict: A JSON Schema of type ``object`` with a ``properties`` mapping.
    """
    props = {}
    # Base JSON schema
    schema = {
        'type': 'object',
        'properties': props,
    }

    # @todo 'integer' is not covered here because its almost always for internal usage
    for field_name, default_value in d.items():
        if default_value is None:
            rule = {"type": "null"}
        elif isinstance(default_value, list):
            # Always is an array of strings, like text or regex or something
            rule = {
                "type": "array",
                "items": {
                    "type": "string",
                    "maxLength": 5000
                }
            }
        elif isinstance(default_value, bool):
            rule = {"type": "boolean"}
        elif isinstance(default_value, str):
            rule = {"type": "string", "maxLength": 5000}
        else:
            # Unsupported default type: no property is generated for it
            continue
        props[field_name] = {"anyOf": [rule]}

    # Can also be a string (or None by default above)
    also_string_fields = [
        'body',
        'notification_body',
        'notification_format',
        'notification_title',
        'proxy',
        'tag',
        'title',
        'webdriver_js_execute_code',
    ]
    for field_name in also_string_fields:
        props[field_name]['anyOf'].append({'type': 'string', "maxLength": 5000})

    props['last_viewed'] = {
        "type": "integer",
        "description": "Unix timestamp in seconds of the last time the watch was viewed.",
        "minimum": 0
    }

    # None or Boolean
    props['track_ldjson_price_data']['anyOf'].append({'type': 'boolean'})

    props['method'] = {
        "type": "string",
        "enum": ["GET", "POST", "DELETE", "PUT"]
    }

    props['fetch_backend']['anyOf'].append({
        "type": "string",
        "enum": ["html_requests", "html_webdriver"]
    })

    props['processor'] = {
        "anyOf": [
            {"type": "string", "enum": ["restock_diff", "text_json_diff"]},
            {"type": "null"}
        ]
    }

    # All headers must be key/value type dict
    props['headers'] = {
        "type": "object",
        "patternProperties": {
            # Should always be a string:string type value
            ".*": {"type": "string"},
        }
    }

    # Replaces the generated rule (and the string alternative appended above)
    # with a strict enum of the known notification formats
    props['notification_format'] = {
        'type': 'string',
        'enum': list(valid_notification_formats.keys())
    }

    # Stuff that shouldn't be available but is just state-storage
    state_only_fields = ['previous_md5', 'last_error', 'has_ldjson_price_data', 'previous_md5_before_filters', 'uuid']
    for field_name in state_only_fields:
        del props[field_name]

    props['webdriver_delay']['anyOf'].append({'type': 'integer'})

    props['time_between_check'] = build_time_between_check_json_schema()

    props['time_between_check_use_default'] = {
        "type": "boolean",
        "default": True,
        "description": "Whether to use global settings for time between checks - defaults to true if not set"
    }

    # A single browser step: all three keys are required, each may be null or
    # any string up to 5000 chars (including ""), and no extra keys are allowed
    browser_step_item = {
        "type": "object",
        "properties": {
            "operation": {
                "type": ["string", "null"],
                "maxLength": 5000
            },
            "selector": {
                "type": ["string", "null"],
                "maxLength": 5000
            },
            "optional_value": {
                "type": ["string", "null"],
                "maxLength": 5000
            }
        },
        "required": ["operation", "selector", "optional_value"],
        "additionalProperties": False
    }

    props['browser_steps'] = {
        "anyOf": [
            {
                "type": "array",
                "items": browser_step_item
            },
            {"type": "null"},  # Allows null for `browser_steps`
            {"type": "array", "maxItems": 0}  # Allows empty array []
        ]
    }

    # headers ?
    return schema
|
||||
|
||||
@@ -1,15 +1,15 @@
|
||||
from blinker import signal
|
||||
|
||||
from .processors.exceptions import ProcessorException
|
||||
import changedetectionio.content_fetchers.exceptions as content_fetchers_exceptions
|
||||
from changedetectionio.processors.text_json_diff.processor import FilterNotFoundInResponse
|
||||
from changedetectionio import html_tools
|
||||
from changedetectionio import worker_pool
|
||||
from changedetectionio.queuedWatchMetaData import PrioritizedItem
|
||||
from changedetectionio.pluggy_interface import apply_update_handler_alter, apply_update_finalize
|
||||
from changedetectionio.flask_app import watch_check_update
|
||||
|
||||
import asyncio
|
||||
import importlib
|
||||
import os
|
||||
import sys
|
||||
import queue
|
||||
import time
|
||||
|
||||
from loguru import logger
|
||||
@@ -17,91 +17,36 @@ from loguru import logger
|
||||
# Async version of update_worker
|
||||
# Processes jobs from AsyncSignalPriorityQueue instead of threaded queue
|
||||
|
||||
IN_PYTEST = "pytest" in sys.modules or "PYTEST_CURRENT_TEST" in os.environ
|
||||
DEFER_SLEEP_TIME_ALREADY_QUEUED = 0.3 if IN_PYTEST else 10.0
|
||||
|
||||
async def async_update_worker(worker_id, q, notification_q, app, datastore, executor=None):
|
||||
async def async_update_worker(worker_id, q, notification_q, app, datastore):
|
||||
"""
|
||||
Async worker function that processes watch check jobs from the queue.
|
||||
|
||||
|
||||
Args:
|
||||
worker_id: Unique identifier for this worker
|
||||
q: AsyncSignalPriorityQueue containing jobs to process
|
||||
notification_q: Standard queue for notifications
|
||||
app: Flask application instance
|
||||
datastore: Application datastore
|
||||
executor: ThreadPoolExecutor for queue operations (optional)
|
||||
|
||||
Returns:
|
||||
"restart" if worker should restart, "shutdown" for clean exit
|
||||
"""
|
||||
# Set a descriptive name for this task
|
||||
task = asyncio.current_task()
|
||||
if task:
|
||||
task.set_name(f"async-worker-{worker_id}")
|
||||
|
||||
# Read restart policy from environment
|
||||
max_jobs = int(os.getenv("WORKER_MAX_JOBS", "10"))
|
||||
max_runtime_seconds = int(os.getenv("WORKER_MAX_RUNTIME", "3600")) # 1 hour default
|
||||
|
||||
jobs_processed = 0
|
||||
start_time = time.time()
|
||||
|
||||
# Log thread name for debugging
|
||||
import threading
|
||||
thread_name = threading.current_thread().name
|
||||
logger.info(f"Starting async worker {worker_id} on thread '{thread_name}' (max_jobs={max_jobs}, max_runtime={max_runtime_seconds}s)")
|
||||
|
||||
|
||||
logger.info(f"Starting async worker {worker_id}")
|
||||
|
||||
while not app.config.exit.is_set():
|
||||
update_handler = None
|
||||
watch = None
|
||||
processing_exception = None # Reset at start of each iteration to prevent state bleeding
|
||||
|
||||
try:
|
||||
# Efficient blocking via run_in_executor (no polling overhead!)
|
||||
# Worker blocks in threading.Queue.get() which uses Condition.wait()
|
||||
# Executor must be sized to match worker count (see worker_pool.py: 50 threads default)
|
||||
# Single timeout (no double-timeout wrapper) = no race condition
|
||||
queued_item_data = await q.async_get(executor=executor, timeout=1.0)
|
||||
|
||||
# CRITICAL: Claim UUID immediately after getting from queue to prevent race condition
|
||||
# in wait_for_all_checks() which checks qsize() and running_uuids separately
|
||||
uuid = queued_item_data.item.get('uuid')
|
||||
if not worker_pool.claim_uuid_for_processing(uuid, worker_id):
|
||||
# Already being processed - re-queue and continue
|
||||
logger.trace(f"Worker {worker_id} detected UUID {uuid} already processing during claim - deferring")
|
||||
await asyncio.sleep(DEFER_SLEEP_TIME_ALREADY_QUEUED)
|
||||
deferred_priority = max(1000, queued_item_data.priority * 10)
|
||||
deferred_item = PrioritizedItem(priority=deferred_priority, item=queued_item_data.item)
|
||||
worker_pool.queue_item_async_safe(q, deferred_item, silent=True)
|
||||
continue
|
||||
# Use native janus async interface - no threads needed!
|
||||
queued_item_data = await asyncio.wait_for(q.async_get(), timeout=1.0)
|
||||
|
||||
except asyncio.TimeoutError:
|
||||
# No jobs available - check if we should restart based on time while idle
|
||||
runtime = time.time() - start_time
|
||||
if runtime >= max_runtime_seconds:
|
||||
logger.info(f"Worker {worker_id} idle and reached max runtime ({runtime:.0f}s), restarting")
|
||||
return "restart"
|
||||
continue
|
||||
except RuntimeError as e:
|
||||
# Handle executor shutdown gracefully - this is expected during shutdown
|
||||
if "cannot schedule new futures after shutdown" in str(e):
|
||||
# Executor shut down - exit gracefully without logging in pytest
|
||||
if not IN_PYTEST:
|
||||
logger.debug(f"Worker {worker_id} detected executor shutdown, exiting")
|
||||
break
|
||||
# Other RuntimeError - log and continue
|
||||
logger.error(f"Worker {worker_id} runtime error: {e}")
|
||||
await asyncio.sleep(0.1)
|
||||
# No jobs available, continue loop
|
||||
continue
|
||||
except Exception as e:
|
||||
# Handle expected Empty exception from queue timeout
|
||||
import queue
|
||||
if isinstance(e, queue.Empty):
|
||||
# Queue is empty, normal behavior - just continue
|
||||
continue
|
||||
|
||||
# Unexpected exception - log as critical
|
||||
logger.critical(f"CRITICAL: Worker {worker_id} failed to get queue item: {type(e).__name__}: {e}")
|
||||
|
||||
# Log queue health for debugging
|
||||
@@ -115,11 +60,26 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
|
||||
await asyncio.sleep(0.1)
|
||||
continue
|
||||
|
||||
# UUID already claimed above immediately after getting from queue
|
||||
# to prevent race condition with wait_for_all_checks()
|
||||
uuid = queued_item_data.item.get('uuid')
|
||||
|
||||
# RACE CONDITION FIX: Check if this UUID is already being processed by another worker
|
||||
from changedetectionio import worker_handler
|
||||
from changedetectionio.queuedWatchMetaData import PrioritizedItem
|
||||
if worker_handler.is_watch_running(uuid):
|
||||
logger.trace(f"Worker {worker_id} skipping UUID {uuid} - already being processed, re-queuing for later")
|
||||
# Re-queue with MUCH lower priority (higher number = processed later)
|
||||
# This prevents tight loop where high-priority item keeps getting picked immediately
|
||||
deferred_priority = max(1000, queued_item_data.priority * 10)
|
||||
deferred_item = PrioritizedItem(priority=deferred_priority, item=queued_item_data.item)
|
||||
worker_handler.queue_item_async_safe(q, deferred_item, silent=True)
|
||||
await asyncio.sleep(0.1) # Brief pause to avoid tight loop
|
||||
continue
|
||||
|
||||
fetch_start_time = round(time.time())
|
||||
|
||||
# Mark this UUID as being processed
|
||||
worker_handler.set_uuid_processing(uuid, processing=True)
|
||||
|
||||
try:
|
||||
if uuid in list(datastore.data['watching'].keys()) and datastore.data['watching'][uuid].get('url'):
|
||||
changed_detected = False
|
||||
@@ -136,43 +96,29 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
|
||||
logger.info(f"Worker {worker_id} processing watch UUID {uuid} Priority {queued_item_data.priority} URL {watch['url']}")
|
||||
|
||||
try:
|
||||
# Retrieve signal by name to ensure thread-safe access across worker threads
|
||||
watch_check_update = signal('watch_check_update')
|
||||
watch_check_update.send(watch_uuid=uuid)
|
||||
|
||||
# Processor is what we are using for detecting the "Change"
|
||||
processor = watch.get('processor', 'text_json_diff')
|
||||
|
||||
# Init a new 'difference_detection_processor'
|
||||
# Use get_processor_module() to support both built-in and plugin processors
|
||||
from changedetectionio.processors import get_processor_module
|
||||
processor_module = get_processor_module(processor)
|
||||
|
||||
if not processor_module:
|
||||
error_msg = f"Processor module '{processor}' not found."
|
||||
logger.error(error_msg)
|
||||
raise ModuleNotFoundError(error_msg)
|
||||
try:
|
||||
processor_module = importlib.import_module(f"changedetectionio.processors.{processor}.processor")
|
||||
except ModuleNotFoundError as e:
|
||||
print(f"Processor module '{processor}' not found.")
|
||||
raise e
|
||||
|
||||
update_handler = processor_module.perform_site_check(datastore=datastore,
|
||||
watch_uuid=uuid)
|
||||
|
||||
# Allow plugins to modify/wrap the update_handler
|
||||
update_handler = apply_update_handler_alter(update_handler, watch, datastore)
|
||||
|
||||
update_signal = signal('watch_small_status_comment')
|
||||
update_signal.send(watch_uuid=uuid, status="Fetching page..")
|
||||
|
||||
# All fetchers are now async, so call directly
|
||||
await update_handler.call_browser()
|
||||
|
||||
# Run change detection in executor to avoid blocking event loop
|
||||
# This includes CPU-intensive operations like HTML parsing (lxml/inscriptis)
|
||||
# which can take 2-10ms and cause GIL contention across workers
|
||||
loop = asyncio.get_event_loop()
|
||||
changed_detected, update_obj, contents = await loop.run_in_executor(
|
||||
executor,
|
||||
lambda: update_handler.run_changedetection(watch=watch)
|
||||
)
|
||||
# Run change detection (this is synchronous)
|
||||
changed_detected, update_obj, contents = update_handler.run_changedetection(watch=watch)
|
||||
|
||||
except PermissionError as e:
|
||||
logger.critical(f"File permission error updating file, watch: {uuid}")
|
||||
@@ -182,10 +128,8 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
|
||||
except ProcessorException as e:
|
||||
if e.screenshot:
|
||||
watch.save_screenshot(screenshot=e.screenshot)
|
||||
e.screenshot = None # Free memory immediately
|
||||
if e.xpath_data:
|
||||
watch.save_xpath_data(data=e.xpath_data)
|
||||
e.xpath_data = None # Free memory immediately
|
||||
datastore.update_watch(uuid=uuid, update_obj={'last_error': e.message})
|
||||
process_changedetection_results = False
|
||||
|
||||
@@ -205,11 +149,9 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
|
||||
|
||||
if e.screenshot:
|
||||
watch.save_screenshot(screenshot=e.screenshot, as_error=True)
|
||||
e.screenshot = None # Free memory immediately
|
||||
|
||||
if e.xpath_data:
|
||||
watch.save_xpath_data(data=e.xpath_data)
|
||||
e.xpath_data = None # Free memory immediately
|
||||
|
||||
process_changedetection_results = False
|
||||
|
||||
@@ -228,10 +170,8 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
|
||||
|
||||
if e.screenshot:
|
||||
watch.save_screenshot(screenshot=e.screenshot, as_error=True)
|
||||
e.screenshot = None # Free memory immediately
|
||||
if e.xpath_data:
|
||||
watch.save_xpath_data(data=e.xpath_data, as_error=True)
|
||||
e.xpath_data = None # Free memory immediately
|
||||
if e.page_text:
|
||||
watch.save_error_text(contents=e.page_text)
|
||||
|
||||
@@ -241,7 +181,6 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
|
||||
except FilterNotFoundInResponse as e:
|
||||
if not datastore.data['watching'].get(uuid):
|
||||
continue
|
||||
logger.debug(f"Received FilterNotFoundInResponse exception for {uuid}")
|
||||
|
||||
err_text = "Warning, no filters were found, no change detection ran - Did the page change layout? update your Visual Filter if necessary."
|
||||
datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text})
|
||||
@@ -249,11 +188,9 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
|
||||
# Filter wasnt found, but we should still update the visual selector so that they can have a chance to set it up again
|
||||
if e.screenshot:
|
||||
watch.save_screenshot(screenshot=e.screenshot)
|
||||
e.screenshot = None # Free memory immediately
|
||||
|
||||
if e.xpath_data:
|
||||
watch.save_xpath_data(data=e.xpath_data)
|
||||
e.xpath_data = None # Free memory immediately
|
||||
|
||||
# Only when enabled, send the notification
|
||||
if watch.get('filter_failure_notification_send', False):
|
||||
@@ -261,19 +198,17 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
|
||||
c += 1
|
||||
# Send notification if we reached the threshold?
|
||||
threshold = datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts', 0)
|
||||
logger.debug(f"FilterNotFoundInResponse - Filter for {uuid} not found, consecutive_filter_failures: {c} of threshold {threshold}")
|
||||
logger.debug(f"Filter for {uuid} not found, consecutive_filter_failures: {c} of threshold {threshold}")
|
||||
if c >= threshold:
|
||||
if not watch.get('notification_muted'):
|
||||
logger.debug(f"FilterNotFoundInResponse - Sending filter failed notification for {uuid}")
|
||||
logger.debug(f"Sending filter failed notification for {uuid}")
|
||||
await send_filter_failure_notification(uuid, notification_q, datastore)
|
||||
c = 0
|
||||
logger.debug(f"FilterNotFoundInResponse - Reset filter failure count back to zero")
|
||||
else:
|
||||
logger.debug(f"FilterNotFoundInResponse - {c} of threshold {threshold}..")
|
||||
logger.debug(f"Reset filter failure count back to zero")
|
||||
|
||||
datastore.update_watch(uuid=uuid, update_obj={'consecutive_filter_failures': c})
|
||||
else:
|
||||
logger.trace(f"FilterNotFoundInResponse - {uuid} - filter_failure_notification_send not enabled, skipping")
|
||||
logger.trace(f"{uuid} - filter_failure_notification_send not enabled, skipping")
|
||||
|
||||
process_changedetection_results = False
|
||||
|
||||
@@ -281,12 +216,6 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
|
||||
# Yes fine, so nothing todo, don't continue to process.
|
||||
process_changedetection_results = False
|
||||
changed_detected = False
|
||||
logger.debug(f'[{uuid}] - checksumFromPreviousCheckWasTheSame - Checksum from previous check was the same, nothing todo here.')
|
||||
# Reset the edited flag since we successfully completed the check
|
||||
watch.reset_watch_edited_flag()
|
||||
# Page was fetched successfully - clear any previous error state
|
||||
datastore.update_watch(uuid=uuid, update_obj={'last_error': False})
|
||||
cleanup_error_artifacts(uuid, datastore)
|
||||
|
||||
except content_fetchers_exceptions.BrowserConnectError as e:
|
||||
datastore.update_watch(uuid=uuid,
|
||||
@@ -353,7 +282,6 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
|
||||
err_text = "Error running JS Actions - Page request - "+e.message
|
||||
if e.screenshot:
|
||||
watch.save_screenshot(screenshot=e.screenshot, as_error=True)
|
||||
e.screenshot = None # Free memory immediately
|
||||
datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
|
||||
'last_check_status': e.status_code})
|
||||
process_changedetection_results = False
|
||||
@@ -365,7 +293,6 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
|
||||
|
||||
if e.screenshot:
|
||||
watch.save_screenshot(screenshot=e.screenshot, as_error=True)
|
||||
e.screenshot = None # Free memory immediately
|
||||
|
||||
datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
|
||||
'last_check_status': e.status_code,
|
||||
@@ -378,15 +305,9 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
|
||||
process_changedetection_results = False
|
||||
logger.error(f"Exception (BrowserStepsInUnsupportedFetcher) reached processing watch UUID: {uuid}")
|
||||
|
||||
except KeyError as e:
|
||||
# Watch was deleted between being queued and processed — skip
|
||||
logger.warning(f"Worker {worker_id} skipping UUID {uuid}: {e}")
|
||||
process_changedetection_results = False
|
||||
|
||||
except Exception as e:
|
||||
import traceback
|
||||
logger.error(f"Worker {worker_id} exception processing watch UUID: {uuid}")
|
||||
logger.exception(f"Worker {worker_id} full exception details:")
|
||||
logger.error(str(e))
|
||||
datastore.update_watch(uuid=uuid, update_obj={'last_error': "Exception: " + str(e)})
|
||||
process_changedetection_results = False
|
||||
|
||||
@@ -394,7 +315,7 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
|
||||
if not datastore.data['watching'].get(uuid):
|
||||
continue
|
||||
|
||||
update_obj['content-type'] = str(update_handler.fetcher.get_all_headers().get('content-type', '') or "").lower()
|
||||
update_obj['content-type'] = update_handler.fetcher.get_all_headers().get('content-type', '').lower()
|
||||
|
||||
if not watch.get('ignore_status_codes'):
|
||||
update_obj['consecutive_filter_failures'] = 0
|
||||
@@ -405,27 +326,17 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
|
||||
if not datastore.data['watching'].get(uuid):
|
||||
continue
|
||||
|
||||
logger.debug(f"Processing watch UUID: {uuid} - xpath_data length returned {len(update_handler.xpath_data) if update_handler and update_handler.xpath_data else 'empty.'}")
|
||||
if update_handler and process_changedetection_results:
|
||||
logger.debug(f"Processing watch UUID: {uuid} - xpath_data length returned {len(update_handler.xpath_data) if update_handler.xpath_data else 'empty.'}")
|
||||
if process_changedetection_results:
|
||||
try:
|
||||
# Reset the edited flag BEFORE update_watch (which calls watch.update() and would set it again)
|
||||
watch.reset_watch_edited_flag()
|
||||
datastore.update_watch(uuid=uuid, update_obj=update_obj)
|
||||
|
||||
if changed_detected or not watch.history_n:
|
||||
if update_handler.screenshot:
|
||||
watch.save_screenshot(screenshot=update_handler.screenshot)
|
||||
# Free screenshot memory immediately after saving
|
||||
update_handler.screenshot = None
|
||||
if hasattr(update_handler, 'fetcher') and hasattr(update_handler.fetcher, 'screenshot'):
|
||||
update_handler.fetcher.screenshot = None
|
||||
|
||||
if update_handler.xpath_data:
|
||||
watch.save_xpath_data(data=update_handler.xpath_data)
|
||||
# Free xpath data memory
|
||||
update_handler.xpath_data = None
|
||||
if hasattr(update_handler, 'fetcher') and hasattr(update_handler.fetcher, 'xpath_data'):
|
||||
update_handler.fetcher.xpath_data = None
|
||||
|
||||
# Ensure unique timestamp for history
|
||||
if watch.newest_history_key and int(fetch_start_time) == int(watch.newest_history_key):
|
||||
@@ -452,145 +363,102 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
|
||||
await send_content_changed_notification(uuid, notification_q, datastore)
|
||||
|
||||
except Exception as e:
|
||||
|
||||
logger.critical(f"Worker {worker_id} exception in process_changedetection_results")
|
||||
logger.exception(f"Worker {worker_id} full exception details:")
|
||||
logger.critical(str(e))
|
||||
datastore.update_watch(uuid=uuid, update_obj={'last_error': str(e)})
|
||||
|
||||
|
||||
# Always record attempt count
|
||||
count = watch.get('check_count', 0) + 1
|
||||
|
||||
final_updates = {'fetch_time': round(time.time() - fetch_start_time, 3),
|
||||
'check_count': count,
|
||||
}
|
||||
# Always record page title (used in notifications, and can change even when the content is the same)
|
||||
if update_obj.get('content-type') and 'html' in update_obj.get('content-type'):
|
||||
try:
|
||||
page_title = html_tools.extract_title(data=update_handler.fetcher.content)
|
||||
if page_title:
|
||||
page_title = page_title.strip()[:2000]
|
||||
logger.debug(f"UUID: {uuid} Page <title> is '{page_title}'")
|
||||
datastore.update_watch(uuid=uuid, update_obj={'page_title': page_title})
|
||||
except Exception as e:
|
||||
logger.warning(f"UUID: {uuid} Exception when extracting <title> - {str(e)}")
|
||||
|
||||
# Record server header
|
||||
try:
|
||||
server_header = str(update_handler.fetcher.get_all_headers().get('server', '') or "").strip().lower()[:255]
|
||||
if server_header:
|
||||
final_updates['remote_server_reply'] = server_header
|
||||
server_header = update_handler.fetcher.headers.get('server', '').strip().lower()[:255]
|
||||
datastore.update_watch(uuid=uuid, update_obj={'remote_server_reply': server_header})
|
||||
except Exception as e:
|
||||
server_header = None
|
||||
pass
|
||||
|
||||
if update_handler: # Could be none or empty if the processor was not found
|
||||
# Always record page title (used in notifications, and can change even when the content is the same)
|
||||
if update_obj.get('content-type') and 'html' in update_obj.get('content-type'):
|
||||
try:
|
||||
page_title = html_tools.extract_title(data=update_handler.fetcher.content)
|
||||
if page_title:
|
||||
page_title = page_title.strip()[:2000]
|
||||
logger.debug(f"UUID: {uuid} Page <title> is '{page_title}'")
|
||||
final_updates['page_title'] = page_title
|
||||
except Exception as e:
|
||||
logger.exception(f"Worker {worker_id} full exception details:")
|
||||
logger.warning(f"UUID: {uuid} Exception when extracting <title> - {str(e)}")
|
||||
# Store favicon if necessary
|
||||
if update_handler.fetcher.favicon_blob and update_handler.fetcher.favicon_blob.get('base64'):
|
||||
watch.bump_favicon(url=update_handler.fetcher.favicon_blob.get('url'),
|
||||
favicon_base_64=update_handler.fetcher.favicon_blob.get('base64')
|
||||
)
|
||||
|
||||
# Store favicon if necessary
|
||||
if update_handler.fetcher.favicon_blob and update_handler.fetcher.favicon_blob.get('base64'):
|
||||
watch.bump_favicon(url=update_handler.fetcher.favicon_blob.get('url'),
|
||||
favicon_base_64=update_handler.fetcher.favicon_blob.get('base64')
|
||||
)
|
||||
datastore.update_watch(uuid=uuid, update_obj={'fetch_time': round(time.time() - fetch_start_time, 3),
|
||||
'check_count': count})
|
||||
|
||||
datastore.update_watch(uuid=uuid, update_obj=final_updates)
|
||||
|
||||
# NOW clear fetcher content - after all processing is complete
|
||||
# This is the last point where we need the fetcher data
|
||||
if update_handler and hasattr(update_handler, 'fetcher') and update_handler.fetcher:
|
||||
update_handler.fetcher.clear_content()
|
||||
|
||||
# Explicitly delete update_handler to free all references
|
||||
if update_handler:
|
||||
del update_handler
|
||||
update_handler = None
|
||||
|
||||
# Force garbage collection
|
||||
import gc
|
||||
gc.collect()
|
||||
# NOW clear fetcher content - after all processing is complete
|
||||
# This is the last point where we need the fetcher data
|
||||
if update_handler and hasattr(update_handler, 'fetcher') and update_handler.fetcher:
|
||||
update_handler.fetcher.clear_content()
|
||||
logger.debug(f"Cleared fetcher content for UUID {uuid}")
|
||||
|
||||
except Exception as e:
|
||||
# Store the processing exception for plugin finalization hook
|
||||
processing_exception = e
|
||||
|
||||
logger.error(f"Worker {worker_id} unexpected error processing {uuid}: {e}")
|
||||
logger.exception(f"Worker {worker_id} full exception details:")
|
||||
|
||||
logger.error(f"Worker {worker_id} traceback:", exc_info=True)
|
||||
|
||||
# Also update the watch with error information
|
||||
if datastore and uuid in datastore.data['watching']:
|
||||
datastore.update_watch(uuid=uuid, update_obj={'last_error': f"Worker error: {str(e)}"})
|
||||
|
||||
finally:
|
||||
|
||||
try:
|
||||
await update_handler.fetcher.quit(watch=watch)
|
||||
except Exception as e:
|
||||
logger.error(f"Exception while cleaning/quit after calling browser: {e}")
|
||||
|
||||
# Always cleanup - this runs whether there was an exception or not
|
||||
if uuid:
|
||||
# Capture references for plugin finalize hook BEFORE cleanup
|
||||
# (cleanup may delete these variables, but plugins need the original references)
|
||||
finalize_handler = update_handler # Capture now, before cleanup deletes it
|
||||
finalize_watch = watch # Capture now, before any modifications
|
||||
|
||||
# Call quit() as backup (Puppeteer/Playwright have internal cleanup, but this acts as safety net)
|
||||
try:
|
||||
if update_handler and hasattr(update_handler, 'fetcher') and update_handler.fetcher:
|
||||
await update_handler.fetcher.quit(watch=watch)
|
||||
except Exception as e:
|
||||
logger.error(f"Exception while cleaning/quit after calling browser: {e}")
|
||||
logger.exception(f"Worker {worker_id} full exception details:")
|
||||
# Mark UUID as no longer being processed
|
||||
worker_handler.set_uuid_processing(uuid, processing=False)
|
||||
|
||||
# Send completion signal
|
||||
if watch:
|
||||
#logger.info(f"Worker {worker_id} sending completion signal for UUID {watch['uuid']}")
|
||||
watch_check_update.send(watch_uuid=watch['uuid'])
|
||||
|
||||
try:
|
||||
|
||||
# Clean up all memory references BEFORE garbage collection
|
||||
# Explicitly clean up update_handler and all its references
|
||||
if update_handler:
|
||||
# Clear fetcher content using the proper method
|
||||
if hasattr(update_handler, 'fetcher') and update_handler.fetcher:
|
||||
update_handler.fetcher.clear_content()
|
||||
|
||||
# Clear processor references
|
||||
if hasattr(update_handler, 'content_processor'):
|
||||
update_handler.content_processor = None
|
||||
del update_handler
|
||||
|
||||
update_handler = None
|
||||
|
||||
# Clear large content variables
|
||||
# Clear local contents variable if it still exists
|
||||
if 'contents' in locals():
|
||||
del contents
|
||||
|
||||
# Force garbage collection after all references are cleared
|
||||
# Note: We don't set watch = None here because:
|
||||
# 1. watch is just a local reference to datastore.data['watching'][uuid]
|
||||
# 2. Setting it to None doesn't affect the datastore
|
||||
# 3. GC can't collect the object anyway (still referenced by datastore)
|
||||
# 4. It would just cause confusion
|
||||
|
||||
# Force garbage collection after cleanup
|
||||
import gc
|
||||
gc.collect()
|
||||
|
||||
logger.debug(f"Worker {worker_id} completed watch {uuid} in {time.time()-fetch_start_time:.2f}s")
|
||||
except Exception as cleanup_error:
|
||||
logger.error(f"Worker {worker_id} error during cleanup: {cleanup_error}")
|
||||
logger.exception(f"Worker {worker_id} full exception details:")
|
||||
|
||||
# Call plugin finalization hook after all cleanup is done
|
||||
# Use captured references from before cleanup
|
||||
try:
|
||||
apply_update_finalize(
|
||||
update_handler=finalize_handler,
|
||||
watch=finalize_watch,
|
||||
datastore=datastore,
|
||||
processing_exception=processing_exception
|
||||
)
|
||||
except Exception as finalize_error:
|
||||
logger.error(f"Worker {worker_id} error in finalize hook: {finalize_error}")
|
||||
logger.exception(f"Worker {worker_id} full exception details:")
|
||||
finally:
|
||||
# Clean up captured references to allow immediate garbage collection
|
||||
del finalize_handler
|
||||
del finalize_watch
|
||||
|
||||
# Release UUID from processing AFTER all cleanup and hooks complete (thread-safe)
|
||||
# This ensures wait_for_all_checks() waits for finalize hooks to complete
|
||||
try:
|
||||
worker_pool.release_uuid_from_processing(uuid, worker_id=worker_id)
|
||||
except Exception as release_error:
|
||||
logger.error(f"Worker {worker_id} error releasing UUID: {release_error}")
|
||||
logger.exception(f"Worker {worker_id} full exception details:")
|
||||
finally:
|
||||
# Send completion signal - retrieve by name to ensure thread-safe access
|
||||
if watch:
|
||||
watch_check_update = signal('watch_check_update')
|
||||
watch_check_update.send(watch_uuid=watch['uuid'])
|
||||
|
||||
del (uuid)
|
||||
|
||||
|
||||
# Brief pause before continuing to avoid tight error loops (only on error)
|
||||
if 'e' in locals():
|
||||
await asyncio.sleep(1.0)
|
||||
@@ -598,19 +466,6 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
|
||||
# Small yield for normal completion
|
||||
await asyncio.sleep(0.01)
|
||||
|
||||
# Job completed - increment counter and check restart conditions
|
||||
jobs_processed += 1
|
||||
runtime = time.time() - start_time
|
||||
|
||||
# Check if we should restart (only when idle, between jobs)
|
||||
should_restart_jobs = jobs_processed >= max_jobs
|
||||
should_restart_time = runtime >= max_runtime_seconds
|
||||
|
||||
if should_restart_jobs or should_restart_time:
|
||||
reason = f"{jobs_processed} jobs" if should_restart_jobs else f"{runtime:.0f}s runtime"
|
||||
logger.info(f"Worker {worker_id} restarting after {reason} ({jobs_processed} jobs, {runtime:.0f}s runtime)")
|
||||
return "restart"
|
||||
|
||||
# Check if we should exit
|
||||
if app.config.exit.is_set():
|
||||
break
|
||||
@@ -618,12 +473,10 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
|
||||
# Check if we're in pytest environment - if so, be more gentle with logging
|
||||
import sys
|
||||
in_pytest = "pytest" in sys.modules or "PYTEST_CURRENT_TEST" in os.environ
|
||||
|
||||
|
||||
if not in_pytest:
|
||||
logger.info(f"Worker {worker_id} shutting down")
|
||||
|
||||
return "shutdown"
|
||||
|
||||
|
||||
def cleanup_error_artifacts(uuid, datastore):
|
||||
"""Helper function to clean up error artifacts"""
|
||||
@@ -13,7 +13,7 @@ from loguru import logger
|
||||
BACKUP_FILENAME_FORMAT = "changedetection-backup-{}.zip"
|
||||
|
||||
|
||||
def create_backup(datastore_path, watches: dict, tags: dict = None):
|
||||
def create_backup(datastore_path, watches: dict):
|
||||
logger.debug("Creating backup...")
|
||||
import zipfile
|
||||
from pathlib import Path
|
||||
@@ -27,31 +27,15 @@ def create_backup(datastore_path, watches: dict, tags: dict = None):
|
||||
compression=zipfile.ZIP_DEFLATED,
|
||||
compresslevel=8) as zipObj:
|
||||
|
||||
# Add the settings file (supports both formats)
|
||||
# New format: changedetection.json
|
||||
changedetection_json = os.path.join(datastore_path, "changedetection.json")
|
||||
if os.path.isfile(changedetection_json):
|
||||
zipObj.write(changedetection_json, arcname="changedetection.json")
|
||||
logger.debug("Added changedetection.json to backup")
|
||||
# Add the index
|
||||
zipObj.write(os.path.join(datastore_path, "url-watches.json"), arcname="url-watches.json")
|
||||
|
||||
# Legacy format: url-watches.json (for backward compatibility)
|
||||
url_watches_json = os.path.join(datastore_path, "url-watches.json")
|
||||
if os.path.isfile(url_watches_json):
|
||||
zipObj.write(url_watches_json, arcname="url-watches.json")
|
||||
logger.debug("Added url-watches.json to backup")
|
||||
|
||||
# Add tag data directories (each tag has its own {uuid}/tag.json)
|
||||
for uuid, tag in (tags or {}).items():
|
||||
for f in Path(tag.data_dir).glob('*'):
|
||||
zipObj.write(f,
|
||||
arcname=os.path.join(f.parts[-2], f.parts[-1]),
|
||||
compress_type=zipfile.ZIP_DEFLATED,
|
||||
compresslevel=8)
|
||||
logger.debug(f"Added tag '{tag.get('title')}' ({uuid}) to backup")
|
||||
# Add the flask app secret
|
||||
zipObj.write(os.path.join(datastore_path, "secret.txt"), arcname="secret.txt")
|
||||
|
||||
# Add any data in the watch data directory.
|
||||
for uuid, w in watches.items():
|
||||
for f in Path(w.data_dir).glob('*'):
|
||||
for f in Path(w.watch_data_dir).glob('*'):
|
||||
zipObj.write(f,
|
||||
# Use the full path to access the file, but make the file 'relative' in the Zip.
|
||||
arcname=os.path.join(f.parts[-2], f.parts[-1]),
|
||||
@@ -92,10 +76,7 @@ def create_backup(datastore_path, watches: dict, tags: dict = None):
|
||||
|
||||
|
||||
def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
from .restore import construct_restore_blueprint
|
||||
|
||||
backups_blueprint = Blueprint('backups', __name__, template_folder="templates")
|
||||
backups_blueprint.register_blueprint(construct_restore_blueprint(datastore))
|
||||
backup_threads = []
|
||||
|
||||
@login_optionally_required
|
||||
@@ -103,25 +84,20 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
def request_backup():
|
||||
if any(thread.is_alive() for thread in backup_threads):
|
||||
flash(gettext("A backup is already running, check back in a few minutes"), "error")
|
||||
return redirect(url_for('backups.create'))
|
||||
return redirect(url_for('backups.index'))
|
||||
|
||||
if len(find_backups()) > int(os.getenv("MAX_NUMBER_BACKUPS", 100)):
|
||||
flash(gettext("Maximum number of backups reached, please remove some"), "error")
|
||||
return redirect(url_for('backups.create'))
|
||||
return redirect(url_for('backups.index'))
|
||||
|
||||
# With immediate persistence, all data is already saved
|
||||
zip_thread = threading.Thread(
|
||||
target=create_backup,
|
||||
args=(datastore.datastore_path, datastore.data.get("watching")),
|
||||
kwargs={'tags': datastore.data['settings']['application'].get('tags', {})},
|
||||
daemon=True,
|
||||
name="BackupCreator"
|
||||
)
|
||||
# Be sure we're written fresh
|
||||
datastore.sync_to_json()
|
||||
zip_thread = threading.Thread(target=create_backup, args=(datastore.datastore_path, datastore.data.get("watching")))
|
||||
zip_thread.start()
|
||||
backup_threads.append(zip_thread)
|
||||
flash(gettext("Backup building in background, check back in a few minutes."))
|
||||
|
||||
return redirect(url_for('backups.create'))
|
||||
return redirect(url_for('backups.index'))
|
||||
|
||||
def find_backups():
|
||||
backup_filepath = os.path.join(datastore.datastore_path, BACKUP_FILENAME_FORMAT.format("*"))
|
||||
@@ -146,34 +122,31 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
def download_backup(filename):
|
||||
import re
|
||||
filename = filename.strip()
|
||||
backup_filename_regex = BACKUP_FILENAME_FORMAT.format(r"\d+")
|
||||
backup_filename_regex = BACKUP_FILENAME_FORMAT.format("\d+")
|
||||
|
||||
full_path = os.path.join(os.path.abspath(datastore.datastore_path), filename)
|
||||
if not full_path.startswith(os.path.abspath(datastore.datastore_path)):
|
||||
abort(404)
|
||||
|
||||
# Resolve 'latest' before any validation so checks run against the real filename.
|
||||
if filename == 'latest':
|
||||
backups = find_backups()
|
||||
if not backups:
|
||||
abort(404)
|
||||
filename = backups[0]['filename']
|
||||
|
||||
if not re.match(r"^" + backup_filename_regex + "$", filename):
|
||||
abort(400) # Bad Request if the filename doesn't match the pattern
|
||||
|
||||
full_path = os.path.join(os.path.abspath(datastore.datastore_path), filename)
|
||||
if not full_path.startswith(os.path.abspath(datastore.datastore_path) + os.sep):
|
||||
abort(404)
|
||||
|
||||
logger.debug(f"Backup download request for '{full_path}'")
|
||||
return send_from_directory(os.path.abspath(datastore.datastore_path), filename, as_attachment=True)
|
||||
|
||||
@login_optionally_required
|
||||
@backups_blueprint.route("/", methods=['GET'])
|
||||
@backups_blueprint.route("/create", methods=['GET'])
|
||||
def create():
|
||||
@backups_blueprint.route("", methods=['GET'])
|
||||
def index():
|
||||
backups = find_backups()
|
||||
output = render_template("backup_create.html",
|
||||
output = render_template("overview.html",
|
||||
available_backups=backups,
|
||||
backup_running=any(thread.is_alive() for thread in backup_threads)
|
||||
)
|
||||
|
||||
return output
|
||||
|
||||
@login_optionally_required
|
||||
@@ -187,6 +160,6 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
|
||||
flash(gettext("Backups were deleted."))
|
||||
|
||||
return redirect(url_for('backups.create'))
|
||||
return redirect(url_for('backups.index'))
|
||||
|
||||
return backups_blueprint
|
||||
|
||||
@@ -1,248 +0,0 @@
|
||||
import io
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import tempfile
|
||||
import threading
|
||||
import zipfile
|
||||
|
||||
from flask import Blueprint, render_template, flash, url_for, redirect, request
|
||||
from flask_babel import gettext, lazy_gettext as _l
|
||||
from wtforms import Form, BooleanField, SubmitField
|
||||
from flask_wtf.file import FileField, FileAllowed
|
||||
from loguru import logger
|
||||
|
||||
from changedetectionio.flask_app import login_optionally_required
|
||||
|
||||
# Maximum size of the uploaded zip file, in bytes.
# Override via env var MAX_RESTORE_UPLOAD_MB (value is in megabytes, default 256).
_MAX_UPLOAD_BYTES = int(os.getenv("MAX_RESTORE_UPLOAD_MB", 256)) * 1024 * 1024
# Maximum total uncompressed size of all entries, in bytes (zip-bomb guard).
# Override via env var MAX_RESTORE_DECOMPRESSED_MB (value is in megabytes, default 1024).
_MAX_DECOMPRESSED_BYTES = int(os.getenv("MAX_RESTORE_DECOMPRESSED_MB", 1024)) * 1024 * 1024
# Only top-level directories whose name is a valid UUID are treated as watch/tag entries.
# The pattern accepts version-4 UUIDs only (third group starts with "4", fourth group
# starts with 8, 9, a or b) and is matched case-insensitively.
_UUID_RE = re.compile(
    r'^[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$',
    re.IGNORECASE,
)
|
||||
|
||||
|
||||
class RestoreForm(Form):
    """Form for uploading a backup zip and choosing what to restore from it.

    All four checkboxes default to checked. Field order is significant for
    rendering and is kept exactly as declared.
    """

    # The uploaded archive; only files with a ``zip`` extension are accepted.
    zip_file = FileField(
        _l('Backup zip file'),
        validators=[
            FileAllowed(['zip'], _l('Must be a .zip backup file!')),
        ],
    )

    # Group (tag) options.
    include_groups = BooleanField(
        _l('Include groups'),
        default=True,
    )
    include_groups_replace_existing = BooleanField(
        _l('Replace existing groups of the same UUID'),
        default=True,
    )

    # Watch options.
    include_watches = BooleanField(
        _l('Include watches'),
        default=True,
    )
    include_watches_replace_existing = BooleanField(
        _l('Replace existing watches of the same UUID'),
        default=True,
    )

    submit = SubmitField(_l('Restore backup'))
|
||||
|
||||
|
||||
def _load_json_object(json_path):
    """Read ``json_path`` as UTF-8 JSON and return its top-level object as a dict.

    Raises:
        OSError: the file could not be opened or read.
        ValueError: the content is not valid UTF-8 / JSON (``UnicodeDecodeError``
            and ``json.JSONDecodeError`` are both ``ValueError`` subclasses), or
            the top-level JSON value is not an object.
    """
    with open(json_path, 'r', encoding='utf-8') as f:
        data = json.load(f)
    if not isinstance(data, dict):
        raise ValueError(f"expected a JSON object, got {type(data).__name__}")
    return data


def _replace_directory(src_dir, dst_dir):
    """Copy ``src_dir`` to ``dst_dir``, removing any existing ``dst_dir`` first."""
    if os.path.exists(dst_dir):
        shutil.rmtree(dst_dir)
    shutil.copytree(src_dir, dst_dir)


def import_from_zip(zip_stream, datastore, include_groups, include_groups_replace, include_watches, include_watches_replace):
    """
    Extract and import watches and groups from a backup zip stream.

    The archive is extracted into a temporary directory, then every top-level
    directory whose name matches ``_UUID_RE`` is inspected:
    - UUID dirs with tag.json   → Tag.model + tag_obj.commit()
    - UUID dirs with watch.json → rehydrate_entity + watch_obj.commit()

    An entry whose tag.json / watch.json cannot be read, is not valid UTF-8 JSON,
    or does not contain a JSON object is logged and skipped (it is not counted as
    restored or skipped); the remaining entries are still processed.

    Args:
        zip_stream: file path or file-like object accepted by ``zipfile.ZipFile``.
        datastore: the application datastore; its ``data``, ``datastore_path``,
            ``rehydrate_entity()`` and ``commit()`` are used.
        include_groups: import directories that contain a tag.json.
        include_groups_replace: overwrite groups whose UUID already exists.
        include_watches: import directories that contain a watch.json.
        include_watches_replace: overwrite watches whose UUID already exists.

    Returns:
        A dict with counts: restored_groups, skipped_groups, restored_watches, skipped_watches.

    Raises:
        zipfile.BadZipFile: the stream is not a valid zip.
        ValueError: the declared uncompressed size exceeds ``_MAX_DECOMPRESSED_BYTES``,
            or a member path would escape the extraction directory.
    """
    from changedetectionio.model import Tag

    restored_groups = 0
    skipped_groups = 0
    restored_watches = 0
    skipped_watches = 0

    current_tags = datastore.data['settings']['application'].get('tags', {})
    current_watches = datastore.data['watching']

    with tempfile.TemporaryDirectory() as tmpdir:
        logger.debug(f"Restore: extracting zip to {tmpdir}")
        with zipfile.ZipFile(zip_stream, 'r') as zf:
            members = zf.infolist()

            # Zip-bomb guard: refuse before extracting anything.
            total_uncompressed = sum(m.file_size for m in members)
            if total_uncompressed > _MAX_DECOMPRESSED_BYTES:
                raise ValueError(
                    f"Backup archive decompressed size ({total_uncompressed // (1024 * 1024)} MB) "
                    f"exceeds the {_MAX_DECOMPRESSED_BYTES // (1024 * 1024)} MB limit"
                )

            # Zip Slip guard: every member must resolve inside the temp directory.
            resolved_dest = os.path.realpath(tmpdir)
            for member in members:
                member_dest = os.path.realpath(os.path.join(resolved_dest, member.filename))
                if not member_dest.startswith(resolved_dest + os.sep) and member_dest != resolved_dest:
                    raise ValueError(f"Zip Slip path traversal detected in backup archive: {member.filename!r}")
                zf.extract(member, tmpdir)
        logger.debug("Restore: zip extracted, scanning UUID directories")

        for entry in os.scandir(tmpdir):
            if not entry.is_dir():
                continue

            uuid = entry.name
            if not _UUID_RE.match(uuid):
                logger.warning(f"Restore: skipping non-UUID directory {uuid!r}")
                continue
            tag_json_path = os.path.join(entry.path, 'tag.json')
            watch_json_path = os.path.join(entry.path, 'watch.json')
            dst_dir = os.path.join(datastore.datastore_path, uuid)

            # --- Tags (groups) ---
            if include_groups and os.path.exists(tag_json_path):
                if uuid in current_tags and not include_groups_replace:
                    logger.debug(f"Restore: skipping existing group {uuid} (replace not requested)")
                    skipped_groups += 1
                    continue

                try:
                    tag_data = _load_json_object(tag_json_path)
                except (ValueError, OSError) as e:
                    # One bad entry must not abort the whole restore.
                    logger.error(f"Restore: failed to read tag.json for {uuid}: {e}")
                    continue

                title = tag_data.get('title', uuid)
                logger.debug(f"Restore: importing group '{title}' ({uuid})")

                # Mirror _load_tags: set uuid and force processor
                tag_data['uuid'] = uuid
                tag_data['processor'] = 'restock_diff'

                # Copy the UUID directory so data_dir exists for commit()
                _replace_directory(entry.path, dst_dir)

                tag_obj = Tag.model(
                    datastore_path=datastore.datastore_path,
                    __datastore=datastore.data,
                    default=tag_data
                )
                current_tags[uuid] = tag_obj
                tag_obj.commit()
                restored_groups += 1
                logger.success(f"Restore: group '{title}' ({uuid}) restored")

            # --- Watches ---
            elif include_watches and os.path.exists(watch_json_path):
                if uuid in current_watches and not include_watches_replace:
                    logger.debug(f"Restore: skipping existing watch {uuid} (replace not requested)")
                    skipped_watches += 1
                    continue

                try:
                    watch_data = _load_json_object(watch_json_path)
                except (ValueError, OSError) as e:
                    # One bad entry must not abort the whole restore.
                    logger.error(f"Restore: failed to read watch.json for {uuid}: {e}")
                    continue

                url = watch_data.get('url', uuid)
                logger.debug(f"Restore: importing watch '{url}' ({uuid})")

                # Copy UUID directory first so data_dir and history files exist
                _replace_directory(entry.path, dst_dir)

                # Mirror _load_watches / rehydrate_entity
                watch_data['uuid'] = uuid
                watch_obj = datastore.rehydrate_entity(uuid, watch_data)
                current_watches[uuid] = watch_obj
                watch_obj.commit()
                restored_watches += 1
                logger.success(f"Restore: watch '{url}' ({uuid}) restored")

        logger.debug(f"Restore: scan complete - groups {restored_groups} restored / {skipped_groups} skipped, "
                     f"watches {restored_watches} restored / {skipped_watches} skipped")

    # Persist changedetection.json (includes the updated tags dict)
    logger.debug("Restore: committing datastore settings")
    datastore.commit()

    return {
        'restored_groups': restored_groups,
        'skipped_groups': skipped_groups,
        'restored_watches': restored_watches,
        'skipped_watches': skipped_watches,
    }
|
||||
|
||||
|
||||
|
||||
def construct_restore_blueprint(datastore):
    """Build the Flask blueprint that serves the backup-restore pages.

    Two views are registered on a blueprint named ``restore``:

    * ``GET /restore`` renders ``backup_restore.html`` with an empty
      ``RestoreForm``, a flag telling whether a restore thread is alive, and
      the upload / decompressed size limits expressed in whole megabytes.
    * ``POST /restore/start`` validates the uploaded zip, then runs
      ``import_from_zip`` in a daemon thread named ``BackupRestore``.

    :param datastore: handed unchanged to ``import_from_zip`` as ``datastore``.
    :return: the configured ``Blueprint`` instance.
    """
    restore_blueprint = Blueprint('restore', __name__, template_folder="templates")
    # Threads started by this blueprint; used only to tell whether a restore
    # is still in progress.
    restore_threads = []

    def _restore_is_running():
        """True while any restore thread started here is still alive."""
        return any(t.is_alive() for t in restore_threads)

    def _back_to_restore_page():
        """Redirect to the restore form (the common exit of the POST view)."""
        return redirect(url_for('backups.restore.restore'))

    def _flash_too_large():
        """Flash the 'upload too large' error with the limit in MB."""
        flash(gettext("Backup file is too large (max %(mb)s MB)", mb=_MAX_UPLOAD_BYTES // (1024 * 1024)), "error")

    # route() must be the OUTERMOST decorator: it registers whatever callable it
    # receives. With the login decorator above it, Flask would register the
    # bare view and the login wrapper would never run for requests.
    # NOTE(review): endpoint names stay 'restore' / 'backups_restore_start' only
    # if login_optionally_required preserves __name__ (functools.wraps) - confirm.
    @restore_blueprint.route("/restore", methods=['GET'])
    @login_optionally_required
    def restore():
        form = RestoreForm()
        return render_template("backup_restore.html",
                               form=form,
                               restore_running=_restore_is_running(),
                               max_upload_mb=_MAX_UPLOAD_BYTES // (1024 * 1024),
                               max_decompressed_mb=_MAX_DECOMPRESSED_BYTES // (1024 * 1024))

    @restore_blueprint.route("/restore/start", methods=['POST'])
    @login_optionally_required
    def backups_restore_start():
        # Only one restore at a time.
        if _restore_is_running():
            flash(gettext("A restore is already running, check back in a few minutes"), "error")
            return _back_to_restore_page()

        zip_file = request.files.get('zip_file')
        if not zip_file or not zip_file.filename:
            flash(gettext("No file uploaded"), "error")
            return _back_to_restore_page()

        if not zip_file.filename.lower().endswith('.zip'):
            flash(gettext("File must be a .zip backup file"), "error")
            return _back_to_restore_page()

        # Reject oversized uploads before reading the stream into memory.
        content_length = request.content_length
        if content_length and content_length > _MAX_UPLOAD_BYTES:
            _flash_too_large()
            return _back_to_restore_page()

        # Read into memory now - the request stream is gone once we return.
        # Read one byte beyond the limit so an oversized stream is detected
        # even when Content-Length was absent or understated.
        try:
            raw = zip_file.read(_MAX_UPLOAD_BYTES + 1)
            if len(raw) > _MAX_UPLOAD_BYTES:
                _flash_too_large()
                return _back_to_restore_page()

            zip_bytes = io.BytesIO(raw)
            with zipfile.ZipFile(zip_bytes):  # quick validity check before spawning
                pass
            zip_bytes.seek(0)
        except zipfile.BadZipFile:
            flash(gettext("Invalid or corrupted zip file"), "error")
            return _back_to_restore_page()

        # Checkbox fields arrive as the literal 'y' when ticked.
        include_groups = request.form.get('include_groups') == 'y'
        include_groups_replace = request.form.get('include_groups_replace_existing') == 'y'
        include_watches = request.form.get('include_watches') == 'y'
        include_watches_replace = request.form.get('include_watches_replace_existing') == 'y'

        restore_thread = threading.Thread(
            target=import_from_zip,
            kwargs={
                'zip_stream': zip_bytes,
                'datastore': datastore,
                'include_groups': include_groups,
                'include_groups_replace': include_groups_replace,
                'include_watches': include_watches,
                'include_watches_replace': include_watches_replace,
            },
            daemon=True,
            name="BackupRestore"
        )
        restore_thread.start()

        # Drop finished threads so the list does not grow without bound.
        restore_threads[:] = [t for t in restore_threads if t.is_alive()]
        restore_threads.append(restore_thread)

        flash(gettext("Restore started in background, check back in a few minutes."))
        return _back_to_restore_page()

    return restore_blueprint
|
||||
@@ -1,49 +0,0 @@
|
||||
{% extends 'base.html' %}
|
||||
{% block content %}
|
||||
{% from '_helpers.html' import render_simple_field, render_field %}
|
||||
|
||||
<div class="edit-form">
|
||||
<div class="tabs collapsable">
|
||||
<ul>
|
||||
<li class="tab active" id=""><a href="{{ url_for('backups.create') }}">{{ _('Create') }}</a></li>
|
||||
<li class="tab"><a href="{{ url_for('backups.restore.restore') }}">{{ _('Restore') }}</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
<div class="box-wrap inner">
|
||||
<div id="general">
|
||||
{% if backup_running %}
|
||||
<p>
|
||||
<span class="spinner"></span> <strong>{{ _('A backup is running!') }}</strong>
|
||||
</p>
|
||||
{% endif %}
|
||||
|
||||
<p>
|
||||
{{ _('Here you can download and request a new backup, when a backup is completed you will see it listed below.') }}
|
||||
</p>
|
||||
<br>
|
||||
{% if available_backups %}
|
||||
<ul>
|
||||
{% for backup in available_backups %}
|
||||
<li>
|
||||
<a href="{{ url_for('backups.download_backup', filename=backup["filename"]) }}">{{ backup["filename"] }}</a> {{ backup["filesize"] }} {{ _('Mb') }}
|
||||
</li>
|
||||
{% endfor %}
|
||||
</ul>
|
||||
{% else %}
|
||||
<p>
|
||||
<strong>{{ _('No backups found.') }}</strong>
|
||||
</p>
|
||||
{% endif %}
|
||||
|
||||
<a class="pure-button pure-button-primary"
|
||||
href="{{ url_for('backups.request_backup') }}">{{ _('Create backup') }}</a>
|
||||
{% if available_backups %}
|
||||
<a class="pure-button button-small button-error "
|
||||
href="{{ url_for('backups.remove_backups') }}">{{ _('Remove backups') }}</a>
|
||||
{% endif %}
|
||||
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
{% endblock %}
|
||||
@@ -1,62 +0,0 @@
|
||||
{% extends 'base.html' %}
|
||||
{% block content %}
|
||||
{% from '_helpers.html' import render_field, render_checkbox_field %}
|
||||
|
||||
<div class="edit-form">
|
||||
<div class="tabs collapsable">
|
||||
<ul>
|
||||
<li class="tab"><a href="{{ url_for('backups.create') }}">{{ _('Create') }}</a></li>
|
||||
<li class="tab active"><a href="{{ url_for('backups.restore.restore') }}">{{ _('Restore') }}</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
<div class="box-wrap inner">
|
||||
<div id="general">
|
||||
{% if restore_running %}
|
||||
<p>
|
||||
<span class="spinner"></span> <strong>{{ _('A restore is running!') }}</strong>
|
||||
</p>
|
||||
{% endif %}
|
||||
|
||||
<p>{{ _('Restore a backup. Must be a .zip backup file created on/after v0.53.1 (new database layout).') }}</p>
|
||||
<p>{{ _('Note: This does not override the main application settings, only watches and groups.') }}</p>
|
||||
<p class="pure-form-message">
|
||||
{{ _('Max upload size: %(upload)s MB · Max decompressed size: %(decomp)s MB',
|
||||
upload=max_upload_mb, decomp=max_decompressed_mb) }}
|
||||
</p>
|
||||
|
||||
<form class="pure-form pure-form-stacked settings"
|
||||
action="{{ url_for('backups.restore.backups_restore_start') }}"
|
||||
method="POST"
|
||||
enctype="multipart/form-data">
|
||||
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}">
|
||||
|
||||
<div class="pure-control-group">
|
||||
{{ render_checkbox_field(form.include_groups) }}
|
||||
<span class="pure-form-message-inline">{{ _('Include all groups found in backup?') }}</span>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_checkbox_field(form.include_groups_replace_existing) }}
|
||||
<span class="pure-form-message-inline">{{ _('Replace any existing groups of the same UUID?') }}</span>
|
||||
</div>
|
||||
|
||||
<div class="pure-control-group">
|
||||
{{ render_checkbox_field(form.include_watches) }}
|
||||
<span class="pure-form-message-inline">{{ _('Include all watches found in backup?') }}</span>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_checkbox_field(form.include_watches_replace_existing) }}
|
||||
<span class="pure-form-message-inline">{{ _('Replace any existing watches of the same UUID?') }}</span>
|
||||
</div>
|
||||
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.zip_file) }}
|
||||
</div>
|
||||
|
||||
<div class="pure-controls">
|
||||
<button type="submit" class="pure-button pure-button-primary">{{ _('Restore backup') }}</button>
|
||||
</div>
|
||||
</form>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
{% endblock %}
|
||||
36
changedetectionio/blueprint/backups/templates/overview.html
Normal file
36
changedetectionio/blueprint/backups/templates/overview.html
Normal file
@@ -0,0 +1,36 @@
|
||||
{% extends 'base.html' %}
|
||||
{% block content %}
|
||||
{% from '_helpers.html' import render_simple_field, render_field %}
|
||||
<div class="edit-form">
|
||||
<div class="box-wrap inner">
|
||||
<h4>{{ _('Backups') }}</h4>
|
||||
{% if backup_running %}
|
||||
<p>
|
||||
<strong>{{ _('A backup is running!') }}</strong>
|
||||
</p>
|
||||
{% endif %}
|
||||
<p>
|
||||
{{ _('Here you can download and request a new backup, when a backup is completed you will see it listed below.') }}
|
||||
</p>
|
||||
<br>
|
||||
{% if available_backups %}
|
||||
<ul>
|
||||
{% for backup in available_backups %}
|
||||
<li><a href="{{ url_for('backups.download_backup', filename=backup["filename"]) }}">{{ backup["filename"] }}</a> {{ backup["filesize"] }} {{ _('Mb') }}</li>
|
||||
{% endfor %}
|
||||
</ul>
|
||||
{% else %}
|
||||
<p>
|
||||
<strong>{{ _('No backups found.') }}</strong>
|
||||
</p>
|
||||
{% endif %}
|
||||
|
||||
<a class="pure-button pure-button-primary" href="{{ url_for('backups.request_backup') }}">{{ _('Create backup') }}</a>
|
||||
{% if available_backups %}
|
||||
<a class="pure-button button-small button-error " href="{{ url_for('backups.remove_backups') }}">{{ _('Remove backups') }}</a>
|
||||
{% endif %}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
{% endblock %}
|
||||
@@ -21,247 +21,93 @@ from changedetectionio.flask_app import login_optionally_required
|
||||
from loguru import logger
|
||||
|
||||
browsersteps_sessions = {}
|
||||
browsersteps_watch_to_session = {} # Maps watch_uuid -> browsersteps_session_id
|
||||
io_interface_context = None
|
||||
import json
|
||||
import hashlib
|
||||
from flask import Response
|
||||
import asyncio
|
||||
import threading
|
||||
import time
|
||||
|
||||
# Dedicated event loop for ALL browser steps sessions
|
||||
_browser_steps_loop = None
|
||||
_browser_steps_thread = None
|
||||
_browser_steps_loop_lock = threading.Lock()
|
||||
|
||||
def _start_browser_steps_loop():
    """Thread target: create an event loop, publish it, and run it until stopped.

    The new loop is installed as the current loop for the calling thread and
    stored in the module-level ``_browser_steps_loop``. When ``run_forever()``
    returns or raises, every task still attached to the loop is cancelled and
    awaited, and the loop is closed.
    """
    global _browser_steps_loop

    # Build the loop, bind it to this thread, then publish it for other threads
    event_loop = asyncio.new_event_loop()
    asyncio.set_event_loop(event_loop)
    _browser_steps_loop = event_loop

    logger.debug("Browser steps event loop started")

    try:
        # Blocks here for the lifetime of the loop - serves all browsersteps sessions
        event_loop.run_forever()
    except Exception as loop_error:
        logger.error(f"Browser steps event loop error: {loop_error}")
    finally:
        try:
            # Cancel whatever is still scheduled ...
            leftover_tasks = asyncio.all_tasks(event_loop)
            for leftover in leftover_tasks:
                leftover.cancel()

            # ... and let the cancellations actually run to completion
            if leftover_tasks:
                event_loop.run_until_complete(asyncio.gather(*leftover_tasks, return_exceptions=True))
        except Exception as cleanup_error:
            logger.debug(f"Error during browser steps loop cleanup: {cleanup_error}")
        finally:
            event_loop.close()
            logger.debug("Browser steps event loop closed")
|
||||
|
||||
def _ensure_browser_steps_loop():
    """Make sure the dedicated browser steps event loop thread is running.

    If no thread exists, or the previous one is no longer alive, a new daemon
    thread running ``_start_browser_steps_loop`` is started and this call waits
    (up to 5 seconds) for that thread to publish its loop in
    ``_browser_steps_loop``.

    :raises RuntimeError: if the new thread does not publish a loop in time.
    """
    global _browser_steps_loop, _browser_steps_thread

    with _browser_steps_loop_lock:
        if _browser_steps_thread is None or not _browser_steps_thread.is_alive():
            logger.debug("Starting browser steps event loop thread")

            # A previous thread may have left its (now closed) loop in the
            # global. Clear it, otherwise the readiness wait below would pass
            # immediately on the stale object instead of the new thread's loop.
            _browser_steps_loop = None

            _browser_steps_thread = threading.Thread(
                target=_start_browser_steps_loop,
                daemon=True,
                name="BrowserStepsEventLoop"
            )
            _browser_steps_thread.start()

            # Wait for the loop to be ready. A monotonic clock keeps the
            # timeout immune to wall-clock adjustments.
            timeout = 5.0
            deadline = time.monotonic() + timeout
            while _browser_steps_loop is None:
                if time.monotonic() > deadline:
                    raise RuntimeError("Browser steps event loop failed to start")
                time.sleep(0.01)

            logger.debug("Browser steps event loop thread started and ready")
|
||||
|
||||
def run_async_in_browser_loop(coro):
|
||||
"""Run async coroutine using the dedicated browser steps event loop"""
|
||||
_ensure_browser_steps_loop()
|
||||
|
||||
if _browser_steps_loop and not _browser_steps_loop.is_closed():
|
||||
logger.debug("Browser steps using dedicated event loop")
|
||||
future = asyncio.run_coroutine_threadsafe(coro, _browser_steps_loop)
|
||||
"""Run async coroutine using the existing async worker event loop"""
|
||||
from changedetectionio import worker_handler
|
||||
|
||||
# Use the existing async worker event loop instead of creating a new one
|
||||
if worker_handler.USE_ASYNC_WORKERS and worker_handler.async_loop and not worker_handler.async_loop.is_closed():
|
||||
logger.debug("Browser steps using existing async worker event loop")
|
||||
future = asyncio.run_coroutine_threadsafe(coro, worker_handler.async_loop)
|
||||
return future.result()
|
||||
else:
|
||||
raise RuntimeError("Browser steps event loop is not available")
|
||||
|
||||
async def _close_session_resources(session_data, label=''):
|
||||
"""Close all browser resources for a session in the correct order.
|
||||
|
||||
browserstepper.cleanup() closes page+context but not the browser itself.
|
||||
For CloakBrowser, browser.close() is what stops the local Chromium process via pw.stop().
|
||||
For the default CDP path, playwright_context.stop() shuts down the playwright instance.
|
||||
"""
|
||||
browserstepper = session_data.get('browserstepper')
|
||||
if browserstepper:
|
||||
# Fallback: create a new event loop (for sync workers or if async loop not available)
|
||||
logger.debug("Browser steps creating temporary event loop")
|
||||
loop = asyncio.new_event_loop()
|
||||
asyncio.set_event_loop(loop)
|
||||
try:
|
||||
await browserstepper.cleanup()
|
||||
except Exception as e:
|
||||
logger.error(f"Error cleaning up browserstepper{label}: {e}")
|
||||
|
||||
browser = session_data.get('browser')
|
||||
if browser:
|
||||
try:
|
||||
await asyncio.wait_for(browser.close(), timeout=5.0)
|
||||
except Exception as e:
|
||||
logger.warning(f"Error closing browser{label}: {e}")
|
||||
|
||||
playwright_context = session_data.get('playwright_context')
|
||||
if playwright_context:
|
||||
try:
|
||||
await playwright_context.stop()
|
||||
except Exception as e:
|
||||
logger.warning(f"Error stopping playwright context{label}: {e}")
|
||||
|
||||
|
||||
def cleanup_expired_sessions():
    """Close and forget every browsersteps session whose stepper has expired.

    A session counts as expired when its ``'browserstepper'`` entry is truthy
    and reports ``has_expired``. For each such session the browser resources
    are closed through ``run_async_in_browser_loop``; the session is then
    removed from ``browsersteps_sessions`` and the first watch mapping that
    points at it is removed from ``browsersteps_watch_to_session``.
    """
    # Collect first, mutate afterwards - the dict must not change while iterated
    stale_ids = [
        sid
        for sid, data in browsersteps_sessions.items()
        if data.get('browserstepper') and data.get('browserstepper').has_expired
    ]

    nothing_found = object()
    for sid in stale_ids:
        logger.debug(f"Cleaning up expired browsersteps session {sid}")
        stale_session = browsersteps_sessions[sid]

        try:
            run_async_in_browser_loop(_close_session_resources(stale_session, label=f" for session {sid}"))
        except Exception as e:
            logger.error(f"Error cleaning up session {sid}: {e}")

        # Forget the session itself
        del browsersteps_sessions[sid]

        # Forget the (first) watch that was mapped to it
        owner = next(
            (watch_uuid for watch_uuid, mapped in browsersteps_watch_to_session.items() if mapped == sid),
            nothing_found,
        )
        if owner is not nothing_found:
            del browsersteps_watch_to_session[owner]

    if stale_ids:
        logger.info(f"Cleaned up {len(stale_ids)} expired browsersteps session(s)")
|
||||
|
||||
def cleanup_session_for_watch(watch_uuid):
    """Close and forget the browsersteps session mapped to ``watch_uuid``.

    Does nothing (beyond a debug log) when the watch has no mapped session.
    Otherwise the session's browser resources are closed through
    ``run_async_in_browser_loop``, the session and its watch mapping are
    removed, and ``cleanup_expired_sessions()`` is run afterwards.

    :param watch_uuid: key into ``browsersteps_watch_to_session``.
    """
    session_id = browsersteps_watch_to_session.get(watch_uuid)
    if not session_id:
        logger.debug(f"No browsersteps session found for watch {watch_uuid}")
        return

    logger.debug(f"Cleaning up browsersteps session {session_id} for watch {watch_uuid}")

    session_data = browsersteps_sessions.get(session_id)
    if session_data:
        try:
            run_async_in_browser_loop(_close_session_resources(session_data, label=f" for watch {watch_uuid}"))
        except Exception as e:
            # Best effort: a failed close must not leave the bookkeeping behind
            logger.error(f"Error cleaning up session {session_id} for watch {watch_uuid}: {e}")

    # Remove from sessions dict. pop() instead of del: the mapping can point at
    # a session that is already gone, and that must not raise KeyError.
    browsersteps_sessions.pop(session_id, None)

    # Remove from watch mapping
    browsersteps_watch_to_session.pop(watch_uuid, None)

    logger.debug(f"Cleaned up session for watch {watch_uuid}")

    # Opportunistically cleanup any other expired sessions
    cleanup_expired_sessions()
|
||||
return loop.run_until_complete(coro)
|
||||
finally:
|
||||
loop.close()
|
||||
|
||||
def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
browser_steps_blueprint = Blueprint('browser_steps', __name__, template_folder="templates")
|
||||
|
||||
async def start_browsersteps_session(watch_uuid):
|
||||
from changedetectionio.browser_steps import browser_steps
|
||||
from . import browser_steps
|
||||
import time
|
||||
from playwright.async_api import async_playwright
|
||||
|
||||
# We keep the playwright session open for many minutes
|
||||
keepalive_seconds = int(os.getenv('BROWSERSTEPS_MINUTES_KEEPALIVE', 10)) * 60
|
||||
keepalive_ms = ((keepalive_seconds + 3) * 1000)
|
||||
|
||||
browsersteps_start_session = {'start_time': time.time()}
|
||||
|
||||
# Build proxy dict first — needed by both the CDP path and fetcher-specific launchers
|
||||
proxy_id = datastore.get_preferred_proxy_for_watch(uuid=watch_uuid)
|
||||
proxy = None
|
||||
if proxy_id:
|
||||
proxy_url = datastore.proxy_list.get(proxy_id, {}).get('url')
|
||||
if proxy_url:
|
||||
from urllib.parse import urlparse
|
||||
parsed = urlparse(proxy_url)
|
||||
proxy = {'server': proxy_url}
|
||||
if parsed.username:
|
||||
proxy['username'] = parsed.username
|
||||
if parsed.password:
|
||||
proxy['password'] = parsed.password
|
||||
logger.debug(f"Browser Steps: UUID {watch_uuid} selected proxy {proxy_url}")
|
||||
# Create a new async playwright instance for browser steps
|
||||
playwright_instance = async_playwright()
|
||||
playwright_context = await playwright_instance.start()
|
||||
|
||||
# Resolve the fetcher class for this watch so we can ask it to launch its own browser
|
||||
# if it supports that (e.g. CloakBrowser, which runs locally rather than via CDP)
|
||||
watch = datastore.data['watching'][watch_uuid]
|
||||
from changedetectionio import content_fetchers
|
||||
fetcher_class = content_fetchers.get_fetcher(watch.effective_browser_profile.fetch_backend)
|
||||
|
||||
browser = None
|
||||
playwright_context = None
|
||||
|
||||
# If the fetcher has its own browser launch for the live steps UI, use it.
|
||||
# get_browsersteps_browser(proxy, keepalive_ms) returns (browser, playwright_context_or_None)
|
||||
# or None to fall back to the default CDP path.
|
||||
if fetcher_class and hasattr(fetcher_class, 'get_browsersteps_browser'):
|
||||
result = await fetcher_class.get_browsersteps_browser(proxy=proxy, keepalive_ms=keepalive_ms)
|
||||
if result is not None:
|
||||
browser, playwright_context = result
|
||||
logger.debug(f"Browser Steps: using fetcher-specific browser for '{fetcher_class.__name__}'")
|
||||
|
||||
# Default: connect to the remote Playwright/sockpuppetbrowser via CDP
|
||||
if browser is None:
|
||||
playwright_instance = async_playwright()
|
||||
playwright_context = await playwright_instance.start()
|
||||
base_url = os.getenv('PLAYWRIGHT_DRIVER_URL', '').strip('"')
|
||||
a = "?" if '?' not in base_url else '&'
|
||||
base_url += a + f"timeout={keepalive_ms}"
|
||||
browser = await playwright_context.chromium.connect_over_cdp(base_url, timeout=keepalive_ms)
|
||||
logger.debug(f"Browser Steps: using CDP connection to {base_url}")
|
||||
keepalive_ms = ((keepalive_seconds + 3) * 1000)
|
||||
base_url = os.getenv('PLAYWRIGHT_DRIVER_URL', '').strip('"')
|
||||
a = "?" if not '?' in base_url else '&'
|
||||
base_url += a + f"timeout={keepalive_ms}"
|
||||
|
||||
browser = await playwright_context.chromium.connect_over_cdp(base_url, timeout=keepalive_ms)
|
||||
browsersteps_start_session['browser'] = browser
|
||||
browsersteps_start_session['playwright_context'] = playwright_context
|
||||
|
||||
proxy_id = datastore.get_preferred_proxy_for_watch(uuid=watch_uuid)
|
||||
proxy = None
|
||||
if proxy_id:
|
||||
proxy_url = datastore.proxy_list.get(proxy_id).get('url')
|
||||
if proxy_url:
|
||||
|
||||
# Playwright needs separate username and password values
|
||||
from urllib.parse import urlparse
|
||||
parsed = urlparse(proxy_url)
|
||||
proxy = {'server': proxy_url}
|
||||
|
||||
if parsed.username:
|
||||
proxy['username'] = parsed.username
|
||||
|
||||
if parsed.password:
|
||||
proxy['password'] = parsed.password
|
||||
|
||||
logger.debug(f"Browser Steps: UUID {watch_uuid} selected proxy {proxy_url}")
|
||||
|
||||
# Tell Playwright to connect to Chrome and setup a new session via our stepper interface
|
||||
browserstepper = browser_steps.browsersteps_live_ui(
|
||||
playwright_browser=browser,
|
||||
proxy=proxy,
|
||||
start_url=watch.link,
|
||||
headers=watch.get('headers')
|
||||
start_url=datastore.data['watching'][watch_uuid].link,
|
||||
headers=datastore.data['watching'][watch_uuid].get('headers')
|
||||
)
|
||||
|
||||
# Initialize the async connection
|
||||
await browserstepper.connect(proxy=proxy)
|
||||
|
||||
browsersteps_start_session['browserstepper'] = browserstepper
|
||||
|
||||
# For test
|
||||
#await browsersteps_start_session['browserstepper'].action_goto_url(value="http://example.com?time="+str(time.time()))
|
||||
|
||||
return browsersteps_start_session
|
||||
|
||||
|
||||
@@ -269,6 +115,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
@browser_steps_blueprint.route("/browsersteps_start_session", methods=['GET'])
|
||||
def browsersteps_start_session():
|
||||
# A new session was requested, return sessionID
|
||||
import asyncio
|
||||
import uuid
|
||||
browsersteps_session_id = str(uuid.uuid4())
|
||||
watch_uuid = request.args.get('uuid')
|
||||
@@ -276,9 +123,6 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
if not watch_uuid:
|
||||
return make_response('No Watch UUID specified', 500)
|
||||
|
||||
# Cleanup any existing session for this watch
|
||||
cleanup_session_for_watch(watch_uuid)
|
||||
|
||||
logger.debug("Starting connection with playwright")
|
||||
logger.debug("browser_steps.py connecting")
|
||||
|
||||
@@ -287,10 +131,6 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
browsersteps_sessions[browsersteps_session_id] = run_async_in_browser_loop(
|
||||
start_browsersteps_session(watch_uuid)
|
||||
)
|
||||
|
||||
# Store the mapping of watch_uuid -> browsersteps_session_id
|
||||
browsersteps_watch_to_session[watch_uuid] = browsersteps_session_id
|
||||
|
||||
except Exception as e:
|
||||
if 'ECONNREFUSED' in str(e):
|
||||
return make_response('Unable to start the Playwright Browser session, is sockpuppetbrowser running? Network configuration is OK?', 401)
|
||||
@@ -315,8 +155,8 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
watch = datastore.data['watching'].get(uuid)
|
||||
filename = f"step_before-{step_n}.jpeg" if request.args.get('type', '') == 'before' else f"step_{step_n}.jpeg"
|
||||
|
||||
if step_n and watch and os.path.isfile(os.path.join(watch.data_dir, filename)):
|
||||
response = make_response(send_from_directory(directory=watch.data_dir, path=filename))
|
||||
if step_n and watch and os.path.isfile(os.path.join(watch.watch_data_dir, filename)):
|
||||
response = make_response(send_from_directory(directory=watch.watch_data_dir, path=filename))
|
||||
response.headers['Content-type'] = 'image/jpeg'
|
||||
response.headers['Cache-Control'] = 'no-cache, no-store, must-revalidate'
|
||||
response.headers['Pragma'] = 'no-cache'
|
||||
@@ -331,10 +171,11 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
@browser_steps_blueprint.route("/browsersteps_update", methods=['POST'])
|
||||
def browsersteps_ui_update():
|
||||
import base64
|
||||
import playwright._impl._errors
|
||||
from changedetectionio.blueprint.browser_steps import browser_steps
|
||||
|
||||
remaining = 0
|
||||
remaining =0
|
||||
uuid = request.args.get('uuid')
|
||||
goto_website_url_first_step = request.args.get('goto_website_url_first_step')
|
||||
|
||||
browsersteps_session_id = request.args.get('browsersteps_session_id')
|
||||
|
||||
@@ -345,33 +186,33 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
return make_response('No session exists under that ID', 500)
|
||||
|
||||
is_last_step = False
|
||||
|
||||
# @todo - should always be an existing session
|
||||
if goto_website_url_first_step:
|
||||
logger.debug("Going to site (requested automatically before stepping)..")
|
||||
step_operation = "Goto site"
|
||||
step_selector = None
|
||||
step_optional_value = None
|
||||
else:
|
||||
# Actions - step/apply/etc, do the thing and return state
|
||||
if request.method == 'POST':
|
||||
# @todo - should always be an existing session
|
||||
step_operation = request.form.get('operation')
|
||||
step_selector = request.form.get('selector')
|
||||
step_optional_value = request.form.get('optional_value')
|
||||
is_last_step = strtobool(request.form.get('is_last_step'))
|
||||
|
||||
try:
|
||||
# Run the async call_action method in the dedicated browser steps event loop
|
||||
run_async_in_browser_loop(
|
||||
browsersteps_sessions[browsersteps_session_id]['browserstepper'].call_action(
|
||||
action_name=step_operation,
|
||||
selector=step_selector,
|
||||
optional_value=step_optional_value
|
||||
try:
|
||||
# Run the async call_action method in the dedicated browser steps event loop
|
||||
run_async_in_browser_loop(
|
||||
browsersteps_sessions[browsersteps_session_id]['browserstepper'].call_action(
|
||||
action_name=step_operation,
|
||||
selector=step_selector,
|
||||
optional_value=step_optional_value
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Exception when calling step operation {step_operation} {str(e)}")
|
||||
# Try to find something of value to give back to the user
|
||||
return make_response(str(e).splitlines()[0], 401)
|
||||
except Exception as e:
|
||||
logger.error(f"Exception when calling step operation {step_operation} {str(e)}")
|
||||
# Try to find something of value to give back to the user
|
||||
return make_response(str(e).splitlines()[0], 401)
|
||||
|
||||
|
||||
# if not this_session.page:
|
||||
# cleanup_playwright_session()
|
||||
# return make_response('Browser session ran out of time :( Please reload this page.', 401)
|
||||
|
||||
# Screenshots and other info only needed on requesting a step (POST)
|
||||
try:
|
||||
@@ -379,7 +220,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
(screenshot, xpath_data) = run_async_in_browser_loop(
|
||||
browsersteps_sessions[browsersteps_session_id]['browserstepper'].get_current_state()
|
||||
)
|
||||
|
||||
|
||||
if is_last_step:
|
||||
watch = datastore.data['watching'].get(uuid)
|
||||
u = browsersteps_sessions[browsersteps_session_id]['browserstepper'].page.url
|
||||
|
||||
@@ -8,17 +8,6 @@ from changedetectionio.content_fetchers import SCREENSHOT_MAX_HEIGHT_DEFAULT
|
||||
from changedetectionio.content_fetchers.base import manage_user_agent
|
||||
from changedetectionio.jinja2_custom import render as jinja_render
|
||||
|
||||
def browser_steps_get_valid_steps(browser_steps: list):
    """Return only the steps that name a real operation.

    A step is kept when its ``'operation'`` value is non-empty and is not the
    placeholder ``'Choose one'``. Steps without an ``'operation'`` key are
    dropped rather than raising ``KeyError``. If the first remaining step is
    ``'Goto site'`` it is removed as well.

    :param browser_steps: list of step dicts, or ``None``.
    :return: a new list of the valid step dicts (empty when nothing qualifies).
    """
    if not browser_steps:
        return []

    valid_steps = [
        step for step in browser_steps
        if step.get('operation') and step['operation'] != 'Choose one'
    ]

    # Just in case they selected Goto site by accident with older JS
    if valid_steps and valid_steps[0]['operation'] == 'Goto site':
        del valid_steps[0]

    return valid_steps
|
||||
|
||||
|
||||
# Two flags, tell the JS which of the "Selector" or "Value" field should be enabled in the front end
|
||||
@@ -40,14 +40,12 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
contents = ''
|
||||
now = time.time()
|
||||
try:
|
||||
import asyncio
|
||||
processor_module = importlib.import_module("changedetectionio.processors.text_json_diff.processor")
|
||||
update_handler = processor_module.perform_site_check(datastore=datastore,
|
||||
watch_uuid=uuid
|
||||
)
|
||||
|
||||
update_handler.preferred_proxy_override = preferred_proxy
|
||||
asyncio.run(update_handler.call_browser())
|
||||
update_handler.call_browser(preferred_proxy_id=preferred_proxy)
|
||||
# title, size is len contents not len xfer
|
||||
except content_fetcher_exceptions.Non200ErrorCodeReceived as e:
|
||||
if e.status_code == 404:
|
||||
@@ -96,13 +94,13 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
return results
|
||||
|
||||
@login_required
|
||||
@check_proxies_blueprint.route("/<uuid_str:uuid>/status", methods=['GET'])
|
||||
@check_proxies_blueprint.route("/<string:uuid>/status", methods=['GET'])
|
||||
def get_recheck_status(uuid):
|
||||
results = _recalc_check_status(uuid=uuid)
|
||||
return results
|
||||
|
||||
@login_required
|
||||
@check_proxies_blueprint.route("/<uuid_str:uuid>/start", methods=['GET'])
|
||||
@check_proxies_blueprint.route("/<string:uuid>/start", methods=['GET'])
|
||||
def start_check(uuid):
|
||||
|
||||
if not datastore.proxy_list:
|
||||
|
||||
@@ -1,8 +1,13 @@
|
||||
from flask import Blueprint, request, redirect, url_for, flash, render_template
|
||||
from loguru import logger
|
||||
|
||||
from changedetectionio.store import ChangeDetectionStore
|
||||
from changedetectionio.auth_decorator import login_optionally_required
|
||||
from changedetectionio import worker_handler
|
||||
from changedetectionio.blueprint.imports.importer import (
|
||||
import_url_list,
|
||||
import_distill_io_json,
|
||||
import_xlsx_wachete,
|
||||
import_xlsx_custom
|
||||
)
|
||||
|
||||
def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMetaData):
|
||||
import_blueprint = Blueprint('imports', __name__, template_folder="templates")
|
||||
@@ -12,27 +17,15 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
def import_page():
|
||||
remaining_urls = []
|
||||
from changedetectionio import forms
|
||||
#
|
||||
|
||||
if request.method == 'POST':
|
||||
# from changedetectionio import worker_pool
|
||||
|
||||
from changedetectionio.blueprint.imports.importer import (
|
||||
import_url_list,
|
||||
import_distill_io_json,
|
||||
import_xlsx_wachete,
|
||||
import_xlsx_custom
|
||||
)
|
||||
|
||||
# URL List import
|
||||
if request.values.get('urls') and len(request.values.get('urls').strip()):
|
||||
# Import and push into the queue for immediate update check
|
||||
from changedetectionio import processors
|
||||
importer_handler = import_url_list()
|
||||
importer_handler.run(data=request.values.get('urls'), flash=flash, datastore=datastore, processor=request.values.get('processor', processors.get_default_processor()))
|
||||
logger.debug(f"Imported {len(importer_handler.new_uuids)} new UUIDs")
|
||||
# Don't add to queue because the scheduler can see that they haven't been checked and will add them to the queue
|
||||
# for uuid in importer_handler.new_uuids:
|
||||
# worker_pool.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
importer_handler.run(data=request.values.get('urls'), flash=flash, datastore=datastore, processor=request.values.get('processor', 'text_json_diff'))
|
||||
for uuid in importer_handler.new_uuids:
|
||||
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
|
||||
if len(importer_handler.remaining_data) == 0:
|
||||
return redirect(url_for('watchlist.index'))
|
||||
@@ -44,10 +37,8 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
# Import and push into the queue for immediate update check
|
||||
d_importer = import_distill_io_json()
|
||||
d_importer.run(data=request.values.get('distill-io'), flash=flash, datastore=datastore)
|
||||
# Dont' add to queue because scheduler can see that they haven't been checked and will add them to the queue
|
||||
# for uuid in importer_handler.new_uuids:
|
||||
# worker_pool.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
|
||||
for uuid in d_importer.new_uuids:
|
||||
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
|
||||
# XLSX importer
|
||||
if request.files and request.files.get('xlsx_file'):
|
||||
@@ -69,10 +60,8 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
w_importer.import_profile = map
|
||||
w_importer.run(data=file, flash=flash, datastore=datastore)
|
||||
|
||||
# Dont' add to queue because scheduler can see that they haven't been checked and will add them to the queue
|
||||
# for uuid in importer_handler.new_uuids:
|
||||
# worker_pool.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
|
||||
for uuid in w_importer.new_uuids:
|
||||
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
|
||||
# Could be some remaining, or we could be on GET
|
||||
form = forms.importForm(formdata=request.form if request.method == 'POST' else None)
|
||||
|
||||
@@ -62,7 +62,7 @@ class import_url_list(Importer):
|
||||
extras = None
|
||||
if processor:
|
||||
extras = {'processor': processor}
|
||||
new_uuid = datastore.add_watch(url=url.strip(), tag=tags, save_immediately=False, extras=extras)
|
||||
new_uuid = datastore.add_watch(url=url.strip(), tag=tags, write_to_disk_now=False, extras=extras)
|
||||
|
||||
if new_uuid:
|
||||
# Straight into the queue.
|
||||
@@ -129,7 +129,7 @@ class import_distill_io_json(Importer):
|
||||
new_uuid = datastore.add_watch(url=d['uri'].strip(),
|
||||
tag=",".join(d.get('tags', [])),
|
||||
extras=extras,
|
||||
save_immediately=False)
|
||||
write_to_disk_now=False)
|
||||
|
||||
if new_uuid:
|
||||
# Straight into the queue.
|
||||
@@ -175,9 +175,9 @@ class import_xlsx_wachete(Importer):
|
||||
dynamic_wachet = str(data.get('dynamic wachet', '')).strip().lower() # Convert bool to str to cover all cases
|
||||
# libreoffice and others can have it as =FALSE() =TRUE(), or bool(true)
|
||||
if 'true' in dynamic_wachet or dynamic_wachet == '1':
|
||||
extras['browser_profile'] = 'browser_chromeplaywright'
|
||||
extras['fetch_backend'] = 'html_webdriver'
|
||||
elif 'false' in dynamic_wachet or dynamic_wachet == '0':
|
||||
extras['browser_profile'] = 'direct_http_requests'
|
||||
extras['fetch_backend'] = 'html_requests'
|
||||
|
||||
if data.get('xpath'):
|
||||
# @todo split by || ?
|
||||
@@ -204,7 +204,7 @@ class import_xlsx_wachete(Importer):
|
||||
new_uuid = datastore.add_watch(url=data['url'].strip(),
|
||||
extras=extras,
|
||||
tag=data.get('folder'),
|
||||
save_immediately=False)
|
||||
write_to_disk_now=False)
|
||||
if new_uuid:
|
||||
# Straight into the queue.
|
||||
self.new_uuids.append(new_uuid)
|
||||
@@ -287,7 +287,7 @@ class import_xlsx_custom(Importer):
|
||||
new_uuid = datastore.add_watch(url=url,
|
||||
extras=extras,
|
||||
tag=tags,
|
||||
save_immediately=False)
|
||||
write_to_disk_now=False)
|
||||
if new_uuid:
|
||||
# Straight into the queue.
|
||||
self.new_uuids.append(new_uuid)
|
||||
|
||||
@@ -16,11 +16,6 @@
|
||||
<form class="pure-form" action="{{url_for('imports.import_page')}}" method="POST" enctype="multipart/form-data">
|
||||
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}">
|
||||
<div class="tab-pane-inner" id="url-list">
|
||||
|
||||
<p>
|
||||
{{ _('Restoring changedetection.io backups is in the') }}<a href="{{ url_for('backups.restore.restore') }}"> {{ _('backups section') }}</a>.
|
||||
<br>
|
||||
</p>
|
||||
<div class="pure-control-group">
|
||||
{{ _('Enter one URL per line, and optionally add tags for each URL after a space, delineated by comma (,):') }}
|
||||
<br>
|
||||
@@ -42,6 +37,9 @@
|
||||
</div>
|
||||
|
||||
<div class="tab-pane-inner" id="distill-io">
|
||||
|
||||
|
||||
|
||||
<div class="pure-control-group">
|
||||
{{ _('Copy and Paste your Distill.io watch \'export\' file, this should be a JSON file.') }}<br>
|
||||
{{ _('This is') }} <i>{{ _('experimental') }}</i>, {{ _('supported fields are') }} <code>name</code>, <code>uri</code>, <code>tags</code>, <code>config:selections</code>, {{ _('the rest (including') }} <code>schedule</code>) {{ _('are ignored.') }}
|
||||
@@ -51,6 +49,8 @@
|
||||
{{ _('Be sure to set your default fetcher to Chrome if required.') }}<br>
|
||||
</p>
|
||||
</div>
|
||||
|
||||
|
||||
<textarea name="distill-io" class="pure-input-1-2" style="width: 100%;
|
||||
font-family:monospace;
|
||||
white-space: pre;
|
||||
@@ -114,7 +114,6 @@
|
||||
</div>
|
||||
</div>
|
||||
<button type="submit" class="pure-button pure-input-1-2 pure-button-primary">{{ _('Import') }}</button>
|
||||
|
||||
</form>
|
||||
|
||||
</div>
|
||||
|
||||
@@ -4,7 +4,7 @@ from flask import Blueprint, flash, redirect, url_for
|
||||
from flask_login import login_required
|
||||
from changedetectionio.store import ChangeDetectionStore
|
||||
from changedetectionio import queuedWatchMetaData
|
||||
from changedetectionio import worker_pool
|
||||
from changedetectionio import worker_handler
|
||||
from queue import PriorityQueue
|
||||
|
||||
PRICE_DATA_TRACK_ACCEPT = 'accepted'
|
||||
@@ -15,20 +15,18 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q: PriorityQueue
|
||||
price_data_follower_blueprint = Blueprint('price_data_follower', __name__)
|
||||
|
||||
@login_required
|
||||
@price_data_follower_blueprint.route("/<uuid_str:uuid>/accept", methods=['GET'])
|
||||
@price_data_follower_blueprint.route("/<string:uuid>/accept", methods=['GET'])
|
||||
def accept(uuid):
|
||||
datastore.data['watching'][uuid]['track_ldjson_price_data'] = PRICE_DATA_TRACK_ACCEPT
|
||||
datastore.data['watching'][uuid]['processor'] = 'restock_diff'
|
||||
datastore.data['watching'][uuid].clear_watch()
|
||||
datastore.data['watching'][uuid].commit()
|
||||
worker_pool.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
return redirect(url_for("watchlist.index"))
|
||||
|
||||
@login_required
|
||||
@price_data_follower_blueprint.route("/<uuid_str:uuid>/reject", methods=['GET'])
|
||||
@price_data_follower_blueprint.route("/<string:uuid>/reject", methods=['GET'])
|
||||
def reject(uuid):
|
||||
datastore.data['watching'][uuid]['track_ldjson_price_data'] = PRICE_DATA_TRACK_REJECT
|
||||
datastore.data['watching'][uuid].commit()
|
||||
return redirect(url_for("watchlist.index"))
|
||||
|
||||
|
||||
|
||||
@@ -9,12 +9,11 @@ def construct_single_watch_routes(rss_blueprint, datastore):
|
||||
datastore: The ChangeDetectionStore instance
|
||||
"""
|
||||
|
||||
@rss_blueprint.route("/watch/<uuid_str:uuid>", methods=['GET'])
|
||||
@rss_blueprint.route("/watch/<string:uuid>", methods=['GET'])
|
||||
def rss_single_watch(uuid):
|
||||
import time
|
||||
|
||||
from flask import make_response, request, Response
|
||||
from flask_babel import lazy_gettext as _l
|
||||
from flask import make_response, request
|
||||
from feedgen.feed import FeedGenerator
|
||||
from loguru import logger
|
||||
|
||||
@@ -38,17 +37,18 @@ def construct_single_watch_routes(rss_blueprint, datastore):
|
||||
|
||||
rss_content_format = datastore.data['settings']['application'].get('rss_content_format')
|
||||
|
||||
if uuid == 'first':
|
||||
uuid = list(datastore.data['watching'].keys()).pop()
|
||||
# Get the watch by UUID
|
||||
watch = datastore.data['watching'].get(uuid)
|
||||
if not watch:
|
||||
return Response(_l("Watch with UUID %(uuid)s not found", uuid=uuid), status=404, mimetype='text/plain')
|
||||
return f"Watch with UUID {uuid} not found", 404
|
||||
|
||||
# Check if watch has at least 2 history snapshots
|
||||
dates = list(watch.history.keys())
|
||||
if len(dates) < 2:
|
||||
return Response(_l("Watch %(uuid)s does not have enough history snapshots to show changes (need at least 2)", uuid=uuid), status=400, mimetype='text/plain')
|
||||
return f"Watch {uuid} does not have enough history snapshots to show changes (need at least 2)", 400
|
||||
|
||||
# Add uuid to watch for proper functioning
|
||||
watch['uuid'] = uuid
|
||||
|
||||
# Get the number of diffs to include (default: 5)
|
||||
rss_diff_length = datastore.data['settings']['application'].get('rss_diff_length', 5)
|
||||
@@ -101,7 +101,7 @@ def construct_single_watch_routes(rss_blueprint, datastore):
|
||||
date_index_from, date_index_to)
|
||||
|
||||
# Create and populate feed entry
|
||||
guid = f"{uuid}/{timestamp_to}"
|
||||
guid = f"{watch['uuid']}/{timestamp_to}"
|
||||
fe = fg.add_entry()
|
||||
title_suffix = f"Change @ {res['original_context']['change_datetime']}"
|
||||
populate_feed_entry(fe, watch, res.get('body', ''), guid, timestamp_to,
|
||||
|
||||
@@ -7,7 +7,7 @@ def construct_tag_routes(rss_blueprint, datastore):
|
||||
datastore: The ChangeDetectionStore instance
|
||||
"""
|
||||
|
||||
@rss_blueprint.route("/tag/<uuid_str:tag_uuid>", methods=['GET'])
|
||||
@rss_blueprint.route("/tag/<string:tag_uuid>", methods=['GET'])
|
||||
def rss_tag_feed(tag_uuid):
|
||||
|
||||
from flask import make_response, request, url_for
|
||||
@@ -63,8 +63,11 @@ def construct_tag_routes(rss_blueprint, datastore):
|
||||
|
||||
# Only include unviewed watches
|
||||
if not watch.viewed:
|
||||
# Include a link to the diff page (use uuid from loop, don't modify watch dict)
|
||||
diff_link = {'href': url_for('ui.ui_diff.diff_history_page', uuid=uuid, _external=True)}
|
||||
# Add uuid to watch for proper functioning
|
||||
watch['uuid'] = uuid
|
||||
|
||||
# Include a link to the diff page
|
||||
diff_link = {'href': url_for('ui.ui_diff.diff_history_page', uuid=watch['uuid'], _external=True)}
|
||||
|
||||
# Get watch label
|
||||
watch_label = get_watch_label(datastore, watch)
|
||||
|
||||
@@ -1,9 +1,8 @@
|
||||
import os
|
||||
from copy import deepcopy
|
||||
from datetime import datetime, timedelta
|
||||
from datetime import datetime
|
||||
from zoneinfo import ZoneInfo, available_timezones
|
||||
import secrets
|
||||
import time
|
||||
import flask_login
|
||||
from flask import Blueprint, render_template, request, redirect, url_for, flash
|
||||
from flask_babel import gettext
|
||||
@@ -15,9 +14,6 @@ from changedetectionio.auth_decorator import login_optionally_required
|
||||
def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
settings_blueprint = Blueprint('settings', __name__, template_folder="templates")
|
||||
|
||||
from changedetectionio.blueprint.settings.browser_profile import construct_blueprint as construct_browser_profile_blueprint
|
||||
settings_blueprint.register_blueprint(construct_browser_profile_blueprint(datastore), url_prefix='/browsers')
|
||||
|
||||
@settings_blueprint.route("", methods=['GET', "POST"])
|
||||
@login_optionally_required
|
||||
def settings_page():
|
||||
@@ -78,37 +74,26 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
del (app_update['password'])
|
||||
|
||||
datastore.data['settings']['application'].update(app_update)
|
||||
|
||||
|
||||
# Handle dynamic worker count adjustment
|
||||
old_worker_count = datastore.data['settings']['requests'].get('workers', 1)
|
||||
new_worker_count = form.data['requests'].get('workers', 1)
|
||||
|
||||
|
||||
datastore.data['settings']['requests'].update(form.data['requests'])
|
||||
datastore.commit()
|
||||
|
||||
# Clear all checksums to force reprocessing with new settings
|
||||
# Global settings can affect watch behavior (filters, rendering, etc.)
|
||||
datastore.clear_all_last_checksums()
|
||||
|
||||
|
||||
# Adjust worker count if it changed
|
||||
if new_worker_count != old_worker_count:
|
||||
from changedetectionio import worker_pool
|
||||
from changedetectionio import worker_handler
|
||||
from changedetectionio.flask_app import update_q, notification_q, app, datastore as ds
|
||||
|
||||
# Check CPU core availability and warn if worker count is high
|
||||
cpu_count = os.cpu_count()
|
||||
if cpu_count and new_worker_count >= (cpu_count * 0.9):
|
||||
flash(gettext("Warning: Worker count ({}) is close to or exceeds available CPU cores ({})").format(
|
||||
new_worker_count, cpu_count), 'warning')
|
||||
|
||||
result = worker_pool.adjust_async_worker_count(
|
||||
|
||||
result = worker_handler.adjust_async_worker_count(
|
||||
new_count=new_worker_count,
|
||||
update_q=update_q,
|
||||
notification_q=notification_q,
|
||||
app=app,
|
||||
datastore=ds
|
||||
)
|
||||
|
||||
|
||||
if result['status'] == 'success':
|
||||
flash(gettext("Worker count adjusted: {}").format(result['message']), 'notice')
|
||||
elif result['status'] == 'not_supported':
|
||||
@@ -118,11 +103,13 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
|
||||
if not os.getenv("SALTED_PASS", False) and len(form.application.form.password.encrypted_password):
|
||||
datastore.data['settings']['application']['password'] = form.application.form.password.encrypted_password
|
||||
datastore.commit()
|
||||
datastore.needs_write_urgent = True
|
||||
flash(gettext("Password protection enabled."), 'notice')
|
||||
flask_login.logout_user()
|
||||
return redirect(url_for('watchlist.index'))
|
||||
|
||||
datastore.needs_write_urgent = True
|
||||
|
||||
# Also save plugin settings from the same form submission
|
||||
plugin_tabs_list = get_plugin_settings_tabs()
|
||||
for tab in plugin_tabs_list:
|
||||
@@ -150,9 +137,6 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
active_plugins = get_active_plugins()
|
||||
python_version = f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}"
|
||||
|
||||
# Calculate uptime in seconds
|
||||
uptime_seconds = time.time() - datastore.start_time
|
||||
|
||||
# Get plugin settings tabs and instantiate forms
|
||||
plugin_tabs = get_plugin_settings_tabs()
|
||||
plugin_forms = {}
|
||||
@@ -171,7 +155,6 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
active_plugins=active_plugins,
|
||||
api_key=datastore.data['settings']['application'].get('api_access_token'),
|
||||
python_version=python_version,
|
||||
uptime_seconds=uptime_seconds,
|
||||
available_timezones=sorted(available_timezones()),
|
||||
emailprefix=os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False),
|
||||
extra_notification_token_placeholder_info=datastore.get_unique_notification_token_placeholders_available(),
|
||||
@@ -192,7 +175,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
def settings_reset_api_key():
|
||||
secret = secrets.token_hex(16)
|
||||
datastore.data['settings']['application']['api_access_token'] = secret
|
||||
datastore.commit()
|
||||
datastore.needs_write_urgent = True
|
||||
flash(gettext("API Key was regenerated."))
|
||||
return redirect(url_for('settings.settings_page')+'#api')
|
||||
|
||||
@@ -204,32 +187,4 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
logs=notification_debug_log if len(notification_debug_log) else ["Notification logs are empty - no notifications sent yet."])
|
||||
return output
|
||||
|
||||
@settings_blueprint.route("/toggle-all-paused", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def toggle_all_paused():
|
||||
current_state = datastore.data['settings']['application'].get('all_paused', False)
|
||||
datastore.data['settings']['application']['all_paused'] = not current_state
|
||||
datastore.commit()
|
||||
|
||||
if datastore.data['settings']['application']['all_paused']:
|
||||
flash(gettext("Automatic scheduling paused - checks will not be queued."), 'notice')
|
||||
else:
|
||||
flash(gettext("Automatic scheduling resumed - checks will be queued normally."), 'notice')
|
||||
|
||||
return redirect(url_for('watchlist.index'))
|
||||
|
||||
@settings_blueprint.route("/toggle-all-muted", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def toggle_all_muted():
|
||||
current_state = datastore.data['settings']['application'].get('all_muted', False)
|
||||
datastore.data['settings']['application']['all_muted'] = not current_state
|
||||
datastore.commit()
|
||||
|
||||
if datastore.data['settings']['application']['all_muted']:
|
||||
flash(gettext("All notifications muted."), 'notice')
|
||||
else:
|
||||
flash(gettext("All notifications unmuted."), 'notice')
|
||||
|
||||
return redirect(url_for('watchlist.index'))
|
||||
|
||||
return settings_blueprint
|
||||
@@ -1,196 +0,0 @@
|
||||
import flask_login
|
||||
from flask import Blueprint, render_template, request, redirect, url_for, flash
|
||||
from flask_babel import gettext
|
||||
|
||||
from changedetectionio.store import ChangeDetectionStore
|
||||
from changedetectionio.auth_decorator import login_optionally_required
|
||||
|
||||
|
||||
def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
settings_browser_profile_blueprint = Blueprint(
|
||||
'settings_browsers',
|
||||
__name__,
|
||||
template_folder="templates"
|
||||
)
|
||||
|
||||
def _render_index(browser_profile_form=None, editing_machine_name=None):
|
||||
from changedetectionio import forms
|
||||
from changedetectionio import content_fetchers as cf
|
||||
from changedetectionio.model.browser_profile import BrowserProfile, RESERVED_MACHINE_NAMES
|
||||
|
||||
# Only browser-capable fetchers are valid profile types
|
||||
fetcher_choices = cf.available_browser_fetchers()
|
||||
if browser_profile_form is None:
|
||||
browser_profile_form = forms.BrowserProfileForm()
|
||||
browser_profile_form.fetch_backend.choices = fetcher_choices
|
||||
|
||||
fetcher_supports_screenshots = {name: True for name, _ in fetcher_choices}
|
||||
|
||||
# Table shows default built-in profiles first, then user-created profiles
|
||||
store_profiles = datastore.data['settings']['application'].get('browser_profiles', {})
|
||||
user_profiles = dict(cf.DEFAULT_BROWSER_PROFILES)
|
||||
for machine_name, raw in store_profiles.items():
|
||||
try:
|
||||
user_profiles[machine_name] = BrowserProfile(**raw) if isinstance(raw, dict) else raw
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
current_default = datastore.data['settings']['application'].get('browser_profile') or 'direct_http_requests'
|
||||
|
||||
return render_template(
|
||||
"browser_profiles.html",
|
||||
browser_profiles=user_profiles,
|
||||
browser_profile_form=browser_profile_form,
|
||||
reserved_browser_profile_names=RESERVED_MACHINE_NAMES,
|
||||
fetcher_choices=fetcher_choices,
|
||||
fetcher_supports_screenshots=fetcher_supports_screenshots,
|
||||
current_default_profile=current_default,
|
||||
editing_machine_name=editing_machine_name,
|
||||
)
|
||||
|
||||
@settings_browser_profile_blueprint.route("", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def index():
|
||||
return _render_index()
|
||||
|
||||
@settings_browser_profile_blueprint.route("/<string:machine_name>/edit", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def edit(machine_name):
|
||||
from changedetectionio import forms
|
||||
from changedetectionio.model.browser_profile import BrowserProfile, RESERVED_MACHINE_NAMES
|
||||
|
||||
if machine_name in RESERVED_MACHINE_NAMES:
|
||||
flash(gettext("Built-in browser profiles cannot be edited."), 'error')
|
||||
return redirect(url_for('settings.settings_browsers.index'))
|
||||
|
||||
store_profiles = datastore.data['settings']['application'].get('browser_profiles', {})
|
||||
raw = store_profiles.get(machine_name)
|
||||
if raw is None:
|
||||
flash(gettext("Browser profile not found."), 'error')
|
||||
return redirect(url_for('settings.settings_browsers.index'))
|
||||
|
||||
profile = BrowserProfile(**raw) if isinstance(raw, dict) else raw
|
||||
form = forms.BrowserProfileForm(data=profile.model_dump())
|
||||
return _render_index(browser_profile_form=form, editing_machine_name=machine_name)
|
||||
|
||||
@settings_browser_profile_blueprint.route("/save", methods=['POST'])
|
||||
@login_optionally_required
|
||||
def save():
|
||||
from changedetectionio import forms
|
||||
from changedetectionio import content_fetchers as cf
|
||||
from changedetectionio.model.browser_profile import BrowserProfile, RESERVED_MACHINE_NAMES
|
||||
|
||||
fetcher_choices = [(name, desc) for name, desc in cf.available_fetchers()]
|
||||
browser_profile_form = forms.BrowserProfileForm(formdata=request.form)
|
||||
browser_profile_form.fetch_backend.choices = fetcher_choices
|
||||
|
||||
if not browser_profile_form.validate():
|
||||
flash(gettext("Browser profile error: {}").format(
|
||||
'; '.join(str(e) for errs in browser_profile_form.errors.values() for e in errs)
|
||||
), 'error')
|
||||
return redirect(url_for('settings.settings_browsers.index'))
|
||||
|
||||
name = browser_profile_form.name.data.strip()
|
||||
machine_name = BrowserProfile.machine_name_from_str(name)
|
||||
|
||||
if machine_name in RESERVED_MACHINE_NAMES:
|
||||
flash(gettext("Cannot use reserved profile name '{}'. Please choose a different name.").format(name), 'error')
|
||||
return redirect(url_for('settings.settings_browsers.index'))
|
||||
|
||||
original_machine_name = request.form.get('original_machine_name', '').strip()
|
||||
store_profiles = datastore.data['settings']['application'].setdefault('browser_profiles', {})
|
||||
|
||||
if machine_name != original_machine_name and machine_name in store_profiles:
|
||||
flash(gettext("A browser profile named '{}' already exists.").format(name), 'error')
|
||||
return redirect(url_for('settings.settings_browsers.index'))
|
||||
|
||||
profile_data = {
|
||||
'name': name,
|
||||
'fetch_backend': browser_profile_form.fetch_backend.data,
|
||||
'browser_connection_url': browser_profile_form.browser_connection_url.data or None,
|
||||
'viewport_width': browser_profile_form.viewport_width.data or 1280,
|
||||
'viewport_height': browser_profile_form.viewport_height.data or 1000,
|
||||
'block_images': bool(browser_profile_form.block_images.data),
|
||||
'block_fonts': bool(browser_profile_form.block_fonts.data),
|
||||
'ignore_https_errors': bool(browser_profile_form.ignore_https_errors.data),
|
||||
'user_agent': browser_profile_form.user_agent.data or None,
|
||||
'locale': browser_profile_form.locale.data or None,
|
||||
'is_builtin': False,
|
||||
}
|
||||
|
||||
try:
|
||||
BrowserProfile(**profile_data)
|
||||
except Exception as e:
|
||||
flash(gettext("Browser profile validation error: {}").format(str(e)), 'error')
|
||||
return redirect(url_for('settings.settings_browsers.index'))
|
||||
|
||||
# Handle rename: remove old key, cascade-update watches and tags
|
||||
if original_machine_name and original_machine_name != machine_name and original_machine_name in store_profiles:
|
||||
del store_profiles[original_machine_name]
|
||||
for watch in datastore.data['watching'].values():
|
||||
if watch.get('browser_profile') == original_machine_name:
|
||||
watch['browser_profile'] = machine_name
|
||||
for tag in datastore.data.get('settings', {}).get('application', {}).get('tags', {}).values():
|
||||
if tag.get('browser_profile') == original_machine_name:
|
||||
tag['browser_profile'] = machine_name
|
||||
|
||||
store_profiles[machine_name] = profile_data
|
||||
datastore.commit()
|
||||
flash(gettext("Browser profile '{}' saved.").format(name), 'notice')
|
||||
return redirect(url_for('settings.settings_browsers.index'))
|
||||
|
||||
@settings_browser_profile_blueprint.route("/<string:machine_name>/delete", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def delete(machine_name):
|
||||
from changedetectionio.model.browser_profile import RESERVED_MACHINE_NAMES
|
||||
|
||||
if machine_name in RESERVED_MACHINE_NAMES:
|
||||
flash(gettext("Built-in browser profiles cannot be deleted."), 'error')
|
||||
return redirect(url_for('settings.settings_browsers.index'))
|
||||
|
||||
store_profiles = datastore.data['settings']['application'].get('browser_profiles', {})
|
||||
if machine_name not in store_profiles:
|
||||
flash(gettext("Browser profile not found."), 'error')
|
||||
return redirect(url_for('settings.settings_browsers.index'))
|
||||
|
||||
raw = store_profiles[machine_name]
|
||||
profile_name = raw.get('name', machine_name) if isinstance(raw, dict) else machine_name
|
||||
|
||||
for watch in datastore.data['watching'].values():
|
||||
if watch.get('browser_profile') == machine_name:
|
||||
watch['browser_profile'] = None
|
||||
|
||||
for tag in datastore.data.get('settings', {}).get('application', {}).get('tags', {}).values():
|
||||
if tag.get('browser_profile') == machine_name:
|
||||
tag['browser_profile'] = None
|
||||
|
||||
if datastore.data['settings']['application'].get('browser_profile') == machine_name:
|
||||
datastore.data['settings']['application']['browser_profile'] = None
|
||||
|
||||
del store_profiles[machine_name]
|
||||
datastore.commit()
|
||||
flash(gettext("Browser profile '{}' deleted.").format(profile_name), 'notice')
|
||||
return redirect(url_for('settings.settings_browsers.index'))
|
||||
|
||||
@settings_browser_profile_blueprint.route("/set-default", methods=['POST'])
|
||||
@login_optionally_required
|
||||
def set_default():
|
||||
from changedetectionio import content_fetchers as cf
|
||||
|
||||
machine_name = request.form.get('machine_name', '').strip()
|
||||
if not machine_name:
|
||||
flash(gettext("No profile specified."), 'error')
|
||||
return redirect(url_for('settings.settings_browsers.index'))
|
||||
|
||||
from changedetectionio.model.browser_profile import get_profile
|
||||
store_profiles = datastore.data['settings']['application'].get('browser_profiles', {})
|
||||
if get_profile(machine_name, store_profiles) is None:
|
||||
flash(gettext("Unknown browser profile '{}'.").format(machine_name), 'error')
|
||||
return redirect(url_for('settings.settings_browsers.index'))
|
||||
|
||||
datastore.data['settings']['application']['browser_profile'] = machine_name
|
||||
datastore.commit()
|
||||
flash(gettext("Default browser profile set to '{}'.").format(machine_name), 'notice')
|
||||
return redirect(url_for('settings.settings_browsers.index'))
|
||||
|
||||
return settings_browser_profile_blueprint
|
||||
@@ -1,154 +0,0 @@
|
||||
{% extends 'base.html' %}
|
||||
{% block content %}
|
||||
{% from '_helpers.html' import render_field, render_checkbox_field, render_button %}
|
||||
|
||||
<div class="edit-form">
|
||||
<div class="box-wrap inner">
|
||||
<h2>{{ _('Browser Profiles') }}</h2>
|
||||
<p>{{ _('Create named profiles to configure browser settings — viewport size, connection URL, image/font blocking, and more. Each profile is based on an available browser type.') }}</p>
|
||||
|
||||
<form id="set-default-form" action="{{ url_for('settings.settings_browsers.set_default') }}" method="POST">
|
||||
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}">
|
||||
<input type="hidden" name="machine_name" id="default-machine-name" value="">
|
||||
</form>
|
||||
{% if browser_profiles %}
|
||||
<table class="pure-table pure-table-striped" style="width:100%; margin-bottom:1.5em;">
|
||||
<thead>
|
||||
<tr>
|
||||
<th style="width:2.5em; text-align:center;" title="{{ _('System default') }}">{{ _('Default') }}</th>
|
||||
<th>{{ _('Name') }}</th>
|
||||
<th>{{ _('Type') }}</th>
|
||||
<th style="width:3em; text-align:center;"></th>
|
||||
<th>{{ _('Viewport') }}</th>
|
||||
<th>{{ _('Options') }}</th>
|
||||
<th></th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{% for machine_name, profile in browser_profiles.items() %}
|
||||
<tr>
|
||||
<td style="text-align:center;">
|
||||
<input type="radio"
|
||||
name="default_profile"
|
||||
value="{{ machine_name }}"
|
||||
title="{{ _('Set as system default') }}"
|
||||
{% if machine_name == current_default_profile %}checked{% endif %}
|
||||
onchange="setDefaultProfile('{{ machine_name }}')">
|
||||
</td>
|
||||
<td>{{ profile.name }}</td>
|
||||
<td><code>{{ profile.fetch_backend }}</code></td>
|
||||
<td style="text-align:center;">{{ profile.get_fetcher_class_name()|fetcher_status_icons }}</td>
|
||||
<td>{{ profile.viewport_width }}×{{ profile.viewport_height }}</td>
|
||||
<td style="font-size:0.8em; line-height:1.6;">
|
||||
{% if profile.block_images %}{{ _('No images') }}<br>{% endif %}
|
||||
{% if profile.block_fonts %}{{ _('No fonts') }}<br>{% endif %}
|
||||
{% if profile.ignore_https_errors %}{{ _('Ignore TLS') }}<br>{% endif %}
|
||||
{% if profile.browser_connection_url %}<span title="{{ profile.browser_connection_url }}">{{ _('Custom URL') }}</span>{% endif %}
|
||||
</td>
|
||||
<td style="white-space:nowrap;">
|
||||
{% if not profile.is_builtin %}
|
||||
<a href="{{ url_for('settings.settings_browsers.edit', machine_name=machine_name) }}"
|
||||
class="pure-button button-small">{{ _('Edit') }}</a>
|
||||
<a href="{{ url_for('settings.settings_browsers.delete', machine_name=machine_name) }}"
|
||||
class="pure-button button-small button-error"
|
||||
onclick="return confirm('{{ _('Delete this browser profile?') }}')">{{ _('Delete') }}</a>
|
||||
{% endif %}
|
||||
</td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
</tbody>
|
||||
</table>
|
||||
{% else %}
|
||||
<p style="color:#888; font-style:italic;">{{ _('No browser profiles configured yet. Add one below.') }}</p>
|
||||
{% endif %}
|
||||
|
||||
<div class="border-fieldset">
|
||||
<h3 id="profile-form-heading">{{ _('Edit browser profile') if editing_machine_name else _('Add new browser profile') }}</h3>
|
||||
{% if not editing_machine_name %}
|
||||
<p style="font-size:0.9em; color:#666;">{{ _('Choose a browser type, give it a name, and configure its settings. You can create multiple profiles from the same type with different connection URLs or options.') }}</p>
|
||||
{% endif %}
|
||||
<form class="pure-form pure-form-stacked"
|
||||
id="browser-profile-form"
|
||||
action="{{ url_for('settings.settings_browsers.save') }}"
|
||||
method="POST">
|
||||
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}">
|
||||
<input type="hidden" name="original_machine_name" id="original_machine_name" value="{{ editing_machine_name or '' }}">
|
||||
<fieldset>
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(browser_profile_form.name) }}
|
||||
</div>
|
||||
<div class="pure-control-group inline-radio">
|
||||
{{ render_field(browser_profile_form.fetch_backend, id="profile-fetch-backend") }}
|
||||
</div>
|
||||
<div class="pure-control-group browser-only-field">
|
||||
{{ render_field(browser_profile_form.browser_connection_url) }}
|
||||
<span class="pure-form-message-inline">{{ _('Optional — override the system CDP/WebSocket URL for this profile only (e.g.') }} <code>ws://my-chrome:3000</code>).</span>
|
||||
</div>
|
||||
<div class="pure-control-group browser-only-field" style="display:flex; gap:1em; flex-wrap:wrap;">
|
||||
<div>{{ render_field(browser_profile_form.viewport_width) }}</div>
|
||||
<div>{{ render_field(browser_profile_form.viewport_height) }}</div>
|
||||
</div>
|
||||
<div class="pure-control-group browser-only-field">
|
||||
{{ render_checkbox_field(browser_profile_form.block_images) }}
|
||||
<span class="pure-form-message-inline">{{ _('Block image downloads — speeds up loads on image-heavy pages.') }}</span>
|
||||
</div>
|
||||
<div class="pure-control-group browser-only-field">
|
||||
{{ render_checkbox_field(browser_profile_form.block_fonts) }}
|
||||
<span class="pure-form-message-inline">{{ _('Block web font downloads.') }}</span>
|
||||
</div>
|
||||
<div class="pure-control-group browser-only-field">
|
||||
{{ render_checkbox_field(browser_profile_form.ignore_https_errors) }}
|
||||
<span class="pure-form-message-inline">{{ _('Ignore TLS/HTTPS certificate errors (useful for self-signed certs on staging sites).') }}</span>
|
||||
</div>
|
||||
<div class="pure-control-group browser-only-field">
|
||||
{{ render_field(browser_profile_form.user_agent) }}
|
||||
<span class="pure-form-message-inline">{{ _("Leave blank to use the fetcher's default User-Agent.") }}</span>
|
||||
</div>
|
||||
<div class="pure-control-group browser-only-field">
|
||||
{{ render_field(browser_profile_form.locale) }}
|
||||
<span class="pure-form-message-inline">{{ _('Sets Accept-Language and navigator.language (e.g. en-US, de-DE).') }}</span>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
<button type="submit" class="pure-button pure-button-primary" id="profile-submit-btn">{{ _('Save profile') }}</button>
|
||||
{% if editing_machine_name %}
|
||||
<a href="{{ url_for('settings.settings_browsers.index') }}" class="pure-button button-cancel">{{ _('Cancel') }}</a>
|
||||
{% endif %}
|
||||
<a href="{{ url_for('settings.settings_page') }}" class="pure-button button-cancel">{{ _('Back to Settings') }}</a>
|
||||
</div>
|
||||
</fieldset>
|
||||
</form>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<script>
|
||||
function setDefaultProfile(machineName) {
|
||||
document.getElementById('default-machine-name').value = machineName;
|
||||
document.getElementById('set-default-form').submit();
|
||||
}
|
||||
|
||||
const fetcherSupportsBrowser = {{ fetcher_supports_screenshots | tojson }};
|
||||
|
||||
function updateBrowserFieldVisibility() {
|
||||
const fetchBackend = document.getElementById('profile-fetch-backend').value;
|
||||
const isBrowser = !!fetcherSupportsBrowser[fetchBackend];
|
||||
document.querySelectorAll('.browser-only-field').forEach(function(el) {
|
||||
el.style.display = isBrowser ? '' : 'none';
|
||||
});
|
||||
}
|
||||
|
||||
document.addEventListener('DOMContentLoaded', function() {
|
||||
const sel = document.getElementById('profile-fetch-backend');
|
||||
if (sel) {
|
||||
sel.addEventListener('change', updateBrowserFieldVisibility);
|
||||
updateBrowserFieldVisibility();
|
||||
}
|
||||
});
|
||||
|
||||
{% if editing_machine_name %}
|
||||
document.addEventListener('DOMContentLoaded', function() {
|
||||
document.getElementById('browser-profile-form').scrollIntoView({behavior: 'smooth'});
|
||||
});
|
||||
{% endif %}
|
||||
</script>
|
||||
{% endblock %}
|
||||
@@ -25,10 +25,8 @@
|
||||
<li class="tab"><a href="#ui-options">{{ _('UI Options') }}</a></li>
|
||||
<li class="tab"><a href="#api">{{ _('API') }}</a></li>
|
||||
<li class="tab"><a href="#rss">{{ _('RSS') }}</a></li>
|
||||
<li class="tab"><a href="{{ url_for('backups.create') }}">{{ _('Backups') }}</a></li>
|
||||
<li class="tab"><a href="#timedate">{{ _('Time & Date') }}</a></li>
|
||||
<li class="tab"><a href="#proxies">{{ _('CAPTCHA & Proxies') }}</a></li>
|
||||
<li class="tab"><a href="{{ url_for('settings.settings_browsers.index') }}">{{ _('Browsers') }}</a></li>
|
||||
{% if plugin_tabs %}
|
||||
{% for tab in plugin_tabs %}
|
||||
<li class="tab"><a href="#plugin-{{ tab.plugin_id }}">{{ tab.tab_label }}</a></li>
|
||||
@@ -55,72 +53,64 @@
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.application.form.filter_failure_notification_threshold_attempts, class="filter_failure_notification_threshold_attempts") }}
|
||||
<span class="pure-form-message-inline">{{ _('After this many consecutive times that the CSS/xPath filter is missing, send a notification') }}
|
||||
<span class="pure-form-message-inline">After this many consecutive times that the CSS/xPath filter is missing, send a notification
|
||||
<br>
|
||||
{{ _('Set to') }} <strong>0</strong> {{ _('to disable') }}
|
||||
Set to <strong>0</strong> to disable
|
||||
</span>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.application.form.history_snapshot_max_length, class="history_snapshot_max_length") }}
|
||||
<span class="pure-form-message-inline">{{ _('Limit collection of history snapshots for each watch to this number of history items.') }}
|
||||
<br>
|
||||
{{ _('Set to empty to disable / no limit') }}
|
||||
</span>
|
||||
</div>
|
||||
|
||||
<div class="pure-control-group">
|
||||
{% if not hide_remove_pass %}
|
||||
{% if current_user.is_authenticated %}
|
||||
{{ render_button(form.application.form.removepassword_button) }}
|
||||
{% else %}
|
||||
{{ render_field(form.application.form.password) }}
|
||||
<span class="pure-form-message-inline">{{ _('Password protection for your changedetection.io application.') }}</span>
|
||||
<span class="pure-form-message-inline">Password protection for your changedetection.io application.</span>
|
||||
{% endif %}
|
||||
{% else %}
|
||||
<span class="pure-form-message-inline">{{ _('Password is locked.') }}</span>
|
||||
<span class="pure-form-message-inline">Password is locked.</span>
|
||||
{% endif %}
|
||||
</div>
|
||||
|
||||
<div class="pure-control-group">
|
||||
{{ render_checkbox_field(form.application.form.shared_diff_access, class="shared_diff_access") }}
|
||||
<span class="pure-form-message-inline">{{ _('Allow access to the watch change history page when password is enabled (Good for sharing the diff page)') }}</span>
|
||||
<span class="pure-form-message-inline">Allow access to the watch change history page when password is enabled (Good for sharing the diff page)
|
||||
</span>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_checkbox_field(form.application.form.empty_pages_are_a_change) }}
|
||||
<span class="pure-form-message-inline">{{ _('When a request returns no content, or the HTML does not contain any text, is this considered a change?') }}</span>
|
||||
<span class="pure-form-message-inline">When a request returns no content, or the HTML does not contain any text, is this considered a change?</span>
|
||||
</div>
|
||||
{% if form.requests.proxy %}
|
||||
<div>
|
||||
<br>
|
||||
<div class="inline-radio">
|
||||
{{ render_field(form.requests.form.proxy, class="fetch-backend-proxy") }}
|
||||
<span class="pure-form-message-inline">{{ _('Choose a default proxy for all watches') }}</span>
|
||||
</div>
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
</fieldset>
|
||||
</div>
|
||||
|
||||
<div class="tab-pane-inner" id="notifications">
|
||||
<fieldset>
|
||||
{{ render_common_settings_form(form.application.form, emailprefix, settings_application, extra_notification_token_placeholder_info) }}
|
||||
<div class="field-group">
|
||||
{{ render_common_settings_form(form.application.form, emailprefix, settings_application, extra_notification_token_placeholder_info) }}
|
||||
</div>
|
||||
</fieldset>
|
||||
<div class="pure-control-group" id="notification-base-url">
|
||||
{{ render_field(form.application.form.base_url, class="m-d") }}
|
||||
<span class="pure-form-message-inline">
|
||||
{{ _('Base URL used for the') }} <code>{{ '{{ base_url }}' }}</code> {{ _('token in notification links.') }}<br>
|
||||
{{ _('Default value is the system environment variable') }} '<code>BASE_URL</code>' - <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Configurable-BASE_URL-setting">{{ _('read more here') }}</a>.
|
||||
Base URL used for the <code>{{ '{{ base_url }}' }}</code> token in notification links.<br>
|
||||
Default value is the system environment variable '<code>BASE_URL</code>' - <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Configurable-BASE_URL-setting">read more here</a>.
|
||||
</span>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="tab-pane-inner" id="fetching">
|
||||
<fieldset class="pure-group" id="webdriver-override-options">
|
||||
<div class="pure-control-group inline-radio">
|
||||
{{ render_field(form.application.form.fetch_backend, class="fetch-backend") }}
|
||||
<span class="pure-form-message-inline">
|
||||
<p>Use the <strong>Basic</strong> method (default) where your watched sites don't need Javascript to render.</p>
|
||||
<p>The <strong>Chrome/Javascript</strong> method requires a network connection to a running WebDriver+Chrome server, set by the ENV var 'WEBDRIVER_URL'. </p>
|
||||
</span>
|
||||
</div>
|
||||
<fieldset class="pure-group" id="webdriver-override-options" data-visible-for="application-fetch_backend=html_webdriver">
|
||||
<div class="pure-form-message-inline">
|
||||
<strong>{{ _('If you\'re having trouble waiting for the page to be fully rendered (text missing etc), try increasing the \'wait\' time here.') }}</strong>
|
||||
<strong>If you're having trouble waiting for the page to be fully rendered (text missing etc), try increasing the 'wait' time here.</strong>
|
||||
<br>
|
||||
{{ _('This will wait') }} <i>n</i> {{ _('seconds before extracting the text.') }}
|
||||
This will wait <i>n</i> seconds before extracting the text.
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.application.form.webdriver_delay) }}
|
||||
@@ -129,27 +119,28 @@
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.requests.form.workers) }}
|
||||
{% set worker_info = get_worker_status_info() %}
|
||||
<span class="pure-form-message-inline">{{ _('Number of concurrent workers to process watches. More workers = faster processing but higher memory usage.') }}<br>
|
||||
{{ _('Currently running:') }} <strong>{{ worker_info.count }}</strong> {{ _('operational') }} {{ worker_info.type }} {{ _('workers') }}{% if worker_info.active_workers > 0 %} ({{ worker_info.active_workers }} {{ _('actively processing') }}){% endif %}.</span>
|
||||
<span class="pure-form-message-inline">Number of concurrent workers to process watches. More workers = faster processing but higher memory usage.<br>
|
||||
Currently running: <strong>{{ worker_info.count }}</strong> operational {{ worker_info.type }} workers{% if worker_info.active_workers > 0 %} ({{ worker_info.active_workers }} actively processing){% endif %}.</span>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.requests.form.jitter_seconds, class="jitter_seconds") }}
|
||||
<span class="pure-form-message-inline">{{ _('Example - 3 seconds random jitter could trigger up to 3 seconds earlier or up to 3 seconds later') }}</span>
|
||||
<span class="pure-form-message-inline">Example - 3 seconds random jitter could trigger up to 3 seconds earlier or up to 3 seconds later</span>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.requests.form.timeout) }}
|
||||
<span class="pure-form-message-inline">{{ _('For regular plain requests (not chrome based), maximum number of seconds until timeout, 1-999.') }}</span><br>
|
||||
<span class="pure-form-message-inline">For regular plain requests (not chrome based), maximum number of seconds until timeout, 1-999.<br>
|
||||
</div>
|
||||
<div class="pure-control-group inline-radio">
|
||||
{{ render_field(form.requests.form.default_ua) }}
|
||||
<span class="pure-form-message-inline">
|
||||
{{ _('Applied to all requests.') }}<br><br>
|
||||
{{ _('Note: Simply changing the User-Agent often does not defeat anti-robot technologies, it\'s important to consider') }} <a href="https://changedetection.io/tutorial/what-are-main-types-anti-robot-mechanisms">{{ _('all of the ways that the browser is detected') }}</a>.
|
||||
Applied to all requests.<br><br>
|
||||
Note: Simply changing the User-Agent often does not defeat anti-robot technologies, it's important to consider <a href="https://changedetection.io/tutorial/what-are-main-types-anti-robot-mechanisms">all of the ways that the browser is detected</a>.
|
||||
</span>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
<br>
|
||||
{{ _('Tip:') }} <a href="{{ url_for('settings.settings_page')}}#proxies">{{ _('Connect using Bright Data proxies, find out more here.') }}</a>
|
||||
<br>
|
||||
Tip: <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration#brightdata-proxy-support">Connect using Bright Data and Oxylabs Proxies, find out more here.</a>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -157,15 +148,15 @@
|
||||
|
||||
<fieldset class="pure-group">
|
||||
{{ render_checkbox_field(form.application.form.ignore_whitespace) }}
|
||||
<span class="pure-form-message-inline">{{ _('Ignore whitespace, tabs and new-lines/line-feeds when considering if a change was detected.') }}<br>
|
||||
<i>{{ _('Note:') }}</i> {{ _('Changing this will change the status of your existing watches, possibly trigger alerts etc.') }}
|
||||
<span class="pure-form-message-inline">Ignore whitespace, tabs and new-lines/line-feeds when considering if a change was detected.<br>
|
||||
<i>Note:</i> Changing this will change the status of your existing watches, possibly trigger alerts etc.
|
||||
</span>
|
||||
</fieldset>
|
||||
<fieldset class="pure-group">
|
||||
{{ render_checkbox_field(form.application.form.render_anchor_tag_content) }}
|
||||
<span class="pure-form-message-inline">{{ _('Render anchor tag content, default disabled, when enabled renders links as') }} <code>(link text)[https://somesite.com]</code>
|
||||
<span class="pure-form-message-inline">Render anchor tag content, default disabled, when enabled renders links as <code>(link text)[https://somesite.com]</code>
|
||||
<br>
|
||||
<i>{{ _('Note:') }}</i> {{ _('Changing this could affect the content of your existing watches, possibly trigger alerts etc.') }}
|
||||
<i>Note:</i> Changing this could affect the content of your existing watches, possibly trigger alerts etc.
|
||||
</span>
|
||||
</fieldset>
|
||||
<fieldset class="pure-group">
|
||||
@@ -176,9 +167,9 @@ nav
|
||||
//*[contains(text(), 'Advertisement')]") }}
|
||||
<span class="pure-form-message-inline">
|
||||
<ul>
|
||||
<li> {{ _('Remove HTML element(s) by CSS and XPath selectors before text conversion.') }} </li>
|
||||
<li> {{ _('Don\'t paste HTML here, use only CSS and XPath selectors') }} </li>
|
||||
<li> {{ _('Add multiple elements, CSS or XPath selectors per line to ignore multiple parts of the HTML.') }} </li>
|
||||
<li> Remove HTML element(s) by CSS and XPath selectors before text conversion. </li>
|
||||
<li> Don't paste HTML here, use only CSS and XPath selectors </li>
|
||||
<li> Add multiple elements, CSS or XPath selectors per line to ignore multiple parts of the HTML. </li>
|
||||
</ul>
|
||||
</span>
|
||||
</fieldset>
|
||||
@@ -186,50 +177,50 @@ nav
|
||||
{{ render_field(form.application.form.global_ignore_text, rows=5, placeholder="Some text to ignore in a line
|
||||
/some.regex\d{2}/ for case-INsensitive regex
|
||||
") }}
|
||||
<span class="pure-form-message-inline">{{ _('Note: This is applied globally in addition to the per-watch rules.') }}</span><br>
|
||||
<span class="pure-form-message-inline">Note: This is applied globally in addition to the per-watch rules.</span><br>
|
||||
<span class="pure-form-message-inline">
|
||||
<ul>
|
||||
<li>{{ _('Matching text will be') }} <strong>{{ _('ignored') }}</strong> {{ _('in the text snapshot (you can still see it but it wont trigger a change)') }}</li>
|
||||
<li>{{ _('Note: This is applied globally in addition to the per-watch rules.') }}</li>
|
||||
<li>{{ _('Each line processed separately, any line matching will be ignored (removed before creating the checksum)') }}</li>
|
||||
<li>{{ _('Regular Expression support, wrap the entire line in forward slash') }} <code>/regex/</code></li>
|
||||
<li>{{ _('Changing this will affect the comparison checksum which may trigger an alert') }}</li>
|
||||
<li>Matching text will be <strong>ignored</strong> in the text snapshot (you can still see it but it wont trigger a change)</li>
|
||||
<li>Note: This is applied globally in addition to the per-watch rules.</li>
|
||||
<li>Each line processed separately, any line matching will be ignored (removed before creating the checksum)</li>
|
||||
<li>Regular Expression support, wrap the entire line in forward slash <code>/regex/</code></li>
|
||||
<li>Changing this will affect the comparison checksum which may trigger an alert</li>
|
||||
</ul>
|
||||
</span>
|
||||
</fieldset>
|
||||
<fieldset class="pure-group">
|
||||
{{ render_checkbox_field(form.application.form.strip_ignored_lines) }}
|
||||
<span class="pure-form-message-inline">{{ _('Remove any text that appears in the "Ignore text" from the output (otherwise its just ignored for change-detection)') }}<br>
|
||||
<i>{{ _('Note:') }}</i> {{ _('Changing this will change the status of your existing watches, possibly trigger alerts etc.') }}
|
||||
<span class="pure-form-message-inline">Remove any text that appears in the "Ignore text" from the output (otherwise its just ignored for change-detection)<br>
|
||||
<i>Note:</i> Changing this will change the status of your existing watches, possibly trigger alerts etc.
|
||||
</span>
|
||||
</fieldset>
|
||||
</div>
|
||||
|
||||
<div class="tab-pane-inner" id="api">
|
||||
<h4>{{ _('API Access') }}</h4>
|
||||
<p>{{ _('Drive your changedetection.io via API, More about') }} <a href="https://changedetection.io/docs/api_v1/index.html">{{ _('API access and examples here') }}</a>.</p>
|
||||
<h4>API Access</h4>
|
||||
<p>Drive your changedetection.io via API, More about <a href="https://changedetection.io/docs/api_v1/index.html">API access and examples here</a>.</p>
|
||||
|
||||
<div class="pure-control-group">
|
||||
{{ render_checkbox_field(form.application.form.api_access_token_enabled) }}
|
||||
<div class="pure-form-message-inline">{{ _('Restrict API access limit by using') }} <code>x-api-key</code> {{ _('header - required for the Chrome Extension to work') }}</div><br>
|
||||
<div class="pure-form-message-inline"><br>{{ _('API Key') }} <span id="api-key">{{api_key}}</span>
|
||||
<span style="display:none;" id="api-key-copy" >{{ _('copy') }}</span>
|
||||
<div class="pure-form-message-inline">Restrict API access limit by using <code>x-api-key</code> header - required for the Chrome Extension to work</div><br>
|
||||
<div class="pure-form-message-inline"><br>API Key <span id="api-key">{{api_key}}</span>
|
||||
<span style="display:none;" id="api-key-copy" >copy</span>
|
||||
</div>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
<a href="{{url_for('settings.settings_reset_api_key')}}" class="pure-button button-small button-cancel">{{ _('Regenerate API key') }}</a>
|
||||
<a href="{{url_for('settings.settings_reset_api_key')}}" class="pure-button button-small button-cancel">Regenerate API key</a>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
<h4>{{ _('Chrome Extension') }}</h4>
|
||||
<p>{{ _('Easily add any web-page to your changedetection.io installation from within Chrome.') }}</p>
|
||||
<strong>{{ _('Step 1') }}</strong> {{ _('Install the extension,') }} <strong>{{ _('Step 2') }}</strong> {{ _('Navigate to this page,') }}
|
||||
<strong>{{ _('Step 3') }}</strong> {{ _('Open the extension from the toolbar and click') }} "<i>{{ _('Sync API Access') }}</i>"
|
||||
<h4>Chrome Extension</h4>
|
||||
<p>Easily add any web-page to your changedetection.io installation from within Chrome.</p>
|
||||
<strong>Step 1</strong> Install the extension, <strong>Step 2</strong> Navigate to this page,
|
||||
<strong>Step 3</strong> Open the extension from the toolbar and click "<i>Sync API Access</i>"
|
||||
<p>
|
||||
<a id="chrome-extension-link"
|
||||
title="{{ _('Try our new Chrome Extension!') }}"
|
||||
title="Try our new Chrome Extension!"
|
||||
href="https://chromewebstore.google.com/detail/changedetectionio-website/kefcfmgmlhmankjmnbijimhofdjekbop">
|
||||
<img alt="{{ _('Chrome store icon') }}" src="{{ url_for('static_content', group='images', filename='google-chrome-icon.png') }}" >
|
||||
{{ _('Chrome Webstore') }}
|
||||
<img alt="Chrome store icon" src="{{ url_for('static_content', group='images', filename='google-chrome-icon.png') }}" alt="Chrome">
|
||||
Chrome Webstore
|
||||
</a>
|
||||
</p>
|
||||
</div>
|
||||
@@ -240,20 +231,20 @@ nav
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.application.form.rss_diff_length) }}
|
||||
<span class="pure-form-message-inline">{{ _('Maximum number of history snapshots to include in the watch specific RSS feed.') }}</span>
|
||||
<span class="pure-form-message-inline">Maximum number of history snapshots to include in the watch specific RSS feed.</span>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_checkbox_field(form.application.form.rss_reader_mode) }}
|
||||
<span class="pure-form-message-inline">{{ _('For watching other RSS feeds - When watching RSS/Atom feeds, convert them into clean text for better change detection.') }}</span>
|
||||
<span class="pure-form-message-inline">For watching other RSS feeds - When watching RSS/Atom feeds, convert them into clean text for better change detection.</span>
|
||||
</div>
|
||||
<div class="pure-control-group grey-form-border">
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.application.form.rss_content_format) }}
|
||||
<span class="pure-form-message-inline">{{ _('Does your reader support HTML? Set it here') }}</span>
|
||||
<span class="pure-form-message-inline">Does your reader support HTML? Set it here</span>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.application.form.rss_template_type) }}
|
||||
<span class="pure-form-message-inline">{{ _('\'System default\' for the same template for all items, or re-use your "Notification Body" as the template.') }}</span>
|
||||
<span class="pure-form-message-inline">'System default' for the same template for all items, or re-use your "Notification Body" as the template.</span>
|
||||
</div>
|
||||
<div>
|
||||
{{ render_field(form.application.form.rss_template_override) }}
|
||||
@@ -266,38 +257,38 @@ nav
|
||||
</div>
|
||||
<div class="tab-pane-inner" id="timedate">
|
||||
<div class="pure-control-group">
|
||||
{{ _('Ensure the settings below are correct, they are used to manage the time schedule for checking your web page watches.') }}
|
||||
Ensure the settings below are correct, they are used to manage the time schedule for checking your web page watches.
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
<p><strong>{{ _('UTC Time & Date from Server:') }}</strong> <span id="utc-time" >{{ utc_time }}</span></p>
|
||||
<p><strong>{{ _('Local Time & Date in Browser:') }}</strong> <span class="local-time" data-utc="{{ utc_time }}"></span></p>
|
||||
<div>
|
||||
<p><strong>UTC Time & Date from Server:</strong> <span id="utc-time" >{{ utc_time }}</span></p>
|
||||
<p><strong>Local Time & Date in Browser:</strong> <span class="local-time" data-utc="{{ utc_time }}"></span></p>
|
||||
<p>
|
||||
{{ render_field(form.application.form.scheduler_timezone_default) }}
|
||||
<datalist id="timezones" style="display: none;">
|
||||
{%- for timezone in available_timezones -%}<option value="{{ timezone }}">{{ timezone }}</option>{%- endfor -%}
|
||||
</datalist>
|
||||
</div>
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
<div class="tab-pane-inner" id="ui-options">
|
||||
<div class="pure-control-group">
|
||||
{{ render_checkbox_field(form.application.form.ui.form.open_diff_in_new_tab, class="open_diff_in_new_tab") }}
|
||||
<span class="pure-form-message-inline">{{ _('Enable this setting to open the diff page in a new tab. If disabled, the diff page will open in the current tab.') }}</span>
|
||||
<span class="pure-form-message-inline">Enable this setting to open the diff page in a new tab. If disabled, the diff page will open in the current tab.</span>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_checkbox_field(form.application.form.ui.form.socket_io_enabled, class="socket_io_enabled") }}
|
||||
<span class="pure-form-message-inline">{{ _('Realtime UI Updates Enabled - (Restart required if this is changed)') }}</span>
|
||||
<span class="pure-form-message-inline">Realtime UI Updates Enabled - (Restart required if this is changed)</span>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_checkbox_field(form.application.form.ui.form.favicons_enabled, class="") }}
|
||||
<span class="pure-form-message-inline">{{ _('Enable or Disable Favicons next to the watch list') }}</span>
|
||||
<span class="pure-form-message-inline">Enable or Disable Favicons next to the watch list</span>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_checkbox_field(form.application.form.ui.use_page_title_in_list) }}
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.application.form.pager_size) }}
|
||||
<span class="pure-form-message-inline">{{ _('Number of items per page in the watch overview list, 0 to disable.') }}</span>
|
||||
<span class="pure-form-message-inline">Number of items per page in the watch overview list, 0 to disable.</span>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
@@ -345,12 +336,21 @@ nav
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<p><strong>{{ _('Tip') }}</strong>: {{ _('"Residential" and "Mobile" proxy type can be more successful than "Data Center" for blocked websites.') }}</p>
|
||||
<p><strong>Tip</strong>: "Residential" and "Mobile" proxy type can be more successfull than "Data Center" for blocked websites.
|
||||
|
||||
<div class="pure-control-group" id="extra-proxies-setting">
|
||||
{{ render_fieldlist_with_inline_errors(form.requests.form.extra_proxies) }}
|
||||
<span class="pure-form-message-inline">{{ _('"Name" will be used for selecting the proxy in the Watch Edit settings') }}</span><br>
|
||||
<span class="pure-form-message-inline">{{ _('SOCKS5 proxies with authentication are only supported with \'plain requests\' fetcher, for other fetchers you should whitelist the IP access instead') }}</span>
|
||||
<span class="pure-form-message-inline">"Name" will be used for selecting the proxy in the Watch Edit settings</span><br>
|
||||
<span class="pure-form-message-inline">SOCKS5 proxies with authentication are only supported with 'plain requests' fetcher, for other fetchers you should whitelist the IP access instead</span>
|
||||
{% if form.requests.proxy %}
|
||||
<div>
|
||||
<br>
|
||||
<div class="inline-radio">
|
||||
{{ render_field(form.requests.form.proxy, class="fetch-backend-proxy") }}
|
||||
<span class="pure-form-message-inline">Choose a default proxy for all watches</span>
|
||||
</div>
|
||||
</div>
|
||||
{% endif %}
|
||||
</div>
|
||||
<div class="pure-control-group" id="extra-browsers-setting">
|
||||
<p>
|
||||
@@ -387,7 +387,6 @@ nav
|
||||
{% endfor %}
|
||||
{% endif %}
|
||||
<div class="tab-pane-inner" id="info">
|
||||
<p><strong>{{ _('Uptime:') }}</strong> {{ uptime_seconds|format_duration }}</p>
|
||||
<p><strong>{{ _('Python version:') }}</strong> {{ python_version }}</p>
|
||||
<p><strong>{{ _('Plugins active:') }}</strong></p>
|
||||
{% if active_plugins %}
|
||||
|
||||
@@ -1,7 +1,5 @@
|
||||
import threading
|
||||
from flask import Blueprint, request, render_template, flash, url_for, redirect
|
||||
from flask_babel import gettext
|
||||
from loguru import logger
|
||||
|
||||
from changedetectionio.store import ChangeDetectionStore
|
||||
from changedetectionio.flask_app import login_optionally_required
|
||||
@@ -54,94 +52,52 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
|
||||
return redirect(url_for('tags.tags_overview_page'))
|
||||
|
||||
@tags_blueprint.route("/mute/<uuid_str:uuid>", methods=['GET'])
|
||||
@tags_blueprint.route("/mute/<string:uuid>", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def mute(uuid):
|
||||
tag = datastore.data['settings']['application']['tags'].get(uuid)
|
||||
if tag:
|
||||
tag['notification_muted'] = not tag['notification_muted']
|
||||
tag.commit()
|
||||
if datastore.data['settings']['application']['tags'].get(uuid):
|
||||
datastore.data['settings']['application']['tags'][uuid]['notification_muted'] = not datastore.data['settings']['application']['tags'][uuid]['notification_muted']
|
||||
return redirect(url_for('tags.tags_overview_page'))
|
||||
|
||||
@tags_blueprint.route("/delete/<uuid_str:uuid>", methods=['GET'])
|
||||
@tags_blueprint.route("/delete/<string:uuid>", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def delete(uuid):
|
||||
# Delete the tag from settings immediately
|
||||
removed = 0
|
||||
# Delete the tag, and any tag reference
|
||||
if datastore.data['settings']['application']['tags'].get(uuid):
|
||||
del datastore.data['settings']['application']['tags'][uuid]
|
||||
|
||||
# Remove tag from all watches in background thread to avoid blocking
|
||||
def remove_tag_background(tag_uuid):
|
||||
"""Background thread to remove tag from watches - discarded after completion."""
|
||||
removed_count = 0
|
||||
try:
|
||||
for watch_uuid, watch in datastore.data['watching'].items():
|
||||
if watch.get('tags') and tag_uuid in watch['tags']:
|
||||
watch['tags'].remove(tag_uuid)
|
||||
watch.commit()
|
||||
removed_count += 1
|
||||
logger.info(f"Background: Tag {tag_uuid} removed from {removed_count} watches")
|
||||
except Exception as e:
|
||||
logger.error(f"Error removing tag from watches: {e}")
|
||||
for watch_uuid, watch in datastore.data['watching'].items():
|
||||
if watch.get('tags') and uuid in watch['tags']:
|
||||
removed += 1
|
||||
watch['tags'].remove(uuid)
|
||||
|
||||
# Start daemon thread
|
||||
threading.Thread(target=remove_tag_background, args=(uuid,), daemon=True).start()
|
||||
|
||||
flash(gettext("Tag deleted, removing from watches in background"))
|
||||
flash(gettext("Tag deleted and removed from {} watches").format(removed))
|
||||
return redirect(url_for('tags.tags_overview_page'))
|
||||
|
||||
@tags_blueprint.route("/unlink/<uuid_str:uuid>", methods=['GET'])
|
||||
@tags_blueprint.route("/unlink/<string:uuid>", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def unlink(uuid):
|
||||
# Unlink tag from all watches in background thread to avoid blocking
|
||||
def unlink_tag_background(tag_uuid):
|
||||
"""Background thread to unlink tag from watches - discarded after completion."""
|
||||
unlinked_count = 0
|
||||
try:
|
||||
for watch_uuid, watch in datastore.data['watching'].items():
|
||||
if watch.get('tags') and tag_uuid in watch['tags']:
|
||||
watch['tags'].remove(tag_uuid)
|
||||
watch.commit()
|
||||
unlinked_count += 1
|
||||
logger.info(f"Background: Tag {tag_uuid} unlinked from {unlinked_count} watches")
|
||||
except Exception as e:
|
||||
logger.error(f"Error unlinking tag from watches: {e}")
|
||||
unlinked = 0
|
||||
for watch_uuid, watch in datastore.data['watching'].items():
|
||||
if watch.get('tags') and uuid in watch['tags']:
|
||||
unlinked += 1
|
||||
watch['tags'].remove(uuid)
|
||||
|
||||
# Start daemon thread
|
||||
threading.Thread(target=unlink_tag_background, args=(uuid,), daemon=True).start()
|
||||
|
||||
flash(gettext("Unlinking tag from watches in background"))
|
||||
flash(gettext("Tag unlinked removed from {} watches").format(unlinked))
|
||||
return redirect(url_for('tags.tags_overview_page'))
|
||||
|
||||
@tags_blueprint.route("/delete_all", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def delete_all():
|
||||
for watch_uuid, watch in datastore.data['watching'].items():
|
||||
watch['tags'] = []
|
||||
datastore.data['settings']['application']['tags'] = {}
|
||||
|
||||
for tag_uuid in list(datastore.data['settings']['application']['tags'].keys()):
|
||||
# TagsDict 'del' handler will remove the dir
|
||||
del datastore.data['settings']['application']['tags'][tag_uuid]
|
||||
|
||||
|
||||
# Clear tags from all watches in background thread to avoid blocking
|
||||
def clear_all_tags_background():
|
||||
"""Background thread to clear tags from all watches - discarded after completion."""
|
||||
cleared_count = 0
|
||||
try:
|
||||
for watch_uuid, watch in datastore.data['watching'].items():
|
||||
watch['tags'] = []
|
||||
watch.commit()
|
||||
cleared_count += 1
|
||||
logger.info(f"Background: Cleared tags from {cleared_count} watches")
|
||||
except Exception as e:
|
||||
logger.error(f"Error clearing tags from watches: {e}")
|
||||
|
||||
# Start daemon thread
|
||||
threading.Thread(target=clear_all_tags_background, daemon=True).start()
|
||||
|
||||
flash(gettext("All tags deleted, clearing from watches in background"))
|
||||
flash(gettext("All tags deleted"))
|
||||
return redirect(url_for('tags.tags_overview_page'))
|
||||
|
||||
@tags_blueprint.route("/edit/<uuid_str:uuid>", methods=['GET'])
|
||||
@tags_blueprint.route("/edit/<string:uuid>", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def form_tag_edit(uuid):
|
||||
from changedetectionio.blueprint.tags.form import group_restock_settings_form
|
||||
@@ -160,21 +116,6 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
default_system_settings = datastore.data['settings'],
|
||||
)
|
||||
|
||||
# Bridge API-stored processor_config_* values into the form's FormField sub-forms.
|
||||
# The API stores processor_config_restock_diff in the tag dict; find the matching
|
||||
# FormField by checking which one's sub-fields cover the config keys.
|
||||
from wtforms.fields.form import FormField as WTFormField
|
||||
for key, value in default.items():
|
||||
if not key.startswith('processor_config_') or not isinstance(value, dict):
|
||||
continue
|
||||
for form_field in form:
|
||||
if isinstance(form_field, WTFormField) and all(k in form_field.form._fields for k in value):
|
||||
for sub_key, sub_value in value.items():
|
||||
sub_field = form_field.form._fields.get(sub_key)
|
||||
if sub_field is not None:
|
||||
sub_field.data = sub_value
|
||||
break
|
||||
|
||||
template_args = {
|
||||
'data': default,
|
||||
'form': form,
|
||||
@@ -218,17 +159,17 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
return output
|
||||
|
||||
|
||||
@tags_blueprint.route("/edit/<uuid_str:uuid>", methods=['POST'])
|
||||
@tags_blueprint.route("/edit/<string:uuid>", methods=['POST'])
|
||||
@login_optionally_required
|
||||
def form_tag_edit_submit(uuid):
|
||||
from changedetectionio.blueprint.tags.form import group_restock_settings_form
|
||||
if uuid == 'first':
|
||||
uuid = list(datastore.data['settings']['application']['tags'].keys()).pop()
|
||||
|
||||
tag = datastore.data['settings']['application']['tags'].get(uuid)
|
||||
default = datastore.data['settings']['application']['tags'].get(uuid)
|
||||
|
||||
form = group_restock_settings_form(formdata=request.form if request.method == 'POST' else None,
|
||||
data=tag,
|
||||
data=default,
|
||||
extra_notification_tokens=datastore.get_unique_notification_tokens_available()
|
||||
)
|
||||
# @todo subclass form so validation works
|
||||
@@ -237,18 +178,15 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
# flash(','.join(l), 'error')
|
||||
# return redirect(url_for('tags.form_tag_edit_submit', uuid=uuid))
|
||||
|
||||
tag.update(form.data)
|
||||
tag['processor'] = 'restock_diff'
|
||||
tag.commit()
|
||||
|
||||
# Clear checksums for all watches using this tag to force reprocessing
|
||||
# Tag changes affect inherited configuration
|
||||
cleared_count = datastore.clear_checksums_for_tag(uuid)
|
||||
logger.info(f"Tag {uuid} updated, cleared {cleared_count} watch checksums")
|
||||
|
||||
datastore.data['settings']['application']['tags'][uuid].update(form.data)
|
||||
datastore.data['settings']['application']['tags'][uuid]['processor'] = 'restock_diff'
|
||||
datastore.needs_write_urgent = True
|
||||
flash(gettext("Updated"))
|
||||
|
||||
return redirect(url_for('tags.tags_overview_page'))
|
||||
|
||||
|
||||
@tags_blueprint.route("/delete/<string:uuid>", methods=['GET'])
|
||||
def form_tag_delete(uuid):
|
||||
return redirect(url_for('tags.tags_overview_page'))
|
||||
return tags_blueprint
|
||||
|
||||
@@ -50,8 +50,7 @@
|
||||
<td>{{ "{:,}".format(tag_count[uuid]) if uuid in tag_count else 0 }}</td>
|
||||
<td class="title-col inline"> <a href="{{url_for('watchlist.index', tag=uuid) }}">{{ tag.title }}</a></td>
|
||||
<td>
|
||||
<a class="pure-button pure-button-primary" href="{{ url_for('tags.form_tag_edit', uuid=uuid) }}">{{ _('Edit') }}</a>
|
||||
<a href="{{ url_for('ui.form_watch_checknow', tag=uuid) }}" class="pure-button pure-button-primary" >{{ _('Recheck') }}</a>
|
||||
<a class="pure-button pure-button-primary" href="{{ url_for('tags.form_tag_edit', uuid=uuid) }}">{{ _('Edit') }}</a>
|
||||
<a class="pure-button button-error"
|
||||
href="{{ url_for('tags.delete', uuid=uuid) }}"
|
||||
data-requires-confirm
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
import time
|
||||
import threading
|
||||
from flask import Blueprint, request, redirect, url_for, flash, render_template, session, current_app
|
||||
from flask import Blueprint, request, redirect, url_for, flash, render_template, session
|
||||
from flask_babel import gettext
|
||||
from loguru import logger
|
||||
|
||||
@@ -10,7 +9,7 @@ from changedetectionio.blueprint.ui.notification import construct_blueprint as c
|
||||
from changedetectionio.blueprint.ui.views import construct_blueprint as construct_views_blueprint
|
||||
from changedetectionio.blueprint.ui import diff, preview
|
||||
|
||||
def _handle_operations(op, uuids, datastore, worker_pool, update_q, queuedWatchMetaData, watch_check_update, extra_data=None, emit_flash=True):
|
||||
def _handle_operations(op, uuids, datastore, worker_handler, update_q, queuedWatchMetaData, watch_check_update, extra_data=None, emit_flash=True):
|
||||
from flask import request, flash
|
||||
|
||||
if op == 'delete':
|
||||
@@ -24,7 +23,6 @@ def _handle_operations(op, uuids, datastore, worker_pool, update_q, queuedWatchM
|
||||
for uuid in uuids:
|
||||
if datastore.data['watching'].get(uuid):
|
||||
datastore.data['watching'][uuid]['paused'] = True
|
||||
datastore.data['watching'][uuid].commit()
|
||||
if emit_flash:
|
||||
flash(gettext("{} watches paused").format(len(uuids)))
|
||||
|
||||
@@ -32,7 +30,6 @@ def _handle_operations(op, uuids, datastore, worker_pool, update_q, queuedWatchM
|
||||
for uuid in uuids:
|
||||
if datastore.data['watching'].get(uuid):
|
||||
datastore.data['watching'][uuid.strip()]['paused'] = False
|
||||
datastore.data['watching'][uuid].commit()
|
||||
if emit_flash:
|
||||
flash(gettext("{} watches unpaused").format(len(uuids)))
|
||||
|
||||
@@ -47,7 +44,6 @@ def _handle_operations(op, uuids, datastore, worker_pool, update_q, queuedWatchM
|
||||
for uuid in uuids:
|
||||
if datastore.data['watching'].get(uuid):
|
||||
datastore.data['watching'][uuid]['notification_muted'] = True
|
||||
datastore.data['watching'][uuid].commit()
|
||||
if emit_flash:
|
||||
flash(gettext("{} watches muted").format(len(uuids)))
|
||||
|
||||
@@ -55,7 +51,6 @@ def _handle_operations(op, uuids, datastore, worker_pool, update_q, queuedWatchM
|
||||
for uuid in uuids:
|
||||
if datastore.data['watching'].get(uuid):
|
||||
datastore.data['watching'][uuid]['notification_muted'] = False
|
||||
datastore.data['watching'][uuid].commit()
|
||||
if emit_flash:
|
||||
flash(gettext("{} watches un-muted").format(len(uuids)))
|
||||
|
||||
@@ -63,7 +58,7 @@ def _handle_operations(op, uuids, datastore, worker_pool, update_q, queuedWatchM
|
||||
for uuid in uuids:
|
||||
if datastore.data['watching'].get(uuid):
|
||||
# Recheck and require a full reprocessing
|
||||
worker_pool.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
if emit_flash:
|
||||
flash(gettext("{} watches queued for rechecking").format(len(uuids)))
|
||||
|
||||
@@ -71,7 +66,6 @@ def _handle_operations(op, uuids, datastore, worker_pool, update_q, queuedWatchM
|
||||
for uuid in uuids:
|
||||
if datastore.data['watching'].get(uuid):
|
||||
datastore.data['watching'][uuid]["last_error"] = False
|
||||
datastore.data['watching'][uuid].commit()
|
||||
if emit_flash:
|
||||
flash(gettext("{} watches errors cleared").format(len(uuids)))
|
||||
|
||||
@@ -92,7 +86,6 @@ def _handle_operations(op, uuids, datastore, worker_pool, update_q, queuedWatchM
|
||||
datastore.data['watching'][uuid]['notification_body'] = None
|
||||
datastore.data['watching'][uuid]['notification_urls'] = []
|
||||
datastore.data['watching'][uuid]['notification_format'] = USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH
|
||||
datastore.data['watching'][uuid].commit()
|
||||
if emit_flash:
|
||||
flash(gettext("{} watches set to use default notification settings").format(len(uuids)))
|
||||
|
||||
@@ -108,7 +101,6 @@ def _handle_operations(op, uuids, datastore, worker_pool, update_q, queuedWatchM
|
||||
datastore.data['watching'][uuid]['tags'] = []
|
||||
|
||||
datastore.data['watching'][uuid]['tags'].append(tag_uuid)
|
||||
datastore.data['watching'][uuid].commit()
|
||||
if emit_flash:
|
||||
flash(gettext("{} watches were tagged").format(len(uuids)))
|
||||
|
||||
@@ -116,7 +108,7 @@ def _handle_operations(op, uuids, datastore, worker_pool, update_q, queuedWatchM
|
||||
for uuid in uuids:
|
||||
watch_check_update.send(watch_uuid=uuid)
|
||||
|
||||
def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_pool, queuedWatchMetaData, watch_check_update):
|
||||
def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_handler, queuedWatchMetaData, watch_check_update):
|
||||
ui_blueprint = Blueprint('ui', __name__, template_folder="templates")
|
||||
|
||||
# Register the edit blueprint
|
||||
@@ -141,7 +133,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_pool,
|
||||
# Import the login decorator
|
||||
from changedetectionio.auth_decorator import login_optionally_required
|
||||
|
||||
@ui_blueprint.route("/clear_history/<uuid_str:uuid>", methods=['GET'])
|
||||
@ui_blueprint.route("/clear_history/<string:uuid>", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def clear_watch_history(uuid):
|
||||
try:
|
||||
@@ -156,27 +148,12 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_pool,
|
||||
@login_optionally_required
|
||||
def clear_all_history():
|
||||
if request.method == 'POST':
|
||||
confirmtext = request.form.get('confirmtext', '')
|
||||
confirmtext = request.form.get('confirmtext')
|
||||
|
||||
if confirmtext.strip().lower() == gettext('clear').strip().lower():
|
||||
# Run in background thread to avoid blocking
|
||||
def clear_history_background():
|
||||
# Capture UUIDs first to avoid race conditions
|
||||
watch_uuids = list(datastore.data['watching'].keys())
|
||||
logger.info(f"Background: Clearing history for {len(watch_uuids)} watches")
|
||||
|
||||
for uuid in watch_uuids:
|
||||
try:
|
||||
datastore.clear_watch_history(uuid)
|
||||
except Exception as e:
|
||||
logger.error(f"Error clearing history for watch {uuid}: {e}")
|
||||
|
||||
logger.info("Background: Completed clearing history")
|
||||
|
||||
# Start daemon thread
|
||||
threading.Thread(target=clear_history_background, daemon=True).start()
|
||||
|
||||
flash(gettext("History clearing started in background"))
|
||||
if confirmtext == 'clear':
|
||||
for uuid in datastore.data['watching'].keys():
|
||||
datastore.clear_watch_history(uuid)
|
||||
flash(gettext("Cleared snapshot history for all watches"))
|
||||
else:
|
||||
flash(gettext('Incorrect confirmation text.'), 'error')
|
||||
|
||||
@@ -192,37 +169,17 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_pool,
|
||||
# Save the current newest history as the most recently viewed
|
||||
with_errors = request.args.get('with_errors') == "1"
|
||||
tag_limit = request.args.get('tag')
|
||||
logger.debug(f"Limiting to tag {tag_limit}")
|
||||
now = int(time.time())
|
||||
for watch_uuid, watch in datastore.data['watching'].items():
|
||||
if with_errors and not watch.get('last_error'):
|
||||
continue
|
||||
|
||||
# Mark watches as viewed - use background thread only for large watch counts
|
||||
def mark_viewed_impl():
|
||||
"""Mark watches as viewed - can run synchronously or in background thread."""
|
||||
marked_count = 0
|
||||
try:
|
||||
for watch_uuid, watch in datastore.data['watching'].items():
|
||||
if with_errors and not watch.get('last_error'):
|
||||
continue
|
||||
if tag_limit and ( not watch.get('tags') or tag_limit not in watch['tags'] ):
|
||||
logger.debug(f"Skipping watch {watch_uuid}")
|
||||
continue
|
||||
|
||||
if tag_limit and (not watch.get('tags') or tag_limit not in watch['tags']):
|
||||
continue
|
||||
|
||||
datastore.set_last_viewed(watch_uuid, now)
|
||||
marked_count += 1
|
||||
|
||||
logger.info(f"Marking complete: {marked_count} watches marked as viewed")
|
||||
except Exception as e:
|
||||
logger.error(f"Error marking as viewed: {e}")
|
||||
|
||||
# For small watch counts (< 10), run synchronously to avoid race conditions in tests
|
||||
# For larger counts, use background thread to avoid blocking the UI
|
||||
watch_count = len(datastore.data['watching'])
|
||||
if watch_count < 10:
|
||||
# Run synchronously for small watch counts
|
||||
mark_viewed_impl()
|
||||
else:
|
||||
# Start background thread for large watch counts
|
||||
thread = threading.Thread(target=mark_viewed_impl, daemon=True)
|
||||
thread.start()
|
||||
datastore.set_last_viewed(watch_uuid, now)
|
||||
|
||||
return redirect(url_for('watchlist.index', tag=tag_limit))
|
||||
|
||||
@@ -230,14 +187,14 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_pool,
|
||||
@login_optionally_required
|
||||
def form_delete():
|
||||
uuid = request.args.get('uuid')
|
||||
# More for testing, possible to return the first/only
|
||||
if uuid == 'first':
|
||||
uuid = list(datastore.data['watching'].keys()).pop()
|
||||
|
||||
if uuid != 'all' and not uuid in datastore.data['watching'].keys():
|
||||
flash(gettext('The watch by UUID {} does not exist.').format(uuid), 'error')
|
||||
return redirect(url_for('watchlist.index'))
|
||||
|
||||
# More for testing, possible to return the first/only
|
||||
if uuid == 'first':
|
||||
uuid = list(datastore.data['watching'].keys()).pop()
|
||||
datastore.delete(uuid)
|
||||
flash(gettext('Deleted.'))
|
||||
|
||||
@@ -247,14 +204,14 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_pool,
|
||||
@login_optionally_required
|
||||
def form_clone():
|
||||
uuid = request.args.get('uuid')
|
||||
|
||||
# More for testing, possible to return the first/only
|
||||
if uuid == 'first':
|
||||
uuid = list(datastore.data['watching'].keys()).pop()
|
||||
|
||||
new_uuid = datastore.clone(uuid)
|
||||
|
||||
if not datastore.data['watching'].get(uuid).get('paused'):
|
||||
worker_pool.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=5, item={'uuid': new_uuid}))
|
||||
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=5, item={'uuid': new_uuid}))
|
||||
|
||||
flash(gettext('Cloned, you are editing the new watch.'))
|
||||
|
||||
@@ -268,81 +225,38 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_pool,
|
||||
uuid = request.args.get('uuid')
|
||||
with_errors = request.args.get('with_errors') == "1"
|
||||
|
||||
i = 0
|
||||
|
||||
running_uuids = worker_handler.get_running_uuids()
|
||||
|
||||
if uuid:
|
||||
# Single watch - check if already queued or running
|
||||
if worker_pool.is_watch_running(uuid) or uuid in update_q.get_queued_uuids():
|
||||
flash(gettext("Watch is already queued or being checked."))
|
||||
else:
|
||||
worker_pool.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
flash(gettext("Queued 1 watch for rechecking."))
|
||||
if uuid not in running_uuids:
|
||||
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
i += 1
|
||||
|
||||
else:
|
||||
# Multiple watches - first count how many need to be queued
|
||||
watches_to_queue = []
|
||||
# Recheck all, including muted
|
||||
# Get most overdue first
|
||||
for k in sorted(datastore.data['watching'].items(), key=lambda item: item[1].get('last_checked', 0)):
|
||||
watch_uuid = k[0]
|
||||
watch = k[1]
|
||||
if not watch['paused'] and watch_uuid:
|
||||
if with_errors and not watch.get('last_error'):
|
||||
continue
|
||||
if tag != None and tag not in watch['tags']:
|
||||
continue
|
||||
watches_to_queue.append(watch_uuid)
|
||||
if not watch['paused']:
|
||||
if watch_uuid not in running_uuids:
|
||||
if with_errors and not watch.get('last_error'):
|
||||
continue
|
||||
|
||||
# If less than 20 watches, queue synchronously for immediate feedback
|
||||
if len(watches_to_queue) < 20:
|
||||
# Get already queued/running UUIDs once (efficient)
|
||||
queued_uuids = set(update_q.get_queued_uuids())
|
||||
running_uuids = set(worker_pool.get_running_uuids())
|
||||
if tag != None and tag not in watch['tags']:
|
||||
continue
|
||||
|
||||
# Filter out watches that are already queued or running
|
||||
watches_to_queue_filtered = []
|
||||
for watch_uuid in watches_to_queue:
|
||||
if watch_uuid not in queued_uuids and watch_uuid not in running_uuids:
|
||||
watches_to_queue_filtered.append(watch_uuid)
|
||||
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid}))
|
||||
i += 1
|
||||
|
||||
# Queue only the filtered watches
|
||||
for watch_uuid in watches_to_queue_filtered:
|
||||
worker_pool.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid}))
|
||||
|
||||
# Provide feedback about skipped watches
|
||||
skipped_count = len(watches_to_queue) - len(watches_to_queue_filtered)
|
||||
if skipped_count > 0:
|
||||
flash(gettext("Queued {} watches for rechecking ({} already queued or running).").format(
|
||||
len(watches_to_queue_filtered), skipped_count))
|
||||
else:
|
||||
if len(watches_to_queue_filtered) == 1:
|
||||
flash(gettext("Queued 1 watch for rechecking."))
|
||||
else:
|
||||
flash(gettext("Queued {} watches for rechecking.").format(len(watches_to_queue_filtered)))
|
||||
else:
|
||||
# 20+ watches - queue in background thread to avoid blocking HTTP response
|
||||
# Capture queued/running state before background thread
|
||||
queued_uuids = set(update_q.get_queued_uuids())
|
||||
running_uuids = set(worker_pool.get_running_uuids())
|
||||
|
||||
def queue_watches_background():
|
||||
"""Background thread to queue watches - discarded after completion."""
|
||||
try:
|
||||
queued_count = 0
|
||||
skipped_count = 0
|
||||
for watch_uuid in watches_to_queue:
|
||||
# Check if already queued or running (state captured at start)
|
||||
if watch_uuid not in queued_uuids and watch_uuid not in running_uuids:
|
||||
worker_pool.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid}))
|
||||
queued_count += 1
|
||||
else:
|
||||
skipped_count += 1
|
||||
|
||||
logger.info(f"Background queueing complete: {queued_count} watches queued, {skipped_count} skipped (already queued/running)")
|
||||
except Exception as e:
|
||||
logger.error(f"Error in background queueing: {e}")
|
||||
|
||||
# Start background thread and return immediately
|
||||
thread = threading.Thread(target=queue_watches_background, daemon=True, name="QueueWatches-Background")
|
||||
thread.start()
|
||||
|
||||
# Return immediately with approximate message
|
||||
flash(gettext("Queueing watches for rechecking in background..."))
|
||||
if i == 1:
|
||||
flash(gettext("Queued 1 watch for rechecking."))
|
||||
if i > 1:
|
||||
flash(gettext("Queued {} watches for rechecking.").format(i))
|
||||
if i == 0:
|
||||
flash(gettext("No watches available to recheck."))
|
||||
|
||||
return redirect(url_for('watchlist.index', **({'tag': tag} if tag else {})))
|
||||
|
||||
@@ -357,7 +271,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_pool,
|
||||
extra_data=extra_data,
|
||||
queuedWatchMetaData=queuedWatchMetaData,
|
||||
uuids=uuids,
|
||||
worker_pool=worker_pool,
|
||||
worker_handler=worker_handler,
|
||||
update_q=update_q,
|
||||
watch_check_update=watch_check_update,
|
||||
op=op,
|
||||
@@ -366,7 +280,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_pool,
|
||||
return redirect(url_for('watchlist.index'))
|
||||
|
||||
|
||||
@ui_blueprint.route("/share-url/<uuid_str:uuid>", methods=['GET'])
|
||||
@ui_blueprint.route("/share-url/<string:uuid>", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def form_share_put_watch(uuid):
|
||||
"""Given a watch UUID, upload the info and return a share-link
|
||||
@@ -375,6 +289,9 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_pool,
|
||||
import json
|
||||
from copy import deepcopy
|
||||
|
||||
# more for testing
|
||||
if uuid == 'first':
|
||||
uuid = list(datastore.data['watching'].keys()).pop()
|
||||
|
||||
# copy it to memory as trim off what we dont need (history)
|
||||
watch = deepcopy(datastore.data['watching'].get(uuid))
|
||||
@@ -414,25 +331,4 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_pool,
|
||||
|
||||
return redirect(url_for('watchlist.index'))
|
||||
|
||||
@ui_blueprint.route("/language/auto-detect", methods=['GET'])
|
||||
def delete_locale_language_session_var_if_it_exists():
|
||||
"""Clear the session locale preference to auto-detect from browser Accept-Language header"""
|
||||
if 'locale' in session:
|
||||
session.pop('locale', None)
|
||||
# Refresh Flask-Babel to clear cached locale
|
||||
from flask_babel import refresh
|
||||
refresh()
|
||||
flash(gettext("Language set to auto-detect from browser"))
|
||||
|
||||
# Check if there's a redirect parameter to return to the same page
|
||||
redirect_url = request.args.get('redirect')
|
||||
|
||||
# If redirect is provided and safe, use it
|
||||
from changedetectionio.is_safe_url import is_safe_url
|
||||
if redirect_url and is_safe_url(redirect_url, current_app):
|
||||
return redirect(redirect_url)
|
||||
|
||||
# Otherwise redirect to watchlist
|
||||
return redirect(url_for('watchlist.index'))
|
||||
|
||||
return ui_blueprint
|
||||
@@ -66,7 +66,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
|
||||
return Markup(result)
|
||||
|
||||
@diff_blueprint.route("/diff/<uuid_str:uuid>", methods=['GET'])
|
||||
@diff_blueprint.route("/diff/<string:uuid>", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def diff_history_page(uuid):
|
||||
"""
|
||||
@@ -83,6 +83,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
If a processor doesn't have a difference module, falls back to text_json_diff.
|
||||
"""
|
||||
|
||||
# More for testing, possible to return the first/only
|
||||
if uuid == 'first':
|
||||
uuid = list(datastore.data['watching'].keys()).pop()
|
||||
|
||||
@@ -100,21 +101,23 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
# Get the processor type for this watch
|
||||
processor_name = watch.get('processor', 'text_json_diff')
|
||||
|
||||
# Try to get the processor's difference module (works for both built-in and plugin processors)
|
||||
from changedetectionio.processors import get_processor_submodule
|
||||
processor_module = get_processor_submodule(processor_name, 'difference')
|
||||
try:
|
||||
# Try to import the processor's difference module
|
||||
processor_module = importlib.import_module(f'changedetectionio.processors.{processor_name}.difference')
|
||||
|
||||
# Call the processor's render() function
|
||||
if processor_module and hasattr(processor_module, 'render'):
|
||||
return processor_module.render(
|
||||
watch=watch,
|
||||
datastore=datastore,
|
||||
request=request,
|
||||
url_for=url_for,
|
||||
render_template=render_template,
|
||||
flash=flash,
|
||||
redirect=redirect
|
||||
)
|
||||
# Call the processor's render() function
|
||||
if hasattr(processor_module, 'render'):
|
||||
return processor_module.render(
|
||||
watch=watch,
|
||||
datastore=datastore,
|
||||
request=request,
|
||||
url_for=url_for,
|
||||
render_template=render_template,
|
||||
flash=flash,
|
||||
redirect=redirect
|
||||
)
|
||||
except (ImportError, ModuleNotFoundError) as e:
|
||||
logger.warning(f"Processor {processor_name} does not have a difference module, falling back to text_json_diff: {e}")
|
||||
|
||||
# Fallback: if processor doesn't have difference module, use text_json_diff as default
|
||||
from changedetectionio.processors.text_json_diff.difference import render as default_render
|
||||
@@ -128,7 +131,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
redirect=redirect
|
||||
)
|
||||
|
||||
@diff_blueprint.route("/diff/<uuid_str:uuid>/extract", methods=['GET'])
|
||||
@diff_blueprint.route("/diff/<string:uuid>/extract", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def diff_history_page_extract_GET(uuid):
|
||||
"""
|
||||
@@ -141,10 +144,10 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
Each processor implements processors/{type}/extract.py::render_form()
|
||||
If a processor doesn't have an extract module, falls back to text_json_diff.
|
||||
"""
|
||||
|
||||
|
||||
# More for testing, possible to return the first/only
|
||||
if uuid == 'first':
|
||||
uuid = list(datastore.data['watching'].keys()).pop()
|
||||
|
||||
try:
|
||||
watch = datastore.data['watching'][uuid]
|
||||
except KeyError:
|
||||
@@ -154,21 +157,23 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
# Get the processor type for this watch
|
||||
processor_name = watch.get('processor', 'text_json_diff')
|
||||
|
||||
# Try to get the processor's extract module (works for both built-in and plugin processors)
|
||||
from changedetectionio.processors import get_processor_submodule
|
||||
processor_module = get_processor_submodule(processor_name, 'extract')
|
||||
try:
|
||||
# Try to import the processor's extract module
|
||||
processor_module = importlib.import_module(f'changedetectionio.processors.{processor_name}.extract')
|
||||
|
||||
# Call the processor's render_form() function
|
||||
if processor_module and hasattr(processor_module, 'render_form'):
|
||||
return processor_module.render_form(
|
||||
watch=watch,
|
||||
datastore=datastore,
|
||||
request=request,
|
||||
url_for=url_for,
|
||||
render_template=render_template,
|
||||
flash=flash,
|
||||
redirect=redirect
|
||||
)
|
||||
# Call the processor's render_form() function
|
||||
if hasattr(processor_module, 'render_form'):
|
||||
return processor_module.render_form(
|
||||
watch=watch,
|
||||
datastore=datastore,
|
||||
request=request,
|
||||
url_for=url_for,
|
||||
render_template=render_template,
|
||||
flash=flash,
|
||||
redirect=redirect
|
||||
)
|
||||
except (ImportError, ModuleNotFoundError) as e:
|
||||
logger.warning(f"Processor {processor_name} does not have an extract module, falling back to base extractor: {e}")
|
||||
|
||||
# Fallback: if processor doesn't have extract module, use base processors.extract as default
|
||||
from changedetectionio.processors.extract import render_form as default_render_form
|
||||
@@ -182,7 +187,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
redirect=redirect
|
||||
)
|
||||
|
||||
@diff_blueprint.route("/diff/<uuid_str:uuid>/extract", methods=['POST'])
|
||||
@diff_blueprint.route("/diff/<string:uuid>/extract", methods=['POST'])
|
||||
@login_optionally_required
|
||||
def diff_history_page_extract_POST(uuid):
|
||||
"""
|
||||
@@ -195,7 +200,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
Each processor implements processors/{type}/extract.py::process_extraction()
|
||||
If a processor doesn't have an extract module, falls back to text_json_diff.
|
||||
"""
|
||||
|
||||
# More for testing, possible to return the first/only
|
||||
if uuid == 'first':
|
||||
uuid = list(datastore.data['watching'].keys()).pop()
|
||||
|
||||
@@ -208,22 +213,24 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
# Get the processor type for this watch
|
||||
processor_name = watch.get('processor', 'text_json_diff')
|
||||
|
||||
# Try to get the processor's extract module (works for both built-in and plugin processors)
|
||||
from changedetectionio.processors import get_processor_submodule
|
||||
processor_module = get_processor_submodule(processor_name, 'extract')
|
||||
try:
|
||||
# Try to import the processor's extract module
|
||||
processor_module = importlib.import_module(f'changedetectionio.processors.{processor_name}.extract')
|
||||
|
||||
# Call the processor's process_extraction() function
|
||||
if processor_module and hasattr(processor_module, 'process_extraction'):
|
||||
return processor_module.process_extraction(
|
||||
watch=watch,
|
||||
datastore=datastore,
|
||||
request=request,
|
||||
url_for=url_for,
|
||||
make_response=make_response,
|
||||
send_from_directory=send_from_directory,
|
||||
flash=flash,
|
||||
redirect=redirect
|
||||
)
|
||||
# Call the processor's process_extraction() function
|
||||
if hasattr(processor_module, 'process_extraction'):
|
||||
return processor_module.process_extraction(
|
||||
watch=watch,
|
||||
datastore=datastore,
|
||||
request=request,
|
||||
url_for=url_for,
|
||||
make_response=make_response,
|
||||
send_from_directory=send_from_directory,
|
||||
flash=flash,
|
||||
redirect=redirect
|
||||
)
|
||||
except (ImportError, ModuleNotFoundError) as e:
|
||||
logger.warning(f"Processor {processor_name} does not have an extract module, falling back to base extractor: {e}")
|
||||
|
||||
# Fallback: if processor doesn't have extract module, use base processors.extract as default
|
||||
from changedetectionio.processors.extract import process_extraction as default_process_extraction
|
||||
@@ -238,7 +245,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
redirect=redirect
|
||||
)
|
||||
|
||||
@diff_blueprint.route("/diff/<uuid_str:uuid>/processor-asset/<string:asset_name>", methods=['GET'])
|
||||
@diff_blueprint.route("/diff/<string:uuid>/processor-asset/<string:asset_name>", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def processor_asset(uuid, asset_name):
|
||||
"""
|
||||
@@ -260,7 +267,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
- /diff/{uuid}/processor-asset/after
|
||||
- /diff/{uuid}/processor-asset/rendered_diff
|
||||
"""
|
||||
|
||||
# More for testing, possible to return the first/only
|
||||
if uuid == 'first':
|
||||
uuid = list(datastore.data['watching'].keys()).pop()
|
||||
|
||||
@@ -273,33 +280,38 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
# Get the processor type for this watch
|
||||
processor_name = watch.get('processor', 'text_json_diff')
|
||||
|
||||
# Try to get the processor's difference module (works for both built-in and plugin processors)
|
||||
from changedetectionio.processors import get_processor_submodule
|
||||
processor_module = get_processor_submodule(processor_name, 'difference')
|
||||
try:
|
||||
# Try to import the processor's difference module
|
||||
processor_module = importlib.import_module(f'changedetectionio.processors.{processor_name}.difference')
|
||||
|
||||
# Call the processor's get_asset() function
|
||||
if processor_module and hasattr(processor_module, 'get_asset'):
|
||||
result = processor_module.get_asset(
|
||||
asset_name=asset_name,
|
||||
watch=watch,
|
||||
datastore=datastore,
|
||||
request=request
|
||||
)
|
||||
# Call the processor's get_asset() function
|
||||
if hasattr(processor_module, 'get_asset'):
|
||||
result = processor_module.get_asset(
|
||||
asset_name=asset_name,
|
||||
watch=watch,
|
||||
datastore=datastore,
|
||||
request=request
|
||||
)
|
||||
|
||||
if result is None:
|
||||
if result is None:
|
||||
from flask import abort
|
||||
abort(404, description=f"Asset '{asset_name}' not found")
|
||||
|
||||
binary_data, content_type, cache_control = result
|
||||
|
||||
response = make_response(binary_data)
|
||||
response.headers['Content-Type'] = content_type
|
||||
if cache_control:
|
||||
response.headers['Cache-Control'] = cache_control
|
||||
return response
|
||||
else:
|
||||
logger.warning(f"Processor {processor_name} does not implement get_asset()")
|
||||
from flask import abort
|
||||
abort(404, description=f"Asset '{asset_name}' not found")
|
||||
abort(404, description=f"Processor '{processor_name}' does not support assets")
|
||||
|
||||
binary_data, content_type, cache_control = result
|
||||
|
||||
response = make_response(binary_data)
|
||||
response.headers['Content-Type'] = content_type
|
||||
if cache_control:
|
||||
response.headers['Cache-Control'] = cache_control
|
||||
return response
|
||||
else:
|
||||
logger.warning(f"Processor {processor_name} does not implement get_asset()")
|
||||
except (ImportError, ModuleNotFoundError) as e:
|
||||
logger.warning(f"Processor {processor_name} does not have a difference module: {e}")
|
||||
from flask import abort
|
||||
abort(404, description=f"Processor '{processor_name}' does not support assets")
|
||||
abort(404, description=f"Processor '{processor_name}' not found")
|
||||
|
||||
return diff_blueprint
|
||||
|
||||
@@ -9,7 +9,7 @@ from jinja2 import Environment, FileSystemLoader
|
||||
from changedetectionio.store import ChangeDetectionStore
|
||||
from changedetectionio.auth_decorator import login_optionally_required
|
||||
from changedetectionio.time_handler import is_within_schedule
|
||||
from changedetectionio import worker_pool
|
||||
from changedetectionio import worker_handler
|
||||
|
||||
def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMetaData):
|
||||
edit_blueprint = Blueprint('ui_edit', __name__, template_folder="../ui/templates")
|
||||
@@ -20,23 +20,24 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
if tag_uuid in watch.get('tags', []) and (tag.get('include_filters') or tag.get('subtractive_selectors')):
|
||||
return True
|
||||
|
||||
@edit_blueprint.route("/edit/<uuid_str:uuid>", methods=['GET', 'POST'])
|
||||
@edit_blueprint.route("/edit/<string:uuid>", methods=['GET', 'POST'])
|
||||
@login_optionally_required
|
||||
# https://stackoverflow.com/questions/42984453/wtforms-populate-form-with-data-if-data-exists
|
||||
# https://wtforms.readthedocs.io/en/3.0.x/forms/#wtforms.form.Form.populate_obj ?
|
||||
def edit_page(uuid):
|
||||
from changedetectionio import forms
|
||||
from changedetectionio.browser_steps.browser_steps import browser_step_ui_config
|
||||
from changedetectionio.blueprint.browser_steps.browser_steps import browser_step_ui_config
|
||||
from changedetectionio import processors
|
||||
import importlib
|
||||
|
||||
if uuid == 'first':
|
||||
uuid = list(datastore.data['watching'].keys()).pop()
|
||||
# More for testing, possible to return the first/only
|
||||
if not datastore.data['watching'].keys():
|
||||
flash(gettext("No watches to edit"), "error")
|
||||
return redirect(url_for('watchlist.index'))
|
||||
|
||||
if uuid == 'first':
|
||||
uuid = list(datastore.data['watching'].keys()).pop()
|
||||
|
||||
if not uuid in datastore.data['watching']:
|
||||
flash(gettext("No watch with the UUID {} found.").format(uuid), "error")
|
||||
return redirect(url_for('watchlist.index'))
|
||||
@@ -51,13 +52,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
redirect(url_for('ui_edit.edit_page', uuid=uuid))
|
||||
|
||||
# be sure we update with a copy instead of accidently editing the live object by reference
|
||||
default = None
|
||||
while not default:
|
||||
try:
|
||||
default = deepcopy(datastore.data['watching'][uuid])
|
||||
except RuntimeError as e:
|
||||
# Dictionary changed
|
||||
continue
|
||||
default = deepcopy(datastore.data['watching'][uuid])
|
||||
|
||||
# Defaults for proxy choice
|
||||
if datastore.proxy_list is not None: # When enabled
|
||||
@@ -67,21 +62,12 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
default['proxy'] = ''
|
||||
# proxy_override set to the json/text list of the items
|
||||
|
||||
# browser_profile: None means "use system default" — map to 'system' so the radio pre-selects correctly
|
||||
if not default.get('browser_profile'):
|
||||
default['browser_profile'] = 'system'
|
||||
|
||||
# Does it use some custom form? does one exist?
|
||||
processor_name = datastore.data['watching'][uuid].get('processor', '')
|
||||
processor_classes = next((tpl for tpl in processors.find_processors() if tpl[1] == processor_name), None)
|
||||
if not processor_classes:
|
||||
flash(gettext("Could not load '{}' processor, processor plugin might be missing. Please select a different processor.").format(processor_name), 'error')
|
||||
# Fall back to default processor so user can still edit and change processor
|
||||
processor_classes = next((tpl for tpl in processors.find_processors() if tpl[1] == 'text_json_diff'), None)
|
||||
if not processor_classes:
|
||||
# If even text_json_diff is missing, something is very wrong
|
||||
flash(gettext("Could not load '{}' processor, processor plugin might be missing.").format(processor_name), 'error')
|
||||
return redirect(url_for('watchlist.index'))
|
||||
flash(gettext("Cannot load the edit form for processor/plugin '{}', plugin missing?").format(processor_classes[1]), 'error')
|
||||
return redirect(url_for('watchlist.index'))
|
||||
|
||||
parent_module = processors.get_parent_module(processor_classes[0])
|
||||
|
||||
@@ -121,59 +107,19 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
processor_config = processor_instance.get_extra_watch_config(config_filename)
|
||||
|
||||
if processor_config:
|
||||
from wtforms.fields.form import FormField
|
||||
# Populate processor-config-* fields from JSON
|
||||
for config_key, config_value in processor_config.items():
|
||||
if not isinstance(config_value, dict):
|
||||
continue
|
||||
# Try exact API-named field first (e.g., processor_config_restock_diff)
|
||||
target_field = getattr(form, f'processor_config_{config_key}', None)
|
||||
# Fallback: find any FormField sub-form whose fields cover config_value keys
|
||||
if target_field is None:
|
||||
for form_field in form:
|
||||
if isinstance(form_field, FormField) and all(k in form_field.form._fields for k in config_value):
|
||||
target_field = form_field
|
||||
break
|
||||
if target_field is not None:
|
||||
for sub_key, sub_value in config_value.items():
|
||||
sub_field = target_field.form._fields.get(sub_key)
|
||||
if sub_field is not None:
|
||||
sub_field.data = sub_value
|
||||
logger.debug(f"Loaded processor config from {config_filename}: {sub_key} = {sub_value}")
|
||||
field_name = f'processor_config_{config_key}'
|
||||
if hasattr(form, field_name):
|
||||
getattr(form, field_name).data = config_value
|
||||
logger.debug(f"Loaded processor config from {config_filename}: {field_name} = {config_value}")
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to load processor config: {e}")
|
||||
|
||||
from changedetectionio.model.browser_profile import BrowserProfile
|
||||
from changedetectionio import content_fetchers as cf
|
||||
store_profiles = datastore.data['settings']['application'].get('browser_profiles', {})
|
||||
for p in datastore.extra_browsers:
|
||||
form.fetch_backend.choices.append(p)
|
||||
|
||||
# Resolve the name of the system-level default profile for the label
|
||||
from changedetectionio.model.browser_profile import get_profile
|
||||
_system_default_machine_name = datastore.data['settings']['application'].get('browser_profile') or 'direct_http_requests'
|
||||
_all_store_profiles = datastore.data['settings']['application'].get('browser_profiles', {})
|
||||
_default_profile = get_profile(_system_default_machine_name, _all_store_profiles)
|
||||
if _default_profile:
|
||||
_system_label = gettext('System settings default') + ' \u2013 ' + _default_profile.name
|
||||
else:
|
||||
_system_label = gettext('System settings default')
|
||||
|
||||
# Choices: system default + always-present defaults (requests) + user-created profiles
|
||||
form.browser_profile.choices = [('system', _system_label)] + [
|
||||
(p.get_machine_name(), p.name)
|
||||
for p in cf.DEFAULT_BROWSER_PROFILES.values()
|
||||
] + [
|
||||
(machine_name, raw.get('name', machine_name) if isinstance(raw, dict) else machine_name)
|
||||
for machine_name, raw in store_profiles.items()
|
||||
]
|
||||
|
||||
# Build a map of machine_name → fetcher class name for the JS visibility system
|
||||
all_profiles = dict(cf.DEFAULT_BROWSER_PROFILES)
|
||||
for machine_name, raw in store_profiles.items():
|
||||
try:
|
||||
all_profiles[machine_name] = BrowserProfile(**raw) if isinstance(raw, dict) else raw
|
||||
except Exception:
|
||||
pass
|
||||
browser_profile_fetchers = {mn: p.get_fetcher_class_name() for mn, p in all_profiles.items()}
|
||||
form.fetch_backend.choices.append(("system", 'System settings default'))
|
||||
|
||||
# form.browser_steps[0] can be assumed that we 'goto url' first
|
||||
|
||||
@@ -198,10 +144,58 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
|
||||
extra_update_obj['time_between_check'] = form.time_between_check.data
|
||||
|
||||
# Handle processor-config-* fields separately (save to JSON, not datastore)
|
||||
# IMPORTANT: These must NOT be saved to url-watches.json, only to the processor-specific JSON file
|
||||
processor_config_data = processors.extract_processor_config_from_form_data(form.data)
|
||||
processors.save_processor_config(datastore, uuid, processor_config_data)
|
||||
# Handle processor-config-* fields separately (save to JSON, not datastore)
|
||||
processor_config_data = {}
|
||||
fields_to_remove = []
|
||||
for field_name, field_value in form.data.items():
|
||||
if field_name.startswith('processor_config_'):
|
||||
config_key = field_name.replace('processor_config_', '')
|
||||
if field_value: # Only save non-empty values
|
||||
processor_config_data[config_key] = field_value
|
||||
fields_to_remove.append(field_name)
|
||||
|
||||
# Save processor config to JSON file if any config data exists
|
||||
if processor_config_data:
|
||||
try:
|
||||
processor_name = form.data.get('processor')
|
||||
# Create a processor instance to access config methods
|
||||
processor_instance = processors.difference_detection_processor(datastore, uuid)
|
||||
# Use processor name as filename so each processor keeps its own config
|
||||
config_filename = f'{processor_name}.json'
|
||||
processor_instance.update_extra_watch_config(config_filename, processor_config_data)
|
||||
logger.debug(f"Saved processor config to {config_filename}: {processor_config_data}")
|
||||
|
||||
# Call optional edit_hook if processor has one
|
||||
try:
|
||||
# Try to import the edit_hook module from the processor package
|
||||
import importlib
|
||||
edit_hook_module_name = f'changedetectionio.processors.{processor_name}.edit_hook'
|
||||
|
||||
try:
|
||||
edit_hook = importlib.import_module(edit_hook_module_name)
|
||||
logger.debug(f"Found edit_hook module for {processor_name}")
|
||||
|
||||
if hasattr(edit_hook, 'on_config_save'):
|
||||
logger.info(f"Calling edit_hook.on_config_save for {processor_name}")
|
||||
watch_obj = datastore.data['watching'][uuid]
|
||||
# Call hook and get updated config
|
||||
updated_config = edit_hook.on_config_save(watch_obj, processor_config_data, datastore)
|
||||
# Save updated config back to file
|
||||
processor_instance.update_extra_watch_config(config_filename, updated_config)
|
||||
logger.info(f"Edit hook updated config: {updated_config}")
|
||||
else:
|
||||
logger.debug(f"Edit hook module found but no on_config_save function")
|
||||
except ModuleNotFoundError:
|
||||
logger.debug(f"No edit_hook module for processor {processor_name} (this is normal)")
|
||||
except Exception as hook_error:
|
||||
logger.error(f"Edit hook error (non-fatal): {hook_error}", exc_info=True)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to save processor config: {e}")
|
||||
|
||||
# Remove processor-config-* fields from form.data before updating datastore
|
||||
for field_name in fields_to_remove:
|
||||
form.data.pop(field_name, None)
|
||||
|
||||
# Ignore text
|
||||
form_ignore_text = form.ignore_text.data
|
||||
@@ -241,19 +235,12 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
|
||||
# Recast it if need be to right data Watch handler
|
||||
watch_class = processors.get_custom_watch_obj_for_processor(form.data.get('processor'))
|
||||
datastore.data['watching'][uuid] = watch_class(datastore_path=datastore.datastore_path, __datastore=datastore, default=datastore.data['watching'][uuid])
|
||||
|
||||
# Save the watch immediately
|
||||
datastore.data['watching'][uuid].commit()
|
||||
|
||||
datastore.data['watching'][uuid] = watch_class(datastore_path=datastore.datastore_path, default=datastore.data['watching'][uuid])
|
||||
flash(gettext("Updated watch - unpaused!") if request.args.get('unpause_on_save') else gettext("Updated watch."))
|
||||
|
||||
# Cleanup any browsersteps session for this watch
|
||||
try:
|
||||
from changedetectionio.blueprint.browser_steps import cleanup_session_for_watch
|
||||
cleanup_session_for_watch(uuid)
|
||||
except Exception as e:
|
||||
logger.debug(f"Error cleaning up browsersteps session: {e}")
|
||||
# Re #286 - We wait for syncing new data to disk in another thread every 60 seconds
|
||||
# But in the case something is added we should save straight away
|
||||
datastore.needs_write_urgent = True
|
||||
|
||||
# Do not queue on edit if its not within the time range
|
||||
|
||||
@@ -283,7 +270,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
#############################
|
||||
if not datastore.data['watching'][uuid].get('paused') and is_in_schedule:
|
||||
# Queue the watch for immediate recheck, with a higher priority
|
||||
worker_pool.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
|
||||
# Diff page [edit] link should go back to diff page
|
||||
if request.args.get("next") and request.args.get("next") == 'diff':
|
||||
@@ -311,23 +298,15 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
|
||||
# Get fetcher capabilities instead of hardcoded logic
|
||||
capabilities = get_fetcher_capabilities(watch, datastore)
|
||||
|
||||
# Add processor capabilities from module
|
||||
capabilities['supports_visual_selector'] = getattr(parent_module, 'supports_visual_selector', False)
|
||||
capabilities['supports_text_filters_and_triggers'] = getattr(parent_module, 'supports_text_filters_and_triggers', False)
|
||||
capabilities['supports_text_filters_and_triggers_elements'] = getattr(parent_module, 'supports_text_filters_and_triggers_elements', False)
|
||||
capabilities['supports_request_type'] = getattr(parent_module, 'supports_request_type', False)
|
||||
|
||||
app_rss_token = datastore.data['settings']['application'].get('rss_access_token'),
|
||||
|
||||
c = [f"processor-{watch.get('processor')}"]
|
||||
if worker_pool.is_watch_running(uuid):
|
||||
if worker_handler.is_watch_running(uuid):
|
||||
c.append('checking-now')
|
||||
|
||||
template_args = {
|
||||
'available_processors': processors.available_processors(),
|
||||
'available_timezones': sorted(available_timezones()),
|
||||
'browser_profile_fetchers': browser_profile_fetchers,
|
||||
'browser_steps_config': browser_step_ui_config,
|
||||
'emailprefix': os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False),
|
||||
'extra_classes': ' '.join(c),
|
||||
@@ -346,6 +325,8 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
'url': url_for('rss.rss_single_watch', uuid=watch['uuid'], token=app_rss_token)
|
||||
},
|
||||
'settings_application': datastore.data['settings']['application'],
|
||||
'system_has_playwright_configured': os.getenv('PLAYWRIGHT_DRIVER_URL'),
|
||||
'system_has_webdriver_configured': os.getenv('WEBDRIVER_URL'),
|
||||
'ui_edit_stats_extras': collect_ui_edit_stats_extras(watch),
|
||||
'visual_selector_data_ready': datastore.visualselector_data_is_ready(watch_uuid=uuid),
|
||||
'timezone_default_config': datastore.data['settings']['application'].get('scheduler_timezone_default'),
|
||||
@@ -372,19 +353,17 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
|
||||
return output
|
||||
|
||||
@edit_blueprint.route("/edit/<uuid_str:uuid>/get-html", methods=['GET'])
|
||||
@edit_blueprint.route("/edit/<string:uuid>/get-html", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def watch_get_latest_html(uuid):
|
||||
from io import BytesIO
|
||||
from flask import send_file
|
||||
import brotli
|
||||
|
||||
if uuid == 'first':
|
||||
uuid = list(datastore.data['watching'].keys()).pop()
|
||||
watch = datastore.data['watching'].get(uuid)
|
||||
if watch and watch.history.keys() and os.path.isdir(watch.data_dir):
|
||||
if watch and watch.history.keys() and os.path.isdir(watch.watch_data_dir):
|
||||
latest_filename = list(watch.history.keys())[-1]
|
||||
html_fname = os.path.join(watch.data_dir, f"{latest_filename}.html.br")
|
||||
html_fname = os.path.join(watch.watch_data_dir, f"{latest_filename}.html.br")
|
||||
with open(html_fname, 'rb') as f:
|
||||
if html_fname.endswith('.br'):
|
||||
# Read and decompress the Brotli file
|
||||
@@ -399,65 +378,12 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
# Return a 500 error
|
||||
abort(500)
|
||||
|
||||
@edit_blueprint.route("/edit/<uuid_str:uuid>/get-data-package", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def watch_get_data_package(uuid):
|
||||
"""Download all data for a single watch as a zip file"""
|
||||
from io import BytesIO
|
||||
from flask import send_file
|
||||
import zipfile
|
||||
from pathlib import Path
|
||||
import datetime
|
||||
|
||||
watch = datastore.data['watching'].get(uuid)
|
||||
if not watch:
|
||||
abort(404)
|
||||
|
||||
# Create zip in memory
|
||||
memory_file = BytesIO()
|
||||
|
||||
with zipfile.ZipFile(memory_file, 'w',
|
||||
compression=zipfile.ZIP_DEFLATED,
|
||||
compresslevel=8) as zipObj:
|
||||
|
||||
# Add the watch's JSON file if it exists
|
||||
watch_json_path = os.path.join(watch.data_dir, 'watch.json')
|
||||
if os.path.isfile(watch_json_path):
|
||||
zipObj.write(watch_json_path,
|
||||
arcname=os.path.join(uuid, 'watch.json'),
|
||||
compress_type=zipfile.ZIP_DEFLATED,
|
||||
compresslevel=8)
|
||||
|
||||
# Add all files in the watch data directory
|
||||
if os.path.isdir(watch.data_dir):
|
||||
for f in Path(watch.data_dir).glob('*'):
|
||||
if f.is_file() and f.name != 'watch.json': # Skip watch.json since we already added it
|
||||
zipObj.write(f,
|
||||
arcname=os.path.join(uuid, f.name),
|
||||
compress_type=zipfile.ZIP_DEFLATED,
|
||||
compresslevel=8)
|
||||
|
||||
# Seek to beginning of file
|
||||
memory_file.seek(0)
|
||||
|
||||
# Generate filename with timestamp
|
||||
timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
|
||||
filename = f"watch-data-{uuid[:8]}-{timestamp}.zip"
|
||||
|
||||
return send_file(memory_file,
|
||||
as_attachment=True,
|
||||
download_name=filename,
|
||||
mimetype='application/zip')
|
||||
|
||||
# Ajax callback
|
||||
@edit_blueprint.route("/edit/<uuid_str:uuid>/preview-rendered", methods=['POST'])
|
||||
@edit_blueprint.route("/edit/<string:uuid>/preview-rendered", methods=['POST'])
|
||||
@login_optionally_required
|
||||
def watch_get_preview_rendered(uuid):
|
||||
'''For when viewing the "preview" of the rendered text from inside of Edit'''
|
||||
from flask import jsonify
|
||||
|
||||
if uuid == 'first':
|
||||
uuid = list(datastore.data['watching'].keys()).pop()
|
||||
from changedetectionio.processors.text_json_diff import prepare_filter_prevew
|
||||
result = prepare_filter_prevew(watch_uuid=uuid, form_data=request.form, datastore=datastore)
|
||||
return jsonify(result)
|
||||
@@ -481,9 +407,6 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
s = re.sub(r'[0-9]+', r'\\d+', s)
|
||||
datastore.data["watching"][uuid]['ignore_text'].append('/' + s + '/')
|
||||
|
||||
# Save the updated ignore_text
|
||||
datastore.data["watching"][uuid].commit()
|
||||
|
||||
return f"<a href={url_for('ui.ui_preview.preview_page', uuid=uuid)}>Click to preview</a>"
|
||||
|
||||
return edit_blueprint
|
||||
@@ -118,7 +118,6 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
sent_obj = process_notification(n_object, datastore)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(e)
|
||||
e_str = str(e)
|
||||
# Remove this text which is not important and floods the container
|
||||
e_str = e_str.replace(
|
||||
|
||||
@@ -10,8 +10,7 @@ from changedetectionio import html_tools
|
||||
def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
preview_blueprint = Blueprint('ui_preview', __name__, template_folder="../ui/templates")
|
||||
|
||||
|
||||
@preview_blueprint.route("/preview/<uuid_str:uuid>", methods=['GET', 'POST'])
|
||||
@preview_blueprint.route("/preview/<string:uuid>", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def preview_page(uuid):
|
||||
"""
|
||||
@@ -27,9 +26,10 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
Each processor implements processors/{type}/preview.py::render()
|
||||
If a processor doesn't have a preview module, falls back to default text preview.
|
||||
"""
|
||||
|
||||
# More for testing, possible to return the first/only
|
||||
if uuid == 'first':
|
||||
uuid = list(datastore.data['watching'].keys()).pop()
|
||||
|
||||
try:
|
||||
watch = datastore.data['watching'][uuid]
|
||||
except KeyError:
|
||||
@@ -39,29 +39,36 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
# Get the processor type for this watch
|
||||
processor_name = watch.get('processor', 'text_json_diff')
|
||||
|
||||
# Try to get the processor's preview module (works for both built-in and plugin processors)
|
||||
from changedetectionio.processors import get_processor_submodule
|
||||
processor_module = get_processor_submodule(processor_name, 'preview')
|
||||
try:
|
||||
# Try to import the processor's preview module
|
||||
import importlib
|
||||
processor_module = importlib.import_module(f'changedetectionio.processors.{processor_name}.preview')
|
||||
|
||||
# Call the processor's render() function
|
||||
if processor_module and hasattr(processor_module, 'render'):
|
||||
return processor_module.render(
|
||||
watch=watch,
|
||||
datastore=datastore,
|
||||
request=request,
|
||||
url_for=url_for,
|
||||
render_template=render_template,
|
||||
flash=flash,
|
||||
redirect=redirect
|
||||
)
|
||||
# Call the processor's render() function
|
||||
if hasattr(processor_module, 'render'):
|
||||
return processor_module.render(
|
||||
watch=watch,
|
||||
datastore=datastore,
|
||||
request=request,
|
||||
url_for=url_for,
|
||||
render_template=render_template,
|
||||
flash=flash,
|
||||
redirect=redirect
|
||||
)
|
||||
except (ImportError, ModuleNotFoundError) as e:
|
||||
logger.debug(f"Processor {processor_name} does not have a preview module, using default preview: {e}")
|
||||
|
||||
# Fallback: if processor doesn't have preview module, use default text preview
|
||||
content = []
|
||||
versions = []
|
||||
timestamp = None
|
||||
|
||||
system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver'
|
||||
extra_stylesheets = [url_for('static_content', group='styles', filename='diff.css')]
|
||||
fetcher_supports_screenshots = watch.fetcher_supports_screenshots
|
||||
|
||||
is_html_webdriver = False
|
||||
if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver' or watch.get('fetch_backend', '').startswith('extra_browser_'):
|
||||
is_html_webdriver = True
|
||||
|
||||
triggered_line_numbers = []
|
||||
ignored_line_numbers = []
|
||||
@@ -71,9 +78,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
flash(gettext("Preview unavailable - No fetch/check completed or triggers not reached"), "error")
|
||||
else:
|
||||
# So prepare the latest preview or not
|
||||
preferred_version = request.values.get('version') if request.method == 'POST' else request.args.get('version')
|
||||
|
||||
|
||||
preferred_version = request.args.get('version')
|
||||
versions = list(watch.history.keys())
|
||||
timestamp = versions[-1]
|
||||
if preferred_version and preferred_version in versions:
|
||||
@@ -112,7 +117,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
highlight_triggered_line_numbers=triggered_line_numbers,
|
||||
highlight_blocked_line_numbers=blocked_line_numbers,
|
||||
history_n=watch.history_n,
|
||||
fetcher_supports_screenshots=fetcher_supports_screenshots,
|
||||
is_html_webdriver=is_html_webdriver,
|
||||
last_error=watch['last_error'],
|
||||
last_error_screenshot=watch.get_error_snapshot(),
|
||||
last_error_text=watch.get_error_text(),
|
||||
@@ -124,7 +129,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
|
||||
return output
|
||||
|
||||
@preview_blueprint.route("/preview/<uuid_str:uuid>/processor-asset/<string:asset_name>", methods=['GET'])
|
||||
@preview_blueprint.route("/preview/<string:uuid>/processor-asset/<string:asset_name>", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def processor_asset(uuid, asset_name):
|
||||
"""
|
||||
@@ -145,8 +150,10 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
"""
|
||||
from flask import make_response
|
||||
|
||||
# More for testing, possible to return the first/only
|
||||
if uuid == 'first':
|
||||
uuid = list(datastore.data['watching'].keys()).pop()
|
||||
|
||||
try:
|
||||
watch = datastore.data['watching'][uuid]
|
||||
except KeyError:
|
||||
@@ -156,33 +163,39 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
# Get the processor type for this watch
|
||||
processor_name = watch.get('processor', 'text_json_diff')
|
||||
|
||||
# Try to get the processor's preview module (works for both built-in and plugin processors)
|
||||
from changedetectionio.processors import get_processor_submodule
|
||||
processor_module = get_processor_submodule(processor_name, 'preview')
|
||||
try:
|
||||
# Try to import the processor's preview module
|
||||
import importlib
|
||||
processor_module = importlib.import_module(f'changedetectionio.processors.{processor_name}.preview')
|
||||
|
||||
# Call the processor's get_asset() function
|
||||
if processor_module and hasattr(processor_module, 'get_asset'):
|
||||
result = processor_module.get_asset(
|
||||
asset_name=asset_name,
|
||||
watch=watch,
|
||||
datastore=datastore,
|
||||
request=request
|
||||
)
|
||||
# Call the processor's get_asset() function
|
||||
if hasattr(processor_module, 'get_asset'):
|
||||
result = processor_module.get_asset(
|
||||
asset_name=asset_name,
|
||||
watch=watch,
|
||||
datastore=datastore,
|
||||
request=request
|
||||
)
|
||||
|
||||
if result is None:
|
||||
if result is None:
|
||||
from flask import abort
|
||||
abort(404, description=f"Asset '{asset_name}' not found")
|
||||
|
||||
binary_data, content_type, cache_control = result
|
||||
|
||||
response = make_response(binary_data)
|
||||
response.headers['Content-Type'] = content_type
|
||||
if cache_control:
|
||||
response.headers['Cache-Control'] = cache_control
|
||||
return response
|
||||
else:
|
||||
logger.warning(f"Processor {processor_name} does not implement get_asset()")
|
||||
from flask import abort
|
||||
abort(404, description=f"Asset '{asset_name}' not found")
|
||||
abort(404, description=f"Processor '{processor_name}' does not support assets")
|
||||
|
||||
binary_data, content_type, cache_control = result
|
||||
|
||||
response = make_response(binary_data)
|
||||
response.headers['Content-Type'] = content_type
|
||||
if cache_control:
|
||||
response.headers['Cache-Control'] = cache_control
|
||||
return response
|
||||
else:
|
||||
logger.warning(f"Processor {processor_name} does not implement get_asset()")
|
||||
except (ImportError, ModuleNotFoundError) as e:
|
||||
logger.warning(f"Processor {processor_name} does not have a preview module: {e}")
|
||||
from flask import abort
|
||||
abort(404, description=f"Processor '{processor_name}' does not support assets")
|
||||
abort(404, description=f"Processor '{processor_name}' not found")
|
||||
|
||||
return preview_blueprint
|
||||
|
||||
@@ -87,7 +87,7 @@
|
||||
</form>
|
||||
</div>
|
||||
|
||||
<div id="diff-jump" style="display:none;"><!-- disabled for now -->
|
||||
<div id="diff-jump">
|
||||
<a id="jump-next-diff" title="{{ _('Jump to next difference') }}">{{ _('Jump') }}</a>
|
||||
</div>
|
||||
|
||||
@@ -143,7 +143,7 @@
|
||||
<div class="tip">
|
||||
{{ _('For now, Differences are performed on text, not graphically, only the latest screenshot is available.') }}
|
||||
</div>
|
||||
{% if fetcher_supports_screenshots %}
|
||||
{% if is_html_webdriver %}
|
||||
{% if screenshot %}
|
||||
<div class="snapshot-age">{{watch_a.snapshot_screenshot_ctime|format_timestamp_timeago}}</div>
|
||||
<img style="max-width: 80%" id="screenshot-img" alt="{{ _('Current screenshot from most recent request') }}" >
|
||||
|
||||
@@ -27,8 +27,7 @@
|
||||
const proxy_recheck_status_url="{{url_for('check_proxies.get_recheck_status', uuid=uuid)}}";
|
||||
const screenshot_url="{{url_for('static_content', group='screenshot', filename=uuid)}}";
|
||||
const watch_visual_selector_data_url="{{url_for('static_content', group='visual_selector_data', filename=uuid)}}";
|
||||
const default_system_fetch_backend = {{ (browser_profile_fetchers.get(settings_application.get('browser_profile') or 'direct_http_requests', 'requests')) | tojson }};
|
||||
const browserProfileFetcherMap = {{ browser_profile_fetchers | tojson }};
|
||||
const default_system_fetch_backend="{{ settings_application['fetch_backend'] }}";
|
||||
</script>
|
||||
<script src="{{url_for('static_content', group='js', filename='plugins.js')}}" defer></script>
|
||||
<script src="{{url_for('static_content', group='js', filename='watch-settings.js')}}" defer></script>
|
||||
@@ -46,19 +45,14 @@
|
||||
<div class="tabs collapsable">
|
||||
<ul>
|
||||
<li class="tab"><a href="#general">{{ _('General') }}</a></li>
|
||||
{% if capabilities.supports_request_type %}
|
||||
<li class="tab"><a href="#request">{{ _('Request') }}</a></li>
|
||||
{% endif %}
|
||||
{% if extra_tab_content %}
|
||||
<li class="tab"><a href="#extras_tab">{{ extra_tab_content }}</a></li>
|
||||
{% endif %}
|
||||
{% if capabilities.supports_browser_steps %}
|
||||
<li class="tab"><a id="browsersteps-tab" href="#browser-steps">{{ _('Browser Steps') }}</a></li>
|
||||
{% endif %}
|
||||
{% if capabilities.supports_visual_selector %}
|
||||
<!-- should goto extra forms? -->
|
||||
{% if watch['processor'] == 'text_json_diff' or watch['processor'] == 'image_ssim_diff' %}
|
||||
<li class="tab"><a id="visualselector-tab" href="#visualselector">{{ _('Visual Filter Selector') }}</a></li>
|
||||
{% endif %}
|
||||
{% if capabilities.supports_text_filters_and_triggers %}
|
||||
<li class="tab" id="filters-and-triggers-tab"><a href="#filters-and-triggers">{{ _('Filters & Triggers') }}</a></li>
|
||||
<li class="tab" id="conditions-tab"><a href="#conditions">{{ _('Conditions') }}</a></li>
|
||||
{% endif %}
|
||||
@@ -116,35 +110,19 @@
|
||||
{{ _('Sends a notification when the filter can no longer be seen on the page, good for knowing when the page changed and your filter will not work anymore.') }}
|
||||
</span>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.history_snapshot_max_length, class="history_snapshot_max_length") }}
|
||||
<span class="pure-form-message-inline">{{ _('Limit collection of history snapshots for each watch to this number of history items.') }}
|
||||
<br>
|
||||
{{ _('Set to empty to use system settings default') }}
|
||||
</span>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_ternary_field(form.use_page_title_in_list) }}
|
||||
</div>
|
||||
</fieldset>
|
||||
</div>
|
||||
|
||||
{% if capabilities.supports_request_type %}
|
||||
<div class="tab-pane-inner" id="request">
|
||||
<div class="pure-control-group inline-radio">
|
||||
<div><label for="browser_profile">{{ form.browser_profile.label.text }}</label></div>
|
||||
<div><ul class="fetch-backend" id="browser_profile">
|
||||
{%- for subfield in form.browser_profile %}
|
||||
<li>
|
||||
{{ subfield() }}
|
||||
{{ browser_profile_fetchers.get(subfield.data, '')|fetcher_status_icons }}
|
||||
<label for="{{ subfield.id }}">{{ subfield.label.text }}</label>
|
||||
</li>
|
||||
{%- endfor %}
|
||||
</ul></div>
|
||||
{{ render_field(form.fetch_backend, class="fetch-backend") }}
|
||||
<span class="pure-form-message-inline">
|
||||
<p>{{ _('Choose how this watch fetches its target URL. \'System settings default\' inherits the global setting.') }}</p>
|
||||
<p>{{ _('Manage browser profiles in') }} <a href="{{ url_for('settings.settings_browsers.index') }}">{{ _('Settings → Browsers') }}</a>.</p>
|
||||
<p>{{ _('Use the') }} <strong>{{ _('Basic') }}</strong> {{ _('method (default) where your watched site doesn\'t need Javascript to render.') }}</p>
|
||||
<p>{{ _('The') }} <strong>{{ _('Chrome/Javascript') }}</strong> {{ _('method requires a network connection to a running WebDriver+Chrome server, set by the ENV var \'WEBDRIVER_URL\'.') }} </p>
|
||||
{{ _('Tip:') }} <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration#brightdata-proxy-support">{{ _('Connect using Bright Data and Oxylabs Proxies, find out more here.') }}</a>
|
||||
</span>
|
||||
</div>
|
||||
{% if form.proxy %}
|
||||
@@ -158,7 +136,7 @@
|
||||
{% endif %}
|
||||
|
||||
<!-- webdriver always -->
|
||||
<fieldset data-visible-for="fetch_backend=playwright fetch_backend=selenium fetch_backend=puppeteer fetch_backend=cloakbrowser" style="display: none;">
|
||||
<fieldset data-visible-for="fetch_backend=html_webdriver" style="display: none;">
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.webdriver_delay) }}
|
||||
<div class="pure-form-message-inline">
|
||||
@@ -181,8 +159,8 @@
|
||||
</div>
|
||||
</div>
|
||||
</fieldset>
|
||||
<!-- requests always -->
|
||||
<fieldset data-visible-for="fetch_backend=requests">
|
||||
<!-- html requests always -->
|
||||
<fieldset data-visible-for="fetch_backend=html_requests">
|
||||
<div class="pure-control-group">
|
||||
<a class="pure-button button-secondary button-xsmall show-advanced">{{ _('Show advanced options') }}</a>
|
||||
</div>
|
||||
@@ -219,17 +197,16 @@ Math: {{ 1 + 1 }}") }}
|
||||
({{ _('Not supported by Selenium browser') }})
|
||||
</div>
|
||||
</div>
|
||||
<fieldset data-visible-for="fetch_backend=requests fetch_backend=playwright fetch_backend=selenium fetch_backend=puppeteer fetch_backend=cloakbrowser" >
|
||||
<fieldset data-visible-for="fetch_backend=html_requests fetch_backend=html_webdriver" >
|
||||
<div class="pure-control-group inline-radio advanced-options" style="display: none;">
|
||||
{{ render_checkbox_field(form.ignore_status_codes) }}
|
||||
</div>
|
||||
</fieldset>
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
<div class="tab-pane-inner" id="browser-steps">
|
||||
{% if capabilities.supports_browser_steps %}
|
||||
{% if true %}
|
||||
{% if visual_selector_data_ready %}
|
||||
<img class="beta-logo" src="{{url_for('static_content', group='images', filename='beta-logo.png')}}" alt="New beta functionality">
|
||||
<fieldset>
|
||||
<div class="pure-control-group">
|
||||
@@ -306,7 +283,8 @@ Math: {{ 1 + 1 }}") }}
|
||||
</fieldset>
|
||||
</div>
|
||||
|
||||
{% if capabilities.supports_text_filters_and_triggers %}
|
||||
{% if watch['processor'] == 'text_json_diff' or watch['processor'] == 'image_ssim_diff' %}
|
||||
|
||||
<div class="tab-pane-inner" id="conditions">
|
||||
<script>
|
||||
const verify_condition_rule_url="{{url_for('conditions.verify_condition_single_rule', watch_uuid=uuid)}}";
|
||||
@@ -325,9 +303,7 @@ Math: {{ 1 + 1 }}") }}
|
||||
<span id="activate-text-preview" class="pure-button pure-button-primary button-xsmall">{{ _('Activate preview') }}</span>
|
||||
<div>
|
||||
<div id="edit-text-filter">
|
||||
|
||||
{% if capabilities.supports_text_filters_and_triggers_elements %}
|
||||
<div class="pure-control-group" id="pro-tips">
|
||||
<div class="pure-control-group" id="pro-tips">
|
||||
<strong>{{ _('Pro-tips:') }}</strong><br>
|
||||
<ul>
|
||||
<li>
|
||||
@@ -338,8 +314,8 @@ Math: {{ 1 + 1 }}") }}
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
{% include "edit/include_subtract.html" %}
|
||||
{% endif %}
|
||||
<div class="text-filtering border-fieldset">
|
||||
<fieldset class="pure-group" id="text-filtering-type-options">
|
||||
<h3>{{ _('Text filtering') }}</h3>
|
||||
@@ -398,7 +374,7 @@ Math: {{ 1 + 1 }}") }}
|
||||
{{ extra_form_content|safe }}
|
||||
</div>
|
||||
{% endif %}
|
||||
{% if capabilities.supports_visual_selector %}
|
||||
{% if watch['processor'] == 'text_json_diff' or watch['processor'] == 'image_ssim_diff' %}
|
||||
<div class="tab-pane-inner visual-selector-ui" id="visualselector">
|
||||
<img class="beta-logo" src="{{url_for('static_content', group='images', filename='beta-logo.png')}}" alt="New beta functionality">
|
||||
|
||||
@@ -410,7 +386,7 @@ Math: {{ 1 + 1 }}") }}
|
||||
{{ _('The Visual Selector tool lets you select the') }} <i>{{ _('text') }}</i> {{ _('elements that will be used for the change detection. It automatically fills-in the filters in the "CSS/JSONPath/JQ/XPath Filters" box of the') }} <a href="#filters-and-triggers">{{ _('Filters & Triggers') }}</a> {{ _('tab. Use') }} <strong>{{ _('Shift+Click') }}</strong> {{ _('to select multiple items.') }}
|
||||
</span>
|
||||
|
||||
{% if watch['processor'] == 'image_ssim_diff' %} {# @todo, integrate with image_ssim_diff selector better, use some extra form ? #}
|
||||
{% if watch['processor'] == 'image_ssim_diff' %}
|
||||
<div id="selection-mode-controls" style="margin: 10px 0; padding: 10px; background: var(--color-background-tab); border-radius: 5px;">
|
||||
<label style="font-weight: 600; margin-right: 15px;">{{ _('Selection Mode:') }}</label>
|
||||
<label style="margin-right: 15px;">
|
||||
@@ -497,7 +473,6 @@ Math: {{ 1 + 1 }}") }}
|
||||
{% if watch.history_n %}
|
||||
<p>
|
||||
<a href="{{url_for('ui.ui_edit.watch_get_latest_html', uuid=uuid)}}" class="pure-button button-small">{{ _('Download latest HTML snapshot') }}</a>
|
||||
<a href="{{url_for('ui.ui_edit.watch_get_data_package', uuid=uuid)}}" class="pure-button button-small">{{ _('Download watch data package') }}</a>
|
||||
</p>
|
||||
{% endif %}
|
||||
|
||||
|
||||
@@ -17,7 +17,7 @@
|
||||
<script src="{{ url_for('static_content', group='js', filename='tabs.js') }}" defer></script>
|
||||
{% if versions|length >= 2 %}
|
||||
<div id="diff-form" style="text-align: center;">
|
||||
<form class="pure-form " action="{{url_for('ui.ui_preview.preview_page', uuid=uuid)}}" method="POST">
|
||||
<form class="pure-form " action="" method="POST">
|
||||
<fieldset>
|
||||
<label for="preview-version">{{ _('Select timestamp') }}</label> <select id="preview-version"
|
||||
name="from_version"
|
||||
@@ -28,7 +28,6 @@
|
||||
</option>
|
||||
{% endfor %}
|
||||
</select>
|
||||
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}">
|
||||
<button type="submit" class="pure-button pure-button-primary">{{ _('Go') }}</button>
|
||||
|
||||
</fieldset>
|
||||
|
||||
@@ -2,7 +2,7 @@ from flask import Blueprint, request, redirect, url_for, flash
|
||||
from flask_babel import gettext
|
||||
from changedetectionio.store import ChangeDetectionStore
|
||||
from changedetectionio.auth_decorator import login_optionally_required
|
||||
from changedetectionio import worker_pool
|
||||
from changedetectionio import worker_handler
|
||||
|
||||
|
||||
def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMetaData, watch_check_update):
|
||||
@@ -24,9 +24,8 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
flash(gettext('Warning, URL {} already exists').format(url), "notice")
|
||||
|
||||
add_paused = request.form.get('edit_and_watch_submit_button') != None
|
||||
from changedetectionio import processors
|
||||
processor = request.form.get('processor', processors.get_default_processor())
|
||||
new_uuid = datastore.add_watch(url=url, tag=request.form.get('tags','').strip(), extras={'paused': add_paused, 'processor': processor})
|
||||
processor = request.form.get('processor', 'text_json_diff')
|
||||
new_uuid = datastore.add_watch(url=url, tag=request.form.get('tags').strip(), extras={'paused': add_paused, 'processor': processor})
|
||||
|
||||
if new_uuid:
|
||||
if add_paused:
|
||||
@@ -34,9 +33,9 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
return redirect(url_for('ui.ui_edit.edit_page', uuid=new_uuid, unpause_on_save=1, tag=request.args.get('tag')))
|
||||
else:
|
||||
# Straight into the queue.
|
||||
worker_pool.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid}))
|
||||
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid}))
|
||||
flash(gettext("Watch added."))
|
||||
|
||||
return redirect(url_for('watchlist.index', tag=request.args.get('tag','')))
|
||||
|
||||
return views_blueprint
|
||||
return views_blueprint
|
||||
@@ -2,8 +2,8 @@ import os
|
||||
import time
|
||||
|
||||
from flask import Blueprint, request, make_response, render_template, redirect, url_for, flash, session
|
||||
from flask_login import current_user
|
||||
from flask_paginate import Pagination, get_page_parameter
|
||||
from flask_babel import gettext as _
|
||||
|
||||
from changedetectionio import forms
|
||||
from changedetectionio import processors
|
||||
@@ -39,7 +39,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
elif op == 'mute':
|
||||
datastore.data['watching'][uuid].toggle_mute()
|
||||
|
||||
datastore.data['watching'][uuid].commit()
|
||||
datastore.needs_write = True
|
||||
return redirect(url_for('watchlist.index', tag = active_tag_uuid))
|
||||
|
||||
# Sort by last_changed and add the uuid which is usually the key..
|
||||
@@ -74,14 +74,10 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
|
||||
pagination = Pagination(page=page,
|
||||
total=total_count,
|
||||
per_page=datastore.data['settings']['application'].get('pager_size', 50),
|
||||
css_framework="semantic",
|
||||
display_msg=_('displaying <b>{start} - {end}</b> {record_name} in total <b>{total}</b>'),
|
||||
record_name=_('records'))
|
||||
per_page=datastore.data['settings']['application'].get('pager_size', 50), css_framework="semantic")
|
||||
|
||||
sorted_tags = sorted(datastore.data['settings']['application'].get('tags').items(), key=lambda x: x[1]['title'])
|
||||
|
||||
proxy_list = datastore.proxy_list
|
||||
output = render_template(
|
||||
"watch-overview.html",
|
||||
active_tag=active_tag,
|
||||
@@ -89,38 +85,25 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
app_rss_token=datastore.data['settings']['application'].get('rss_access_token'),
|
||||
datastore=datastore,
|
||||
errored_count=errored_count,
|
||||
extra_classes='has-queue' if not update_q.empty() else '',
|
||||
form=form,
|
||||
generate_tag_colors=processors.generate_processor_badge_colors,
|
||||
guid=datastore.data['app_guid'],
|
||||
has_proxies=proxy_list,
|
||||
has_proxies=datastore.proxy_list,
|
||||
hosted_sticky=os.getenv("SALTED_PASS", False) == False,
|
||||
now_time_server=round(time.time()),
|
||||
pagination=pagination,
|
||||
processor_badge_css=processors.get_processor_badge_css(),
|
||||
processor_badge_texts=processors.get_processor_badge_texts(),
|
||||
processor_descriptions=processors.get_processor_descriptions(),
|
||||
queue_size=update_q.qsize(),
|
||||
queued_uuids=update_q.get_queued_uuids(),
|
||||
processor_badge_css=processors.get_processor_badge_css(),
|
||||
queued_uuids=[q_uuid.item['uuid'] for q_uuid in update_q.queue],
|
||||
search_q=request.args.get('q', '').strip(),
|
||||
sort_attribute=request.args.get('sort') if request.args.get('sort') else request.cookies.get('sort'),
|
||||
sort_order=request.args.get('order') if request.args.get('order') else request.cookies.get('order'),
|
||||
system_default_fetcher=datastore.data['settings']['application'].get('browser_profile'),
|
||||
system_default_fetcher=datastore.data['settings']['application'].get('fetch_backend'),
|
||||
tags=sorted_tags,
|
||||
unread_changes_count=datastore.unread_changes_count,
|
||||
watches=sorted_watches
|
||||
)
|
||||
|
||||
# Return freed template-building memory to the OS immediately.
|
||||
# render_template allocates ~20MB of intermediate strings that are freed on return,
|
||||
# but glibc keeps those pages mapped in its arenas as RSS. malloc_trim() forces
|
||||
# glibc to release them, preventing RSS growth from concurrent Chrome connections.
|
||||
try:
|
||||
import ctypes
|
||||
ctypes.CDLL('libc.so.6').malloc_trim(0)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if session.get('share-link'):
|
||||
del (session['share-link'])
|
||||
|
||||
|
||||
@@ -1,9 +1,5 @@
|
||||
{%- extends 'base.html' -%}
|
||||
{%- block content -%}
|
||||
{%- set tips = [
|
||||
_("Changedetection.io can monitor more than just web-pages! See our plugins!") ~ ' <a href="https://changedetection.io/plugins">' ~ _('More info') ~ '</a>',
|
||||
_("You can also add 'shared' watches.") ~ ' <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Sharing-a-Watch">' ~ _('More info') ~ '</a>'
|
||||
] -%}
|
||||
{%- from '_helpers.html' import render_simple_field, render_field, render_nolabel_field, sort_by_title -%}
|
||||
<script src="{{url_for('static_content', group='js', filename='jquery-3.6.0.min.js')}}"></script>
|
||||
<script src="{{url_for('static_content', group='js', filename='watch-overview.js')}}" defer></script>
|
||||
@@ -14,46 +10,6 @@
|
||||
// Initialize Feather icons after the page loads
|
||||
document.addEventListener('DOMContentLoaded', function() {
|
||||
feather.replace();
|
||||
|
||||
// Intersection Observer for lazy loading favicons
|
||||
// Only load favicon images when they enter the viewport
|
||||
if ('IntersectionObserver' in window) {
|
||||
const faviconObserver = new IntersectionObserver((entries, observer) => {
|
||||
entries.forEach(entry => {
|
||||
if (entry.isIntersecting) {
|
||||
const img = entry.target;
|
||||
const src = img.getAttribute('data-src');
|
||||
|
||||
if (src) {
|
||||
// Load the actual favicon
|
||||
img.src = src;
|
||||
img.removeAttribute('data-src');
|
||||
}
|
||||
|
||||
// Stop observing this image
|
||||
observer.unobserve(img);
|
||||
}
|
||||
});
|
||||
}, {
|
||||
// Start loading slightly before the image enters viewport
|
||||
rootMargin: '50px',
|
||||
threshold: 0.01
|
||||
});
|
||||
|
||||
// Observe all lazy favicon images
|
||||
document.querySelectorAll('.lazy-favicon').forEach(img => {
|
||||
faviconObserver.observe(img);
|
||||
});
|
||||
} else {
|
||||
// Fallback for older browsers: load all favicons immediately
|
||||
document.querySelectorAll('.lazy-favicon').forEach(img => {
|
||||
const src = img.getAttribute('data-src');
|
||||
if (src) {
|
||||
img.src = src;
|
||||
img.removeAttribute('data-src');
|
||||
}
|
||||
});
|
||||
}
|
||||
});
|
||||
</script>
|
||||
<style>
|
||||
@@ -66,33 +22,6 @@ document.addEventListener('DOMContentLoaded', function() {
|
||||
|
||||
/* Auto-generated processor badge colors */
|
||||
{{ processor_badge_css|safe }}
|
||||
|
||||
/* Auto-generated tag colors */
|
||||
{%- for uuid, tag in tags -%}
|
||||
{%- if tag and tag.title -%}
|
||||
{%- set class_name = tag.title|sanitize_tag_class -%}
|
||||
{%- set colors = generate_tag_colors(tag.title) -%}
|
||||
.button-tag.tag-{{ class_name }} {
|
||||
background-color: {{ colors['light']['bg'] }};
|
||||
color: {{ colors['light']['color'] }};
|
||||
}
|
||||
|
||||
.watch-tag-list.tag-{{ class_name }} {
|
||||
background-color: {{ colors['light']['bg'] }};
|
||||
color: {{ colors['light']['color'] }};
|
||||
}
|
||||
|
||||
html[data-darkmode="true"] .button-tag.tag-{{ class_name }} {
|
||||
background-color: {{ colors['dark']['bg'] }};
|
||||
color: {{ colors['dark']['color'] }};
|
||||
}
|
||||
|
||||
html[data-darkmode="true"] .watch-tag-list.tag-{{ class_name }} {
|
||||
background-color: {{ colors['dark']['bg'] }};
|
||||
color: {{ colors['dark']['color'] }};
|
||||
}
|
||||
{%- endif -%}
|
||||
{%- endfor -%}
|
||||
</style>
|
||||
<div class="box" id="form-quick-watch-add">
|
||||
|
||||
@@ -106,16 +35,14 @@ html[data-darkmode="true"] .watch-tag-list.tag-{{ class_name }} {
|
||||
{{ render_nolabel_field(form.edit_and_watch_submit_button, title=_("Edit first then Watch") ) }}
|
||||
</div>
|
||||
<div id="watch-group-tag">
|
||||
{{ render_field(form.tags, value=active_tag.title if active_tag_uuid else '', placeholder=_("Watch group / tag"), class="transparent-field") }}
|
||||
{{ render_field(form.tags, value=active_tag.title if active_tag_uuid else '', placeholder="Watch group / tag", class="transparent-field") }}
|
||||
</div>
|
||||
<div id="quick-watch-processor-type">
|
||||
{{ render_simple_field(form.processor) }}
|
||||
</div>
|
||||
|
||||
</fieldset>
|
||||
<span style="color:#eee; font-size: 80%;">
|
||||
<strong>Tip: </strong> {{ tips | random | safe }}
|
||||
</span>
|
||||
<span style="color:#eee; font-size: 80%;"><img alt="{{ _('Create a shareable link') }}" style="height: 1em;display:inline-block;" src="{{url_for('static_content', group='images', filename='spread-white.svg')}}" > {{ _("Tip: You can also add 'shared' watches.") }} <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Sharing-a-Watch">{{ _('More info') }}</a></span>
|
||||
</form>
|
||||
</div>
|
||||
<div class="box">
|
||||
@@ -145,14 +72,9 @@ html[data-darkmode="true"] .watch-tag-list.tag-{{ class_name }} {
|
||||
data-confirm-message="{{ _('<p>Are you sure you want to delete the selected watches?</strong></p><p>This action cannot be undone.</p>') }}"
|
||||
data-confirm-button="{{ _('Delete') }}"><i data-feather="trash" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>{{ _('Delete') }}</button>
|
||||
</div>
|
||||
|
||||
<div id="stats_row">
|
||||
<div class="left">{%- if watches|length >= pagination.per_page -%}{{ pagination.info }}{%- endif -%}</div>
|
||||
<div class="right" >{{ _('Queued size') }}: <span id="queue-size-int">{{ queue_size }}</span></div>
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
{%- if watches|length >= pagination.per_page -%}
|
||||
{{ pagination.info }}
|
||||
{%- endif -%}
|
||||
{%- if search_q -%}<div id="search-result-info">{{ _('Searching') }} "<strong><i>{{search_q}}</i></strong>"</div>{%- endif -%}
|
||||
<div>
|
||||
<a href="{{url_for('watchlist.index')}}" class="pure-button button-tag {{'active' if not active_tag_uuid }}">{{ _('All') }}</a>
|
||||
@@ -160,7 +82,7 @@ html[data-darkmode="true"] .watch-tag-list.tag-{{ class_name }} {
|
||||
<!-- tag list -->
|
||||
{%- for uuid, tag in tags -%}
|
||||
{%- if tag != "" -%}
|
||||
<a href="{{url_for('watchlist.index', tag=uuid) }}" class="pure-button button-tag tag-{{ tag.title|sanitize_tag_class }} {{'active' if active_tag_uuid == uuid }}">{{ tag.title }}</a>
|
||||
<a href="{{url_for('watchlist.index', tag=uuid) }}" class="pure-button button-tag {{'active' if active_tag_uuid == uuid }}">{{ tag.title }}</a>
|
||||
{%- endif -%}
|
||||
{%- endfor -%}
|
||||
</div>
|
||||
@@ -205,7 +127,7 @@ html[data-darkmode="true"] .watch-tag-list.tag-{{ class_name }} {
|
||||
<tbody>
|
||||
{%- if not watches|length -%}
|
||||
<tr>
|
||||
<td colspan="{{ cols_required }}" style="text-wrap: wrap;">{{ _('No web page change detection watches configured, please add a URL in the box above, or') }} <a href="{{ url_for('imports.import_page')}}" >{{ _('import a list') }}</a>.</td>
|
||||
<td colspan="{{ cols_required }}" style="text-wrap: wrap;">{{ _('No website watches configured, please add a URL in the box above, or') }} <a href="{{ url_for('imports.import_page')}}" >{{ _('import a list') }}</a>.</td>
|
||||
</tr>
|
||||
{%- endif -%}
|
||||
|
||||
@@ -213,13 +135,12 @@ html[data-darkmode="true"] .watch-tag-list.tag-{{ class_name }} {
|
||||
{%- set checking_now = is_checking_now(watch) -%}
|
||||
{%- set history_n = watch.history_n -%}
|
||||
{%- set favicon = watch.get_favicon_filename() -%}
|
||||
{%- set error_texts = watch.compile_error_texts(has_proxies=has_proxies) -%}
|
||||
{%- set system_use_url_watchlist = datastore.data['settings']['application']['ui'].get('use_page_title_in_list') -%}
|
||||
{# Class settings mirrored in changedetectionio/static/js/realtime.js for the frontend #}
|
||||
{%- set row_classes = [
|
||||
loop.cycle('pure-table-odd', 'pure-table-even'),
|
||||
'processor-' ~ watch['processor'],
|
||||
'has-error' if error_texts|length > 2 else '',
|
||||
'has-error' if watch.compile_error_texts()|length > 2 else '',
|
||||
'paused' if watch.paused is defined and watch.paused != False else '',
|
||||
'unviewed' if watch.has_unviewed else '',
|
||||
'has-restock-info' if watch.has_restock_info else 'no-restock-info',
|
||||
@@ -247,45 +168,38 @@ html[data-darkmode="true"] .watch-tag-list.tag-{{ class_name }} {
|
||||
<td class="title-col inline">
|
||||
<div class="flex-wrapper">
|
||||
{% if 'favicons_enabled' not in ui_settings or ui_settings['favicons_enabled'] %}
|
||||
<div>
|
||||
{# Intersection Observer lazy loading: store real URL in data-src, load only when visible in viewport #}
|
||||
<img alt="Favicon thumbnail"
|
||||
class="favicon lazy-favicon"
|
||||
loading="lazy"
|
||||
decoding="async"
|
||||
fetchpriority="low"
|
||||
{% if favicon %}
|
||||
data-src="{{url_for('static_content', group='favicon', filename=watch.uuid)}}"
|
||||
{% endif %}
|
||||
src='data:image/svg+xml;utf8,%3Csvg xmlns="http://www.w3.org/2000/svg" width="7.087" height="7.087" viewBox="0 0 7.087 7.087"%3E%3Ccircle cx="3.543" cy="3.543" r="3.279" stroke="%23e1e1e1" stroke-width="0.45" fill="none" opacity="0.74"/%3E%3C/svg%3E'>
|
||||
<div>{# A page might have hundreds of these images, set IMG options for lazy loading, don't set SRC if we dont have it so it doesnt fetch the placeholder' #}
|
||||
<img alt="Favicon thumbnail" class="favicon" loading="lazy" decoding="async" fetchpriority="low" {% if favicon %} src="{{url_for('static_content', group='favicon', filename=watch.uuid)}}" {% else %} src='data:image/svg+xml;utf8,%3Csvg xmlns="http://www.w3.org/2000/svg" width="7.087" height="7.087" viewBox="0 0 7.087 7.087"%3E%3Ccircle cx="3.543" cy="3.543" r="3.279" stroke="%23e1e1e1" stroke-width="0.45" fill="none" opacity="0.74"/%3E%3C/svg%3E' {% endif %} />
|
||||
</div>
|
||||
{% endif %}
|
||||
<div>
|
||||
{%- if watch['processor'] and watch['processor'] in processor_badge_texts -%}
|
||||
<span class="processor-badge processor-badge-{{ watch['processor'] }}" title="{{ processor_descriptions.get(watch['processor'], watch['processor']) }}">{{ processor_badge_texts[watch['processor']] }}</span>
|
||||
{%- endif -%}
|
||||
<span class="watch-title">
|
||||
{% if system_use_url_watchlist or watch.get('use_page_title_in_list') %}
|
||||
{{ watch.label }}
|
||||
{% else %}
|
||||
{{ watch.get('title') or watch.link }}
|
||||
{% endif %}
|
||||
<a class="external" target="_blank" rel="noopener" href="{{ watch.link.replace('source:','') }}"> </a>
|
||||
</span>
|
||||
<div class="error-text" style="display:none;">{{ error_texts|safe }}</div>
|
||||
<span class="watch-title">
|
||||
{% if system_use_url_watchlist or watch.get('use_page_title_in_list') %}
|
||||
{{ watch.label }}
|
||||
{% else %}
|
||||
{{ watch.get('title') or watch.link }}
|
||||
{% endif %}
|
||||
<a class="external" target="_blank" rel="noopener" href="{{ watch.link.replace('source:','') }}"> </a>
|
||||
</span>
|
||||
<div class="error-text" style="display:none;">{{ watch.compile_error_texts(has_proxies=datastore.proxy_list)|safe }}</div>
|
||||
{%- if watch['processor'] == 'text_json_diff' -%}
|
||||
{%- if watch['has_ldjson_price_data'] and not watch['track_ldjson_price_data'] -%}
|
||||
<div class="ldjson-price-track-offer">Switch to Restock & Price watch mode? <a href="{{url_for('price_data_follower.accept', uuid=watch.uuid)}}" class="pure-button button-xsmall">Yes</a> <a href="{{url_for('price_data_follower.reject', uuid=watch.uuid)}}" class="">No</a></div>
|
||||
{%- endif -%}
|
||||
{%- endif -%}
|
||||
|
||||
{%- if watch['processor'] and watch['processor'] in processor_badge_texts -%}
|
||||
<span class="processor-badge processor-badge-{{ watch['processor'] }}" title="{{ processor_descriptions.get(watch['processor'], watch['processor']) }}">{{ processor_badge_texts[watch['processor']] }}</span>
|
||||
{%- endif -%}
|
||||
{%- for watch_tag_uuid, watch_tag in datastore.get_all_tags_for_watch(watch['uuid']).items() -%}
|
||||
<a href="{{url_for('watchlist.index', tag=watch_tag_uuid) }}" class="watch-tag-list tag-{{ watch_tag.title|sanitize_tag_class }}">{{ watch_tag.title }}</a>
|
||||
<span class="watch-tag-list">{{ watch_tag.title }}</span>
|
||||
{%- endfor -%}
|
||||
</div>
|
||||
<div class="status-icons">
|
||||
<a class="link-spread" href="{{url_for('ui.form_share_put_watch', uuid=watch.uuid)}}"><img src="{{url_for('static_content', group='images', filename='spread.svg')}}" class="status-icon icon icon-spread" title="Create a link to share watch config with others" ></a>
|
||||
{{ watch.effective_browser_profile.get_fetcher_class_name()|fetcher_status_icons }}
|
||||
{%- set effective_fetcher = watch.get_fetch_backend if watch.get_fetch_backend != "system" else system_default_fetcher -%}
|
||||
{%- if effective_fetcher and ("html_webdriver" in effective_fetcher or "html_" in effective_fetcher or "extra_browser_" in effective_fetcher) -%}
|
||||
{{ effective_fetcher|fetcher_status_icons }}
|
||||
{%- endif -%}
|
||||
{%- if watch.is_pdf -%}<img class="status-icon" src="{{url_for('static_content', group='images', filename='pdf-icon.svg')}}" alt="Converting PDF to text" >{%- endif -%}
|
||||
{%- if watch.has_browser_steps -%}<img class="status-icon status-browsersteps" src="{{url_for('static_content', group='images', filename='steps.svg')}}" alt="Browser Steps is enabled" >{%- endif -%}
|
||||
|
||||
@@ -302,20 +216,11 @@ html[data-darkmode="true"] .watch-tag-list.tag-{{ class_name }} {
|
||||
</span>
|
||||
{%- endif -%}
|
||||
|
||||
{%- if watch.get('restock') and watch['restock'].get('price') -%}
|
||||
{%- set restock = watch['restock'] -%}
|
||||
{%- set price = restock.get('price') -%}
|
||||
{%- set cur = restock.get('currency','') -%}
|
||||
|
||||
{%- if price is not none and (price|string)|regex_search('\d') -%}
|
||||
<span class="restock-label price" title="{{ _('Price') }}">
|
||||
{# @todo: make parse_currency/parse_decimal aware of the locale of the actual web page and use that instead changedetectionio/processors/restock_diff/__init__.py #}
|
||||
{%- if price is number -%}{# It's a number so we can convert it to their locale' #}
|
||||
{{ price|format_number_locale }} {{ cur }}<!-- as number -->
|
||||
{%- else -%}{# It's totally fine if it arrives as something else, the website might be something weird in this field #}
|
||||
{{ price }} {{ cur }}<!-- as string -->
|
||||
{%- endif -%}
|
||||
</span>
|
||||
{%- if watch.get('restock') and watch['restock']['price'] != None -%}
|
||||
{%- if watch['restock']['price'] != None -%}
|
||||
<span class="restock-label price" title="{{ _('Price') }}">
|
||||
{{ watch['restock']['price']|format_number_locale if watch['restock'].get('price') else '' }} {{ watch['restock'].get('currency','') }}
|
||||
</span>
|
||||
{%- endif -%}
|
||||
{%- elif not watch.has_restock_info -%}
|
||||
<span class="restock-label error">{{ _('No information') }}</span>
|
||||
|
||||
@@ -25,88 +25,87 @@ SCREENSHOT_MAX_TOTAL_HEIGHT = int(os.getenv("SCREENSHOT_MAX_HEIGHT", SCREENSHOT_
|
||||
# Most modern GPUs support 16384x16384 textures, so 1280x10000 is safe
|
||||
SCREENSHOT_SIZE_STITCH_THRESHOLD = int(os.getenv("SCREENSHOT_CHUNK_HEIGHT", 10000))
|
||||
|
||||
# available_fetchers() will scan this implementation looking for anything starting with html_
|
||||
# this information is used in the form selections
|
||||
from changedetectionio.content_fetchers.requests import fetcher as html_requests
|
||||
|
||||
|
||||
import importlib.resources
|
||||
XPATH_ELEMENT_JS = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text(encoding='utf-8')
|
||||
INSTOCK_DATA_JS = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('stock-not-in-stock.js').read_text(encoding='utf-8')
|
||||
FAVICON_FETCHER_JS = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('favicon-fetcher.js').read_text(encoding='utf-8')
|
||||
|
||||
|
||||
# Registry: clean fetcher name → fetcher class (e.g. 'requests', 'playwright', 'cloakbrowser')
|
||||
FETCHERS: dict = {}
|
||||
|
||||
|
||||
def register_fetcher(name: str, cls) -> None:
|
||||
"""Register a fetcher class under its clean name (no html_ prefix)."""
|
||||
FETCHERS[name] = cls
|
||||
|
||||
|
||||
def get_fetcher(name: str):
|
||||
"""Return the fetcher class for a clean name, or None."""
|
||||
return FETCHERS.get(name)
|
||||
|
||||
|
||||
def available_fetchers():
|
||||
"""Return list of (name, description) for all registered fetchers."""
|
||||
return [(name, cls.fetcher_description) for name, cls in FETCHERS.items()
|
||||
if hasattr(cls, 'fetcher_description')]
|
||||
# See the if statement at the bottom of this file for how we switch between playwright and webdriver
|
||||
import inspect
|
||||
p = []
|
||||
|
||||
# Get built-in fetchers (but skip plugin fetchers that were added via setattr)
|
||||
for name, obj in inspect.getmembers(sys.modules[__name__], inspect.isclass):
|
||||
if inspect.isclass(obj):
|
||||
# @todo html_ is maybe better as fetcher_ or something
|
||||
# In this case, make sure to edit the default one in store.py and fetch_site_status.py
|
||||
if name.startswith('html_'):
|
||||
# Skip plugin fetchers that were already registered
|
||||
if name not in _plugin_fetchers:
|
||||
t = tuple([name, obj.fetcher_description])
|
||||
p.append(t)
|
||||
|
||||
# Get plugin fetchers from cache (already loaded at module init)
|
||||
for name, fetcher_class in _plugin_fetchers.items():
|
||||
if hasattr(fetcher_class, 'fetcher_description'):
|
||||
t = tuple([name, fetcher_class.fetcher_description])
|
||||
p.append(t)
|
||||
else:
|
||||
logger.warning(f"Plugin fetcher '{name}' does not have fetcher_description attribute")
|
||||
|
||||
return p
|
||||
|
||||
|
||||
def available_browser_fetchers():
|
||||
"""Return list of (name, description) for fetchers that support screenshots (browser-type fetchers)."""
|
||||
return [(name, cls.fetcher_description) for name, cls in FETCHERS.items()
|
||||
if cls.supports_screenshots]
|
||||
def get_plugin_fetchers():
|
||||
"""Load and return all plugin fetchers from the centralized plugin manager."""
|
||||
from changedetectionio.pluggy_interface import plugin_manager
|
||||
|
||||
|
||||
def _load_fetchers():
|
||||
"""Load all fetchers (built-ins + plugins) into the FETCHERS registry."""
|
||||
from changedetectionio.pluggy_interface import plugin_manager, register_builtin_fetchers
|
||||
|
||||
# Built-ins must be registered first
|
||||
register_builtin_fetchers()
|
||||
|
||||
# Then external plugins
|
||||
fetchers = {}
|
||||
try:
|
||||
# Call the register_content_fetcher hook from all registered plugins
|
||||
results = plugin_manager.hook.register_content_fetcher()
|
||||
for result in results:
|
||||
if result:
|
||||
name, fetcher_class = result
|
||||
register_fetcher(name, fetcher_class)
|
||||
logger.info(f"Registered fetcher: {name} - {getattr(fetcher_class, 'fetcher_description', '?')}")
|
||||
fetchers[name] = fetcher_class
|
||||
# Register in current module so hasattr() checks work
|
||||
setattr(sys.modules[__name__], name, fetcher_class)
|
||||
logger.info(f"Registered plugin fetcher: {name} - {getattr(fetcher_class, 'fetcher_description', 'No description')}")
|
||||
except Exception as e:
|
||||
logger.error(f"Error loading plugin fetchers: {e}")
|
||||
|
||||
|
||||
def get_active_browser_fetcher_name() -> str:
|
||||
"""Return the clean name of the browser fetcher activated by environment config.
|
||||
|
||||
- ``PLAYWRIGHT_DRIVER_URL`` set + ``FAST_PUPPETEER_CHROME_FETCHER=False`` → ``playwright_cdp``
|
||||
- ``PLAYWRIGHT_DRIVER_URL`` set + ``FAST_PUPPETEER_CHROME_FETCHER=True`` → ``puppeteer``
|
||||
- Neither set → ``selenium``
|
||||
"""
|
||||
if os.getenv('PLAYWRIGHT_DRIVER_URL', False):
|
||||
if not strtobool(os.getenv('FAST_PUPPETEER_CHROME_FETCHER', 'False')):
|
||||
return 'playwright_cdp'
|
||||
return 'puppeteer'
|
||||
return 'selenium'
|
||||
return fetchers
|
||||
|
||||
|
||||
# Default browser profiles always shown in the browser profiles table (keyed by machine name)
|
||||
DEFAULT_BROWSER_PROFILES: dict = {}
|
||||
# Initialize plugins at module load time
|
||||
_plugin_fetchers = get_plugin_fetchers()
|
||||
|
||||
|
||||
def _register_default_browser_profiles():
    """Add the built-in requests profile to ``DEFAULT_BROWSER_PROFILES``.

    The profile is stored under the key returned by its own
    ``get_machine_name()``.
    """
    # Imported inside the function, as in the original layout
    from changedetectionio.model.browser_profile import BUILTIN_REQUESTS as requests_profile

    machine_name = requests_profile.get_machine_name()
    DEFAULT_BROWSER_PROFILES.update({machine_name: requests_profile})
|
||||
# Decide which is the 'real' HTML webdriver, this is more a system wide config
|
||||
# rather than site-specific.
|
||||
use_playwright_as_chrome_fetcher = os.getenv('PLAYWRIGHT_DRIVER_URL', False)
|
||||
if use_playwright_as_chrome_fetcher:
|
||||
# @note - For now, browser steps always uses playwright
|
||||
if not strtobool(os.getenv('FAST_PUPPETEER_CHROME_FETCHER', 'False')):
|
||||
logger.debug('Using Playwright library as fetcher')
|
||||
from .playwright import fetcher as html_webdriver
|
||||
else:
|
||||
logger.debug('Using direct Python Puppeteer library as fetcher')
|
||||
from .puppeteer import fetcher as html_webdriver
|
||||
|
||||
else:
|
||||
logger.debug("Falling back to selenium as fetcher")
|
||||
from .webdriver_selenium import fetcher as html_webdriver
|
||||
|
||||
|
||||
# Populate the registry at module load time
|
||||
_load_fetchers()
|
||||
|
||||
# Backwards-compat alias: stored data may reference 'playwright' (pre-refactor name).
|
||||
# Map it to playwright_cdp which is the CDP-based fetcher that replaced it.
|
||||
if 'playwright_cdp' in FETCHERS and 'playwright' not in FETCHERS:
|
||||
FETCHERS['playwright'] = FETCHERS['playwright_cdp']
|
||||
|
||||
_register_default_browser_profiles()
|
||||
# Register built-in fetchers as plugins after all imports are complete
|
||||
from changedetectionio.pluggy_interface import register_builtin_fetchers
|
||||
register_builtin_fetchers()
|
||||
|
||||
|
||||
@@ -38,6 +38,7 @@ def manage_user_agent(headers, current_ua=''):
|
||||
|
||||
return None
|
||||
|
||||
|
||||
class Fetcher():
|
||||
browser_connection_is_custom = None
|
||||
browser_connection_url = None
|
||||
@@ -70,38 +71,28 @@ class Fetcher():
|
||||
supports_screenshots = False # Can capture page screenshots
|
||||
supports_xpath_element_data = False # Can extract xpath element positions/data for visual selector
|
||||
|
||||
# Icon shown in the watch list when this fetcher is the effective fetcher.
|
||||
# Set to a dict with 'filename', 'alt', 'title' keys (image served from static/images/).
|
||||
# None means no icon is shown (e.g. plain HTTP requests fetcher).
|
||||
status_icon = None
|
||||
|
||||
# Screenshot element locking - prevents layout shifts during screenshot capture
|
||||
# Only needed for visual comparison (image_ssim_diff processor)
|
||||
# Locks element dimensions in the first viewport to prevent headers/ads from resizing
|
||||
lock_viewport_elements = False # Default: disabled for performance
|
||||
|
||||
# BrowserProfile-derived settings — applied by browser fetchers, ignored by html_requests
|
||||
viewport_width: int = 1280
|
||||
viewport_height: int = 1000
|
||||
block_images: bool = False
|
||||
block_fonts: bool = False
|
||||
profile_user_agent: str = None # Profile-level UA; lower priority than request_headers User-Agent
|
||||
ignore_https_errors: bool = False
|
||||
locale: str = None
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
if kwargs and 'screenshot_format' in kwargs:
|
||||
self.screenshot_format = kwargs.get('screenshot_format')
|
||||
|
||||
if kwargs and 'lock_viewport_elements' in kwargs:
|
||||
self.lock_viewport_elements = kwargs.get('lock_viewport_elements')
|
||||
|
||||
# BrowserProfile fields — store whatever was passed, subclasses use them
|
||||
for field in ('viewport_width', 'viewport_height', 'block_images', 'block_fonts',
|
||||
'profile_user_agent', 'ignore_https_errors', 'locale'):
|
||||
if field in kwargs:
|
||||
setattr(self, field, kwargs[field])
|
||||
@classmethod
|
||||
def get_status_icon_data(cls):
|
||||
"""Return data for status icon to display in the watch overview.
|
||||
|
||||
This method can be overridden by subclasses to provide custom status icons.
|
||||
|
||||
Returns:
|
||||
dict or None: Dictionary with icon data:
|
||||
{
|
||||
'filename': 'icon-name.svg', # Icon filename
|
||||
'alt': 'Alt text', # Alt attribute
|
||||
'title': 'Tooltip text', # Title attribute
|
||||
'style': 'height: 1em;' # Optional inline CSS
|
||||
}
|
||||
Or None if no icon
|
||||
"""
|
||||
return None
|
||||
|
||||
def clear_content(self):
|
||||
"""
|
||||
@@ -163,16 +154,30 @@ class Fetcher():
|
||||
"""
|
||||
return {k.lower(): v for k, v in self.headers.items()}
|
||||
|
||||
def browser_steps_get_valid_steps(self):
    """Return the browser steps that carry a real operation.

    A step is kept when its ``'operation'`` value is non-empty and is not
    the ``'Choose one'`` placeholder. Steps with no ``'operation'`` key at
    all are treated as invalid instead of raising ``KeyError``.

    Returns:
        list or None: the filtered steps (possibly empty), or ``None`` when
        ``self.browser_steps`` is ``None`` or empty.
    """
    if self.browser_steps is None or not len(self.browser_steps):
        return None

    valid_steps = [
        step for step in self.browser_steps
        if step.get('operation') and step.get('operation') != 'Choose one'
    ]

    # Just in case they selected 'Goto site' by accident with older JS
    if valid_steps and valid_steps[0]['operation'] == 'Goto site':
        del valid_steps[0]

    return valid_steps
|
||||
|
||||
async def iterate_browser_steps(self, start_url=None):
|
||||
from changedetectionio.browser_steps.browser_steps import steppable_browser_interface, browser_steps_get_valid_steps
|
||||
from changedetectionio.blueprint.browser_steps.browser_steps import steppable_browser_interface
|
||||
from playwright._impl._errors import TimeoutError, Error
|
||||
from changedetectionio.jinja2_custom import render as jinja_render
|
||||
step_n = 0
|
||||
|
||||
if self.browser_steps:
|
||||
if self.browser_steps is not None and len(self.browser_steps):
|
||||
interface = steppable_browser_interface(start_url=start_url)
|
||||
interface.page = self.page
|
||||
valid_steps = browser_steps_get_valid_steps(self.browser_steps)
|
||||
valid_steps = self.browser_steps_get_valid_steps()
|
||||
|
||||
for step in valid_steps:
|
||||
step_n += 1
|
||||
@@ -199,16 +204,6 @@ class Fetcher():
|
||||
# Stop processing here
|
||||
raise BrowserStepsStepException(step_n=step_n, original_e=e)
|
||||
|
||||
def disk_cleanup_after_fetch(self):
    """Hook for deleting temporary files a fetch left on disk.

    This base implementation does nothing. Browser-based fetchers override
    it to remove browser-step screenshots and any other ephemeral files
    they wrote. The processor calls it after ``quit()`` whether the fetch
    succeeded or failed.
    """
    return None
|
||||
|
||||
# It's always good to reset these
|
||||
def delete_browser_steps_screenshots(self):
|
||||
import glob
|
||||
|
||||
406
changedetectionio/content_fetchers/playwright.py
Normal file
406
changedetectionio/content_fetchers/playwright.py
Normal file
@@ -0,0 +1,406 @@
|
||||
import json
|
||||
import os
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from changedetectionio.content_fetchers import SCREENSHOT_MAX_HEIGHT_DEFAULT, visualselector_xpath_selectors, \
|
||||
SCREENSHOT_SIZE_STITCH_THRESHOLD, SCREENSHOT_MAX_TOTAL_HEIGHT, XPATH_ELEMENT_JS, INSTOCK_DATA_JS, FAVICON_FETCHER_JS
|
||||
from changedetectionio.content_fetchers.base import Fetcher, manage_user_agent
|
||||
from changedetectionio.content_fetchers.exceptions import PageUnloadable, Non200ErrorCodeReceived, EmptyReply, ScreenshotUnavailable
|
||||
|
||||
async def capture_full_page_async(page, screenshot_format='JPEG'):
    """Capture the page as one image, stitching viewport-sized chunks if needed.

    When the document is taller than the current viewport, element sizes are
    locked with ``res/lock-elements-sizing.js``, the viewport is enlarged, and
    the page is captured chunk by chunk while scrolling. The original viewport
    and element sizing are put back afterwards, including when a chunk
    capture fails.

    Args:
        page: Playwright page object (async API).
        screenshot_format: ``'JPEG'`` or ``'PNG'`` (case-insensitive); a falsy
            value means JPEG. JPEG quality is read from ``SCREENSHOT_QUALITY``
            (default 72).

    Returns:
        bytes: the single chunk, or the stitched image when more than one
        chunk was captured.
    """
    import asyncio
    import os
    import time
    import multiprocessing

    start = time.time()

    page_height = await page.evaluate("document.documentElement.scrollHeight")
    page_width = await page.evaluate("document.documentElement.scrollWidth")
    original_viewport = page.viewport_size

    logger.debug(f"Playwright viewport size {page.viewport_size} page height {page_height} page width {page_width}")

    # Use an approach similar to puppeteer: set a larger viewport and take screenshots in chunks
    step_size = SCREENSHOT_SIZE_STITCH_THRESHOLD  # Size that won't cause GPU to overflow
    screenshot_chunks = []
    y = 0
    elements_locked = False
    res_dir = os.path.join(os.path.dirname(__file__), 'res')

    async def _restore_page_state():
        """Put back the original viewport and unlock any locked elements."""
        await page.set_viewport_size({'width': original_viewport['width'], 'height': original_viewport['height']})
        if elements_locked:
            with open(os.path.join(res_dir, 'unlock-elements-sizing.js'), 'r') as f:
                unlock_elements_js = f.read()
            await page.evaluate(unlock_elements_js)
            logger.debug("Element dimensions unlocked after screenshot capture")

    try:
        if page_height > page.viewport_size['height']:
            # Lock all element dimensions BEFORE screenshot to prevent CSS media queries from resizing;
            # changing the viewport height below triggers @media (min-height) rules
            with open(os.path.join(res_dir, 'lock-elements-sizing.js'), 'r') as f:
                lock_elements_js = f.read()
            await page.evaluate(lock_elements_js)
            elements_locked = True

            logger.debug("Element dimensions locked before screenshot capture")

            if page_height < step_size:
                # In case the page is bigger than the default viewport but smaller than the proposed step size
                step_size = page_height
            logger.debug(f"Setting bigger viewport to step through large page width W{page.viewport_size['width']}xH{step_size} because page_height > viewport_size")
            # Set viewport to a larger size to capture more content at once
            await page.set_viewport_size({'width': page.viewport_size['width'], 'height': step_size})

        # Use PNG for better quality (no compression artifacts), JPEG for smaller size
        screenshot_type = screenshot_format.lower() if screenshot_format else 'jpeg'
        # PNG should use quality 100, JPEG uses configurable quality
        screenshot_quality = 100 if screenshot_type == 'png' else int(os.getenv("SCREENSHOT_QUALITY", 72))

        # Capture chunks up to the max total height. The first chunk is always
        # taken, so a page reporting zero height still yields an image rather
        # than an IndexError on an empty chunk list.
        capture_limit = min(page_height, SCREENSHOT_MAX_TOTAL_HEIGHT)
        while True:
            # Only scroll if not at the top (y > 0)
            if y > 0:
                await page.evaluate(f"window.scrollTo(0, {y})")

            # Request GC only before screenshot (not 3x per chunk)
            await page.request_gc()

            screenshot_kwargs = {
                'type': screenshot_type,
                'full_page': False
            }
            # Only pass quality parameter for jpeg (PNG doesn't support it in Playwright)
            if screenshot_type == 'jpeg':
                screenshot_kwargs['quality'] = screenshot_quality

            screenshot_chunks.append(await page.screenshot(**screenshot_kwargs))
            y += step_size
            if y >= capture_limit:
                break
    except BaseException:
        # Best-effort restore so a caller that keeps using the page (e.g. between
        # browser steps) is not left with an oversized viewport / locked elements.
        try:
            await _restore_page_state()
        except Exception as restore_error:
            logger.debug(f"Could not restore page state after failed screenshot capture: {restore_error}")
        raise
    else:
        await _restore_page_state()

    # If we have multiple chunks, stitch them together
    if len(screenshot_chunks) > 1:
        logger.debug(f"Screenshot stitching {len(screenshot_chunks)} chunks together")

        # For small number of chunks (2-3), stitch inline to avoid multiprocessing overhead
        # Only use separate process for many chunks (4+) to avoid blocking the event loop
        if len(screenshot_chunks) <= 3:
            from changedetectionio.content_fetchers.screenshot_handler import stitch_images_inline
            screenshot = stitch_images_inline(screenshot_chunks, page_height, SCREENSHOT_MAX_TOTAL_HEIGHT)
        else:
            # Always use spawn for thread safety - consistent behavior in tests and production
            from changedetectionio.content_fetchers.screenshot_handler import stitch_images_worker
            ctx = multiprocessing.get_context('spawn')
            parent_conn, child_conn = ctx.Pipe()
            p = ctx.Process(target=stitch_images_worker, args=(child_conn, screenshot_chunks, page_height, SCREENSHOT_MAX_TOTAL_HEIGHT))
            p.start()
            # The worker owns its end now; closing ours lets recv_bytes() raise
            # EOFError instead of hanging forever if the worker dies early.
            child_conn.close()
            try:
                # recv_bytes() blocks, so wait for it off the event loop
                screenshot = await asyncio.get_running_loop().run_in_executor(None, parent_conn.recv_bytes)
            finally:
                p.join()
                parent_conn.close()

        logger.debug(
            f"Screenshot (chunked/stitched) - Page height: {page_height} Capture height: {SCREENSHOT_MAX_TOTAL_HEIGHT} - Stitched together in {time.time() - start:.2f}s")
        # Drop the chunk list early; the chunks can be large
        screenshot_chunks = None
        return screenshot

    logger.debug(
        f"Screenshot Page height: {page_height} Capture height: {SCREENSHOT_MAX_TOTAL_HEIGHT} - Captured in {time.time() - start:.2f}s")

    return screenshot_chunks[0]
|
||||
|
||||
class fetcher(Fetcher):
    """Content fetcher that drives a remote browser through Playwright.

    The browser is reached with ``connect_over_cdp`` at
    ``browser_connection_url`` (a per-call custom URL, else the
    ``PLAYWRIGHT_DRIVER_URL`` environment variable). The browser type comes
    from ``PLAYWRIGHT_BROWSER_TYPE`` (default ``chromium``).
    """

    fetcher_description = "Playwright {}/Javascript".format(
        os.getenv("PLAYWRIGHT_BROWSER_TYPE", 'chromium').capitalize()
    )
    if os.getenv("PLAYWRIGHT_DRIVER_URL"):
        fetcher_description += " via '{}'".format(os.getenv("PLAYWRIGHT_DRIVER_URL"))

    browser_type = ''
    command_executor = ''

    # Configs for Proxy setup
    # In the ENV vars, is prefixed with "playwright_proxy_", so it is for example "playwright_proxy_server"
    playwright_proxy_settings_mappings = ['bypass', 'server', 'username', 'password']

    proxy = None

    # Capability flags
    supports_browser_steps = True
    supports_screenshots = True
    supports_xpath_element_data = True

    @classmethod
    def get_status_icon_data(cls):
        """Return Chrome browser icon data for Playwright fetcher."""
        return {
            'filename': 'google-chrome-icon.png',
            'alt': 'Using a Chrome browser',
            'title': 'Using a Chrome browser'
        }

    def __init__(self, proxy_override=None, custom_browser_connection_url=None, **kwargs):
        """Resolve the browser endpoint and proxy settings.

        Args:
            proxy_override: proxy server URL that replaces any proxy built from
                the ``playwright_proxy_*`` environment variables.
            custom_browser_connection_url: endpoint to use instead of
                ``PLAYWRIGHT_DRIVER_URL``.
            **kwargs: forwarded to ``Fetcher.__init__``.
        """
        super().__init__(**kwargs)

        self.browser_type = os.getenv("PLAYWRIGHT_BROWSER_TYPE", 'chromium').strip('"')

        if custom_browser_connection_url:
            self.browser_connection_is_custom = True
            self.browser_connection_url = custom_browser_connection_url
        else:
            # Fallback to fetching from system
            # .strip('"') is going to save someone a lot of time when they accidentally wrap the env value
            self.browser_connection_url = os.getenv("PLAYWRIGHT_DRIVER_URL", 'ws://playwright-chrome:3000').strip('"')

        # If any proxy settings are enabled, then we should setup the proxy object
        proxy_args = {}
        for k in self.playwright_proxy_settings_mappings:
            v = os.getenv('playwright_proxy_' + k, False)
            if v:
                proxy_args[k] = v.strip('"')

        if proxy_args:
            self.proxy = proxy_args

        # allow per-watch proxy selection override
        if proxy_override:
            self.proxy = {'server': proxy_override}

        if self.proxy:
            # Playwright needs separate username and password values.
            # Default to '' so a proxy dict without 'server' is parsed as str, not None.
            parsed = urlparse(self.proxy.get('server', ''))
            if parsed.username:
                self.proxy['username'] = parsed.username
                self.proxy['password'] = parsed.password

    async def screenshot_step(self, step_n=''):
        """Capture the current page and, if a step path is set, save it as ``step_<n>.jpeg``."""
        super().screenshot_step(step_n=step_n)
        screenshot = await capture_full_page_async(page=self.page, screenshot_format=self.screenshot_format)

        if self.browser_steps_screenshot_path is not None:
            destination = os.path.join(self.browser_steps_screenshot_path, 'step_{}.jpeg'.format(step_n))
            logger.debug(f"Saving step screenshot to {destination}")
            with open(destination, 'wb') as f:
                f.write(screenshot)

    async def save_step_html(self, step_n):
        """Write the current page HTML to ``step_<n>.html`` in the step screenshot path."""
        super().save_step_html(step_n=step_n)
        content = await self.page.content()
        destination = os.path.join(self.browser_steps_screenshot_path, 'step_{}.html'.format(step_n))
        logger.debug(f"Saving step HTML to {destination}")
        with open(destination, 'w', encoding='utf-8') as f:
            f.write(content)

    async def _release_browser_resources(self, context, browser):
        """Best-effort close of page, context and browser; never raises ``Exception``."""
        page = getattr(self, 'page', None)
        if page is not None:
            # Request garbage collection one more time before closing
            for label, step in (('request_gc', page.request_gc), ('page.close', page.close)):
                try:
                    await step()
                except Exception as e:
                    logger.debug(f"Content Fetcher > Ignoring error during {label}: {str(e)}")
        self.page = None

        for label, resource in (('context.close', context), ('browser.close', browser)):
            if resource is None:
                continue
            try:
                await resource.close()
            except Exception as e:
                logger.debug(f"Content Fetcher > Ignoring error during {label}: {str(e)}")

    async def run(self,
                  fetch_favicon=True,
                  current_include_filters=None,
                  empty_pages_are_a_change=False,
                  ignore_status_codes=False,
                  is_binary=False,
                  request_body=None,
                  request_headers=None,
                  request_method=None,
                  screenshot_format=None,
                  timeout=None,
                  url=None,
                  watch_uuid=None,
                  ):
        """Load ``url`` in the remote browser and populate the fetch results.

        On success sets ``headers``, ``status_code``, ``content``,
        ``xpath_data``, ``instock_data``, ``screenshot`` and (optionally)
        ``favicon_blob`` on the instance. Page, context and browser are
        released on every exit path.

        Args:
            fetch_favicon: evaluate ``FAVICON_FETCHER_JS`` and store the result.
            current_include_filters: exposed to the page as ``include_filters``.
            empty_pages_are_a_change: when false, an empty page raises ``EmptyReply``.
            ignore_status_codes: when false, a non-200 status raises.
            request_headers: extra HTTP headers; also used to choose the user agent.
            url: address to load.
            is_binary, request_body, request_method, screenshot_format, timeout,
            watch_uuid: accepted for interface compatibility; not used here.

        Raises:
            EmptyReply: no response object, or empty content when that is not allowed.
            PageUnloadable: custom JS failed (other than by timeout) or the
                response had no status.
            Non200ErrorCodeReceived: status was not 200 and codes are not ignored.
            ScreenshotUnavailable: the final screenshot could not be captured.
        """
        from playwright.async_api import async_playwright
        import playwright._impl._errors
        import time

        self.delete_browser_steps_screenshots()
        response = None

        async with async_playwright() as p:
            browser_type = getattr(p, self.browser_type)
            browser = None
            context = None

            try:
                # Seemed to cause a connection Exception even tho I can see it connect
                # self.browser = browser_type.connect(self.command_executor, timeout=timeout*1000)
                # 60,000 connection timeout only
                browser = await browser_type.connect_over_cdp(self.browser_connection_url, timeout=60000)

                # SOCKS5 with authentication is not supported (yet)
                # https://github.com/microsoft/playwright/issues/10567

                # Set user agent to prevent Cloudflare from blocking the browser
                # Use the default one configured in the App.py model that's passed from fetch_site_status.py
                context = await browser.new_context(
                    accept_downloads=False,  # Should never be needed
                    bypass_csp=True,  # This is needed to enable JavaScript execution on GitHub and others
                    extra_http_headers=request_headers,
                    ignore_https_errors=True,
                    proxy=self.proxy,
                    service_workers=os.getenv('PLAYWRIGHT_SERVICE_WORKERS', 'allow'),  # Should be `allow` or `block` - sites like YouTube can transmit large amounts of data via Service Workers
                    user_agent=manage_user_agent(headers=request_headers),
                )

                self.page = await context.new_page()

                # Listen for all console events and handle errors
                self.page.on("console", lambda msg: logger.debug(f"Playwright console: Watch URL: {url} {msg.type}: {msg.text} {msg.args}"))

                # Re-use as much code from browser steps as possible so its the same
                from changedetectionio.blueprint.browser_steps.browser_steps import steppable_browser_interface
                browsersteps_interface = steppable_browser_interface(start_url=url)
                browsersteps_interface.page = self.page

                response = await browsersteps_interface.action_goto_url(value=url)

                if response is None:
                    logger.debug("Content Fetcher > Response object from the browser communication was none")
                    raise EmptyReply(url=url, status_code=None)

                # In async_playwright, all_headers() returns a coroutine
                try:
                    self.headers = await response.all_headers()
                except TypeError:
                    # Fallback for sync version
                    self.headers = response.all_headers()

                try:
                    if self.webdriver_js_execute_code is not None and len(self.webdriver_js_execute_code):
                        await browsersteps_interface.action_execute_js(value=self.webdriver_js_execute_code, selector=None)
                except playwright._impl._errors.TimeoutError as e:
                    # This can be ok, we will try to grab what we could retrieve.
                    # The context/browser must stay open here because the page is still used below.
                    logger.debug(f"Content Fetcher > Timeout when executing custom JS code, continuing: {str(e)}")
                except Exception as e:
                    logger.debug(f"Content Fetcher > Other exception when executing custom JS code {str(e)}")
                    raise PageUnloadable(url=url, status_code=None, message=str(e))

                extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay
                await self.page.wait_for_timeout(extra_wait * 1000)

                try:
                    self.status_code = response.status
                except Exception as e:
                    # https://github.com/dgtlmoon/changedetection.io/discussions/2122#discussioncomment-8241962
                    logger.critical(f"Response from the browser/Playwright did not have a status_code! Response follows.")
                    logger.critical(response)
                    raise PageUnloadable(url=url, status_code=None, message=str(e))

                if fetch_favicon:
                    try:
                        self.favicon_blob = await self.page.evaluate(FAVICON_FETCHER_JS)
                        await self.page.request_gc()
                    except Exception as e:
                        logger.error(f"Error fetching FavIcon info {str(e)}, continuing.")

                if self.status_code != 200 and not ignore_status_codes:
                    screenshot = await capture_full_page_async(self.page, screenshot_format=self.screenshot_format)
                    raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, screenshot=screenshot)

                if not empty_pages_are_a_change and len((await self.page.content()).strip()) == 0:
                    logger.debug("Content Fetcher > Content was empty, empty_pages_are_a_change = False")
                    raise EmptyReply(url=url, status_code=response.status)

                # Run Browser Steps here
                if self.browser_steps_get_valid_steps():
                    await self.iterate_browser_steps(start_url=url)

                await self.page.wait_for_timeout(extra_wait * 1000)

                now = time.time()
                # So we can find an element on the page where its selector was entered manually (maybe not xPath etc)
                if current_include_filters is not None:
                    await self.page.evaluate("var include_filters={}".format(json.dumps(current_include_filters)))
                else:
                    await self.page.evaluate("var include_filters=''")
                await self.page.request_gc()

                # request_gc before and after evaluate to free up memory
                # @todo browsersteps etc
                MAX_TOTAL_HEIGHT = int(os.getenv("SCREENSHOT_MAX_HEIGHT", SCREENSHOT_MAX_HEIGHT_DEFAULT))
                self.xpath_data = await self.page.evaluate(XPATH_ELEMENT_JS, {
                    "visualselector_xpath_selectors": visualselector_xpath_selectors,
                    "max_height": MAX_TOTAL_HEIGHT
                })
                await self.page.request_gc()

                self.instock_data = await self.page.evaluate(INSTOCK_DATA_JS)
                await self.page.request_gc()

                self.content = await self.page.content()
                await self.page.request_gc()
                logger.debug(f"Scrape xPath element data in browser done in {time.time() - now:.2f}s")

                # Bug 3 in Playwright screenshot handling
                # Some bug where it gives the wrong screenshot size, but making a request with the clip set first seems to solve it
                # JPEG is better here because the screenshots can be very very large

                # Screenshots also travel via the ws:// (websocket) meaning that the binary data is base64 encoded
                # which will significantly increase the IO size between the server and client, it's recommended to use the lowest
                # acceptable screenshot quality here
                try:
                    # The actual screenshot - this always base64 and needs decoding! horrible! huge CPU usage
                    self.screenshot = await capture_full_page_async(page=self.page, screenshot_format=self.screenshot_format)
                except Exception as e:
                    # It's likely the screenshot was too long/big and something crashed
                    raise ScreenshotUnavailable(url=url, status_code=self.status_code) from e
            finally:
                # Single cleanup point for success and every error path above
                await self._release_browser_resources(context, browser)
|
||||
|
||||
|
||||
# Plugin registration for built-in fetcher
|
||||
class PlaywrightFetcherPlugin:
    """Built-in plugin wrapper that exposes the Playwright fetcher to the plugin system."""

    def register_content_fetcher(self):
        """Return the ``(name, fetcher_class)`` pair for the Playwright fetcher."""
        registration = ('html_webdriver', fetcher)
        return registration
|
||||
|
||||
|
||||
# Create module-level instance for plugin registration
|
||||
playwright_plugin = PlaywrightFetcherPlugin()
|
||||
|
||||
|
||||
|
||||
@@ -1,41 +0,0 @@
|
||||
"""
|
||||
Playwright CDP fetcher — connects to a remote browser via Chrome DevTools Protocol.
|
||||
|
||||
This is the original "playwright" fetcher, renamed to make the connection
|
||||
method explicit. The PLAYWRIGHT_DRIVER_URL env var (or per-profile
|
||||
browser_connection_url) points to a running Chrome/Chromium container that
|
||||
exposes the CDP WebSocket endpoint (e.g. ws://playwright-chrome:3000).
|
||||
"""
|
||||
import os
|
||||
|
||||
from changedetectionio.pluggy_interface import hookimpl
|
||||
from changedetectionio.content_fetchers.playwright import PlaywrightBaseFetcher
|
||||
|
||||
|
||||
class fetcher(PlaywrightBaseFetcher):
    """Playwright fetcher that attaches to an already-running browser over CDP."""

    fetcher_description = "Playwright Chrome (CDP/Remote)"

    def __init__(self, proxy_override=None, custom_browser_connection_url=None, **kwargs):
        """Pick the CDP endpoint: the custom URL if given, else ``PLAYWRIGHT_DRIVER_URL``."""
        super().__init__(
            proxy_override=proxy_override,
            custom_browser_connection_url=custom_browser_connection_url,
            **kwargs,
        )

        if custom_browser_connection_url:
            self.browser_connection_is_custom = True

        # The environment is only consulted when no custom URL was supplied;
        # surrounding double quotes are stripped from the environment value only.
        self.browser_connection_url = (
            custom_browser_connection_url
            or os.getenv("PLAYWRIGHT_DRIVER_URL", 'ws://playwright-chrome:3000').strip('"')
        )

        # CDP always talks to Chromium; respect PLAYWRIGHT_BROWSER_TYPE for exotic setups
        configured_type = os.getenv("PLAYWRIGHT_BROWSER_TYPE", 'chromium')
        self.browser_type = configured_type.strip('"')

    async def _connect_browser(self, p):
        """Connect to the remote browser over CDP with a 60 second connection timeout."""
        launcher = getattr(p, self.browser_type)
        browser = await launcher.connect_over_cdp(self.browser_connection_url, timeout=60_000)
        return browser
|
||||
|
||||
|
||||
class PlaywrightCDPPlugin:
    """Plugin wrapper that registers the CDP fetcher under the name ``playwright_cdp``."""

    @hookimpl
    def register_content_fetcher(self):
        """Return the ``(name, fetcher_class)`` pair for the CDP fetcher."""
        registration = ('playwright_cdp', fetcher)
        return registration
|
||||
|
||||
|
||||
cdp_plugin = PlaywrightCDPPlugin()
|
||||
@@ -1,398 +0,0 @@
|
||||
"""
|
||||
Playwright-based content fetchers.
|
||||
|
||||
Submodules:
|
||||
cdp — connect to a remote browser via Chrome DevTools Protocol (CDP/WebSocket)
|
||||
chrome — launch a local Chromium browser
|
||||
firefox — launch a local Firefox browser
|
||||
webkit — launch a local WebKit (Safari-engine) browser
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import gc
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from changedetectionio.content_fetchers import (
|
||||
SCREENSHOT_MAX_HEIGHT_DEFAULT,
|
||||
SCREENSHOT_MAX_TOTAL_HEIGHT,
|
||||
SCREENSHOT_SIZE_STITCH_THRESHOLD,
|
||||
FAVICON_FETCHER_JS,
|
||||
INSTOCK_DATA_JS,
|
||||
XPATH_ELEMENT_JS,
|
||||
visualselector_xpath_selectors,
|
||||
)
|
||||
from changedetectionio.content_fetchers.base import Fetcher, manage_user_agent
|
||||
from changedetectionio.content_fetchers.exceptions import (
|
||||
BrowserStepsStepException,
|
||||
EmptyReply,
|
||||
Non200ErrorCodeReceived,
|
||||
PageUnloadable,
|
||||
ScreenshotUnavailable,
|
||||
)
|
||||
|
||||
|
||||
async def capture_full_page_async(page, screenshot_format='JPEG', watch_uuid=None, lock_viewport_elements=False):
    """Capture the page as one image, stitching viewport-sized chunks if needed.

    The viewport is enlarged when the document is taller than it, the page is
    captured chunk by chunk while scrolling, and multiple chunks are stitched
    in a spawned worker process. The original viewport (and element sizing,
    when locked) is put back afterwards, including when a chunk capture fails.

    Args:
        page: Playwright page object (async API).
        screenshot_format: ``'JPEG'`` or ``'PNG'`` (case-insensitive); a falsy
            value means JPEG. JPEG quality is read from ``SCREENSHOT_QUALITY``
            (default 72).
        watch_uuid: optional identifier, used only as a log prefix.
        lock_viewport_elements: when true and the page is taller than the
            viewport, run ``res/lock-elements-sizing.js`` before capturing and
            ``res/unlock-elements-sizing.js`` afterwards.

    Returns:
        bytes: the single chunk, or the stitched image when more than one
        chunk was captured.
    """
    import asyncio
    import time

    start = time.time()
    watch_info = f"[{watch_uuid}] " if watch_uuid else ""

    setup_start = time.time()
    page_height = await page.evaluate("document.documentElement.scrollHeight")
    page_width = await page.evaluate("document.documentElement.scrollWidth")
    original_viewport = page.viewport_size
    dimensions_time = time.time() - setup_start

    logger.debug(f"{watch_info}Playwright viewport size {page.viewport_size} page height {page_height} page width {page_width} (got dimensions in {dimensions_time:.2f}s)")

    step_size = SCREENSHOT_SIZE_STITCH_THRESHOLD
    screenshot_chunks = []
    y = 0
    elements_locked = False
    res_dir = os.path.join(os.path.dirname(__file__), '..', 'res')

    async def _restore_page_state():
        """Put back the original viewport and unlock any locked elements."""
        await page.set_viewport_size({'width': original_viewport['width'], 'height': original_viewport['height']})
        if elements_locked:
            with open(os.path.join(res_dir, 'unlock-elements-sizing.js'), 'r') as f:
                unlock_elements_js = f.read()
            await page.evaluate(unlock_elements_js)

    try:
        if lock_viewport_elements and page_height > page.viewport_size['height']:
            lock_start = time.time()
            with open(os.path.join(res_dir, 'lock-elements-sizing.js'), 'r') as f:
                lock_elements_js = f.read()
            await page.evaluate(lock_elements_js)
            elements_locked = True
            logger.debug(f"{watch_info}Viewport element locking enabled (took {time.time() - lock_start:.2f}s)")

        if page_height > page.viewport_size['height']:
            if page_height < step_size:
                step_size = page_height
            await page.set_viewport_size({'width': page.viewport_size['width'], 'height': step_size})

        capture_start = time.time()
        chunk_times = []
        screenshot_type = screenshot_format.lower() if screenshot_format else 'jpeg'
        screenshot_quality = 100 if screenshot_type == 'png' else int(os.getenv("SCREENSHOT_QUALITY", 72))

        # The first chunk is always taken, so a page reporting zero height still
        # yields an image rather than an IndexError on an empty chunk list.
        capture_limit = min(page_height, SCREENSHOT_MAX_TOTAL_HEIGHT)
        while True:
            if y > 0:
                await page.evaluate(f"window.scrollTo(0, {y})")

            await _safe_request_gc(page)

            screenshot_kwargs = {'type': screenshot_type, 'full_page': False}
            # quality is only passed for jpeg
            if screenshot_type == 'jpeg':
                screenshot_kwargs['quality'] = screenshot_quality

            chunk_start = time.time()
            screenshot_chunks.append(await page.screenshot(**screenshot_kwargs))
            chunk_time = time.time() - chunk_start
            chunk_times.append(chunk_time)
            logger.debug(f"{watch_info}Chunk {len(screenshot_chunks)} captured in {chunk_time:.2f}s")
            y += step_size
            if y >= capture_limit:
                break
    except BaseException:
        # Best-effort restore so a caller that keeps using the page is not left
        # with an oversized viewport / locked elements; the original error wins.
        try:
            await _restore_page_state()
        except Exception as restore_error:
            logger.debug(f"{watch_info}Could not restore page state after failed screenshot capture: {restore_error}")
        raise
    else:
        await _restore_page_state()

    capture_time = time.time() - capture_start

    if len(screenshot_chunks) > 1:
        stitch_start = time.time()
        from changedetectionio.content_fetchers.screenshot_handler import stitch_images_worker_raw_bytes
        import multiprocessing
        import struct

        ctx = multiprocessing.get_context('spawn')
        parent_conn, child_conn = ctx.Pipe()
        p = ctx.Process(target=stitch_images_worker_raw_bytes, args=(child_conn, page_height, SCREENSHOT_MAX_TOTAL_HEIGHT))
        p.start()
        # The worker owns its end now; closing ours lets the pipe calls fail
        # instead of hanging forever if the worker dies early.
        child_conn.close()

        def _exchange_with_worker():
            """Send the chunk count and chunks, then read back the stitched image."""
            parent_conn.send_bytes(struct.pack('I', len(screenshot_chunks)))
            for chunk in screenshot_chunks:
                parent_conn.send_bytes(chunk)
            return parent_conn.recv_bytes()

        try:
            # Pipe send/recv block, so run them off the event loop
            screenshot = await asyncio.get_running_loop().run_in_executor(None, _exchange_with_worker)
        finally:
            p.join()
            parent_conn.close()

        stitch_time = time.time() - stitch_start
        total_time = time.time() - start
        setup_time = total_time - capture_time - stitch_time
        logger.debug(
            f"{watch_info}Screenshot complete - Page height: {page_height}px | "
            f"Setup: {setup_time:.2f}s, Capture: {capture_time:.2f}s, Stitching: {stitch_time:.2f}s, Total: {total_time:.2f}s")
        return screenshot

    total_time = time.time() - start
    logger.debug(
        f"{watch_info}Screenshot complete - Page height: {page_height}px | "
        f"Setup: {total_time - capture_time:.2f}s, Single chunk: {capture_time:.2f}s, Total: {total_time:.2f}s")
    return screenshot_chunks[0]
|
||||
|
||||
|
||||
async def _safe_request_gc(page):
|
||||
"""Request browser GC — Chromium-specific, silently ignored on Firefox/WebKit."""
|
||||
try:
|
||||
await page.request_gc()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
class PlaywrightBaseFetcher(Fetcher):
    """
    Shared base for all Playwright fetchers.

    Subclasses implement ``_connect_browser(playwright_instance)`` to return a
    connected-or-launched browser object.  Everything else — context creation,
    page interaction, screenshot capture, browser-steps execution — lives here.
    """

    # Keys looked up as ``playwright_proxy_<key>`` environment variables
    playwright_proxy_settings_mappings = ['bypass', 'server', 'username', 'password']

    # Proxy settings dict handed to Playwright, or None when no proxy is configured
    proxy = None

    # Capability flags
    supports_browser_steps = True
    supports_screenshots = True
    supports_xpath_element_data = True

    status_icon = {'filename': 'google-chrome-icon.png', 'alt': 'Using a Chrome browser', 'title': 'Using a Chrome browser'}

    def __init__(self, proxy_override=None, custom_browser_connection_url=None, **kwargs):
        """
        Build the proxy configuration for this fetcher.

        :param proxy_override: proxy server URL; when set it replaces any proxy
            settings collected from the environment
        :param custom_browser_connection_url: stored for subclasses that need it
        :param kwargs: forwarded unchanged to ``Fetcher.__init__``
        """
        super().__init__(**kwargs)

        # Subclasses may use this (e.g. CDP); others ignore it
        self._custom_browser_connection_url = custom_browser_connection_url

        proxy_args = {}
        for k in self.playwright_proxy_settings_mappings:
            v = os.getenv('playwright_proxy_' + k, False)
            if v:
                # Values may arrive wrapped in double quotes
                proxy_args[k] = v.strip('"')

        if proxy_args:
            self.proxy = proxy_args

        if proxy_override:
            self.proxy = {'server': proxy_override}

        if self.proxy:
            # Credentials embedded in the server URL are also exposed as separate keys
            parsed = urlparse(self.proxy.get('server', ''))
            if parsed.username:
                self.proxy['username'] = parsed.username
                self.proxy['password'] = parsed.password

    def disk_cleanup_after_fetch(self):
        """Delete browser-step screenshots written during this fetch."""
        self.delete_browser_steps_screenshots()

    async def _connect_browser(self, playwright_instance):
        """Return an open browser object. Must be overridden by each subclass."""
        raise NotImplementedError(f"{type(self).__name__} must implement _connect_browser()")

    async def _close_playwright_objects(self, context, browser, url):
        """
        Best-effort close of the current page, ``context`` and ``browser``.

        Each ``close()`` is bounded to 5 seconds; timeouts and errors are logged
        as warnings and never raised, so this is safe to call from ``finally``.
        ``self.page`` is reset to ``None`` afterwards.

        :param context: browser context to close, or ``None`` if never created
        :param browser: browser to close, or ``None``
        :param url: only used in log messages
        """
        page = getattr(self, 'page', None)
        for name, target in (('page', page), ('context', context), ('browser', browser)):
            if not target:
                continue
            try:
                await asyncio.wait_for(target.close(), timeout=5.0)
            except asyncio.TimeoutError:
                logger.warning(f"Timed out closing {name} for {url}")
            except Exception as e:
                logger.warning(f"Error closing {name} for {url}: {e}")

        self.page = None
        gc.collect()

    async def screenshot_step(self, step_n=''):
        """
        Capture a full-page screenshot for a browser step and, when
        ``browser_steps_screenshot_path`` is set, write it to ``step_<n>.jpeg`` there.

        :param step_n: step identifier used in the output filename
        """
        super().screenshot_step(step_n=step_n)
        watch_uuid = getattr(self, 'watch_uuid', None)
        screenshot = await capture_full_page_async(
            page=self.page,
            screenshot_format=self.screenshot_format,
            watch_uuid=watch_uuid,
            lock_viewport_elements=self.lock_viewport_elements,
        )
        await _safe_request_gc(self.page)

        if self.browser_steps_screenshot_path is not None:
            destination = os.path.join(self.browser_steps_screenshot_path, 'step_{}.jpeg'.format(step_n))
            logger.debug(f"Saving step screenshot to {destination}")
            with open(destination, 'wb') as f:
                f.write(screenshot)

        # Drop the image bytes promptly
        del screenshot
        gc.collect()

    async def save_step_html(self, step_n):
        """
        Write the page's current HTML to ``step_<n>.html`` under
        ``browser_steps_screenshot_path``; nothing is written when that path is ``None``.

        :param step_n: step identifier used in the output filename
        """
        super().save_step_html(step_n=step_n)

        # Same guard as screenshot_step(): without a target directory there is
        # nowhere to write, and os.path.join(None, ...) would raise TypeError.
        if self.browser_steps_screenshot_path is None:
            return

        content = await self.page.content()
        await _safe_request_gc(self.page)

        destination = os.path.join(self.browser_steps_screenshot_path, 'step_{}.html'.format(step_n))
        logger.debug(f"Saving step HTML to {destination}")
        with open(destination, 'w', encoding='utf-8') as f:
            f.write(content)

        del content
        gc.collect()

    async def run(self,
                  fetch_favicon=True,
                  current_include_filters=None,
                  empty_pages_are_a_change=False,
                  ignore_status_codes=False,
                  is_binary=False,
                  request_body=None,
                  request_headers=None,
                  request_method=None,
                  screenshot_format=None,
                  timeout=None,
                  url=None,
                  watch_uuid=None,
                  ):
        """
        Fetch ``url`` in a fresh browser context and populate ``self.headers``,
        ``self.status_code``, ``self.favicon_blob``, ``self.xpath_data``,
        ``self.instock_data``, ``self.content`` and ``self.screenshot``.

        The page, context and browser are always closed before this method
        returns or raises.

        :param fetch_favicon: when true, evaluate the favicon fetcher script in the page
        :param current_include_filters: exposed to the page as the JS variable
            ``include_filters`` (empty string when ``None``)
        :param empty_pages_are_a_change: when false, an empty page raises ``EmptyReply``
        :param ignore_status_codes: when false, a non-200 status raises
            ``Non200ErrorCodeReceived`` carrying a screenshot
        :param request_headers: extra HTTP headers for the context; also used to
            pick the user agent
        :param url: address to load
        :param watch_uuid: stored on the instance and passed to the screenshot helper
        :param is_binary: accepted but not read by this method
        :param request_body: accepted but not read by this method
        :param request_method: accepted but not read by this method
        :param screenshot_format: accepted but not read by this method
            (``self.screenshot_format`` is used instead)
        :param timeout: accepted but not read by this method
        :raises EmptyReply: navigation produced no response, or the page is empty
        :raises PageUnloadable: custom JS failed (other than by timeout) or the
            response status could not be read
        :raises Non200ErrorCodeReceived: see ``ignore_status_codes``
        :raises ScreenshotUnavailable: re-raised with ``url`` and status code attached
        """
        from playwright.async_api import async_playwright
        import playwright._impl._errors
        import time

        self.delete_browser_steps_screenshots()
        self.watch_uuid = watch_uuid
        response = None

        async with async_playwright() as p:
            browser = await self._connect_browser(p)
            context = None
            # Never let the cleanup below touch a page left over from an earlier run
            self.page = None

            # A single try/finally guarantees cleanup on every exit path, including
            # the non-200 raise and unexpected exceptions.
            try:
                ua = manage_user_agent(headers=request_headers) or self.profile_user_agent or None

                context_kwargs = dict(
                    accept_downloads=False,
                    bypass_csp=True,
                    extra_http_headers=request_headers,
                    ignore_https_errors=self.ignore_https_errors,
                    proxy=self.proxy,
                    service_workers=os.getenv('PLAYWRIGHT_SERVICE_WORKERS', 'allow'),
                    user_agent=ua,
                    viewport={'width': self.viewport_width, 'height': self.viewport_height},
                )
                if self.locale:
                    context_kwargs['locale'] = self.locale

                context = await browser.new_context(**context_kwargs)

                if self.block_images:
                    await context.route(
                        re.compile(r'\.(png|jpe?g|gif|svg|ico|webp|avif|bmp)(\?.*)?$', re.IGNORECASE),
                        lambda route: route.abort()
                    )
                if self.block_fonts:
                    await context.route(
                        re.compile(r'\.(woff2?|ttf|otf|eot)(\?.*)?$', re.IGNORECASE),
                        lambda route: route.abort()
                    )

                self.page = await context.new_page()
                self.page.on("console", lambda msg: logger.debug(f"Playwright console: {url} {msg.type}: {msg.text}"))

                from changedetectionio.browser_steps.browser_steps import steppable_browser_interface
                browsersteps_interface = steppable_browser_interface(start_url=url)
                browsersteps_interface.page = self.page

                response = await browsersteps_interface.action_goto_url(value=url)

                if response is None:
                    raise EmptyReply(url=url, status_code=None)

                try:
                    self.headers = await response.all_headers()
                except TypeError:
                    # Fallback for a non-awaitable all_headers() result
                    self.headers = response.all_headers()

                try:
                    if self.webdriver_js_execute_code is not None and len(self.webdriver_js_execute_code):
                        await browsersteps_interface.action_execute_js(value=self.webdriver_js_execute_code, selector=None)
                except playwright._impl._errors.TimeoutError:
                    # This branch does not raise, so the fetch continues. The page
                    # must stay open, otherwise every later self.page call fails.
                    logger.warning(f"Timed out executing custom JS for {url}, continuing with the page as rendered")
                except Exception as e:
                    raise PageUnloadable(url=url, status_code=None, message=str(e))

                extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay
                await self.page.wait_for_timeout(extra_wait * 1000)

                try:
                    self.status_code = response.status
                except Exception as e:
                    raise PageUnloadable(url=url, status_code=None, message=str(e))

                if fetch_favicon:
                    try:
                        self.favicon_blob = await self.page.evaluate(FAVICON_FETCHER_JS)
                        await _safe_request_gc(self.page)
                    except Exception as e:
                        # A missing favicon must not fail the fetch
                        logger.error(f"Error fetching favicon: {e}")

                if self.status_code != 200 and not ignore_status_codes:
                    screenshot = await capture_full_page_async(self.page, screenshot_format=self.screenshot_format, watch_uuid=watch_uuid, lock_viewport_elements=self.lock_viewport_elements)
                    raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, screenshot=screenshot)

                if not empty_pages_are_a_change and len((await self.page.content()).strip()) == 0:
                    raise EmptyReply(url=url, status_code=response.status)

                try:
                    if self.browser_steps:
                        await self.iterate_browser_steps(start_url=url)
                        await self.page.wait_for_timeout(extra_wait * 1000)

                    now = time.time()
                    if current_include_filters is not None:
                        await self.page.evaluate("var include_filters={}".format(json.dumps(current_include_filters)))
                    else:
                        await self.page.evaluate("var include_filters=''")
                    await _safe_request_gc(self.page)

                    MAX_TOTAL_HEIGHT = int(os.getenv("SCREENSHOT_MAX_HEIGHT", SCREENSHOT_MAX_HEIGHT_DEFAULT))
                    self.xpath_data = await self.page.evaluate(XPATH_ELEMENT_JS, {
                        "visualselector_xpath_selectors": visualselector_xpath_selectors,
                        "max_height": MAX_TOTAL_HEIGHT
                    })
                    await _safe_request_gc(self.page)

                    self.instock_data = await self.page.evaluate(INSTOCK_DATA_JS)
                    await _safe_request_gc(self.page)

                    self.content = await self.page.content()
                    await _safe_request_gc(self.page)
                    logger.debug(f"Scrape xPath element data done in {time.time() - now:.2f}s")

                    self.screenshot = await capture_full_page_async(
                        page=self.page,
                        screenshot_format=self.screenshot_format,
                        watch_uuid=watch_uuid,
                        lock_viewport_elements=self.lock_viewport_elements,
                    )
                    await _safe_request_gc(self.page)
                    gc.collect()

                except ScreenshotUnavailable:
                    # Re-raise with the request details attached
                    raise ScreenshotUnavailable(url=url, status_code=self.status_code)

            finally:
                await self._close_playwright_objects(context=context, browser=browser, url=url)
                # Drop the local references as well
                context = None
                browser = None
|
||||
@@ -1,27 +0,0 @@
|
||||
"""
|
||||
Playwright Chrome fetcher — launches a local Chromium browser directly.
|
||||
|
||||
No external browser container is required. Playwright must be installed
|
||||
with Chromium browsers: ``playwright install chromium``.
|
||||
"""
|
||||
from changedetectionio.pluggy_interface import hookimpl
|
||||
from changedetectionio.content_fetchers.playwright import PlaywrightBaseFetcher
|
||||
|
||||
|
||||
class fetcher(PlaywrightBaseFetcher):
    """Playwright fetcher that launches a local headless Chromium."""

    fetcher_description = "Playwright Chrome (local)"

    async def _connect_browser(self, p):
        """
        Launch headless Chromium from the Playwright instance ``p``.

        ``self.proxy`` is forwarded as the ``proxy`` launch option when it is set.

        :return: the launched browser object
        """
        options = dict(headless=True)
        if self.proxy:
            options['proxy'] = self.proxy
        launched = await p.chromium.launch(**options)
        return launched
|
||||
|
||||
|
||||
class PlaywrightChromePlugin:
    """Plugin object that exposes the local Chromium fetcher through the fetcher hook."""

    @hookimpl
    def register_content_fetcher(self):
        """Return the ``(name, fetcher class)`` pair for this fetcher."""
        registration = ('playwright_chrome', fetcher)
        return registration


# Module-level plugin instance
chrome_plugin = PlaywrightChromePlugin()
|
||||
@@ -1,33 +0,0 @@
|
||||
"""
|
||||
Playwright Firefox fetcher — launches a local Firefox browser directly.
|
||||
|
||||
No external browser container is required. Playwright must be installed
|
||||
with Firefox browsers: ``playwright install firefox``.
|
||||
|
||||
Note: ``page.request_gc()`` is Chromium-specific and is silently skipped
|
||||
on Firefox — this is handled transparently by ``_safe_request_gc()`` in
|
||||
the base package.
|
||||
"""
|
||||
from changedetectionio.pluggy_interface import hookimpl
|
||||
from changedetectionio.content_fetchers.playwright import PlaywrightBaseFetcher
|
||||
|
||||
|
||||
class fetcher(PlaywrightBaseFetcher):
    """Playwright fetcher that launches a local headless Firefox."""

    fetcher_description = "Playwright Firefox (local)"

    # Overrides the icon inherited from the base class
    status_icon = {'filename': 'firefox-icon.svg', 'alt': 'Using Firefox', 'title': 'Using Firefox'}

    async def _connect_browser(self, p):
        """
        Launch headless Firefox from the Playwright instance ``p``.

        ``self.proxy`` is forwarded as the ``proxy`` launch option when it is set.

        :return: the launched browser object
        """
        options = dict(headless=True)
        if self.proxy:
            options['proxy'] = self.proxy
        launched = await p.firefox.launch(**options)
        return launched
|
||||
|
||||
|
||||
class PlaywrightFirefoxPlugin:
    """Plugin object that exposes the local Firefox fetcher through the fetcher hook."""

    @hookimpl
    def register_content_fetcher(self):
        """Return the ``(name, fetcher class)`` pair for this fetcher."""
        registration = ('playwright_firefox', fetcher)
        return registration


# Module-level plugin instance
firefox_plugin = PlaywrightFirefoxPlugin()
|
||||
@@ -1,30 +0,0 @@
|
||||
"""
|
||||
Playwright WebKit fetcher — launches a local WebKit (Safari-engine) browser.
|
||||
|
||||
No external browser container is required. Playwright must be installed
|
||||
with WebKit browsers: ``playwright install webkit``.
|
||||
|
||||
Note: ``page.request_gc()`` is Chromium-specific and is silently skipped
|
||||
on WebKit — handled transparently by ``_safe_request_gc()`` in the base package.
|
||||
"""
|
||||
from changedetectionio.pluggy_interface import hookimpl
|
||||
from changedetectionio.content_fetchers.playwright import PlaywrightBaseFetcher
|
||||
|
||||
|
||||
class fetcher(PlaywrightBaseFetcher):
    """Playwright fetcher that launches a local headless WebKit."""

    # NOTE(review): no status_icon is defined here, so whatever the base class
    # declares is used — confirm that icon/title is appropriate for WebKit.
    fetcher_description = "Playwright WebKit/Safari (local)"

    async def _connect_browser(self, p):
        """
        Launch headless WebKit from the Playwright instance ``p``.

        ``self.proxy`` is forwarded as the ``proxy`` launch option when it is set.

        :return: the launched browser object
        """
        options = dict(headless=True)
        if self.proxy:
            options['proxy'] = self.proxy
        launched = await p.webkit.launch(**options)
        return launched
|
||||
|
||||
|
||||
class PlaywrightWebKitPlugin:
    """Plugin object that exposes the local WebKit fetcher through the fetcher hook."""

    @hookimpl
    def register_content_fetcher(self):
        """Return the ``(name, fetcher class)`` pair for this fetcher."""
        registration = ('playwright_webkit', fetcher)
        return registration


# Module-level plugin instance
webkit_plugin = PlaywrightWebKitPlugin()
|
||||
@@ -1,5 +1,4 @@
|
||||
import asyncio
|
||||
import gc
|
||||
import json
|
||||
import os
|
||||
import websockets.exceptions
|
||||
@@ -7,7 +6,6 @@ from urllib.parse import urlparse
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from changedetectionio.pluggy_interface import hookimpl
|
||||
from changedetectionio.content_fetchers import SCREENSHOT_MAX_HEIGHT_DEFAULT, visualselector_xpath_selectors, \
|
||||
SCREENSHOT_SIZE_STITCH_THRESHOLD, SCREENSHOT_DEFAULT_QUALITY, XPATH_ELEMENT_JS, INSTOCK_DATA_JS, \
|
||||
SCREENSHOT_MAX_TOTAL_HEIGHT, FAVICON_FETCHER_JS
|
||||
@@ -22,20 +20,18 @@ from changedetectionio.content_fetchers.exceptions import PageUnloadable, Non200
|
||||
# Screenshots also travel via the ws:// (websocket) meaning that the binary data is base64 encoded
|
||||
# which will significantly increase the IO size between the server and client, it's recommended to use the lowest
|
||||
# acceptable screenshot quality here
|
||||
async def capture_full_page(page, screenshot_format='JPEG', watch_uuid=None, lock_viewport_elements=False):
|
||||
async def capture_full_page(page, screenshot_format='JPEG'):
|
||||
import os
|
||||
import time
|
||||
import multiprocessing
|
||||
|
||||
start = time.time()
|
||||
watch_info = f"[{watch_uuid}] " if watch_uuid else ""
|
||||
|
||||
setup_start = time.time()
|
||||
page_height = await page.evaluate("document.documentElement.scrollHeight")
|
||||
page_width = await page.evaluate("document.documentElement.scrollWidth")
|
||||
original_viewport = page.viewport
|
||||
dimensions_time = time.time() - setup_start
|
||||
|
||||
logger.debug(f"{watch_info}Puppeteer viewport size {page.viewport} page height {page_height} page width {page_width} (got dimensions in {dimensions_time:.2f}s)")
|
||||
logger.debug(f"Puppeteer viewport size {page.viewport} page height {page_height} page width {page_width}")
|
||||
|
||||
# Bug 3 in Playwright screenshot handling
|
||||
# Some bug where it gives the wrong screenshot size, but making a request with the clip set first seems to solve it
|
||||
@@ -54,41 +50,26 @@ async def capture_full_page(page, screenshot_format='JPEG', watch_uuid=None, loc
|
||||
screenshot_chunks = []
|
||||
y = 0
|
||||
elements_locked = False
|
||||
|
||||
# Only lock viewport elements if explicitly enabled (for image_ssim_diff processor)
|
||||
# This prevents headers/ads from resizing when viewport changes
|
||||
if lock_viewport_elements and page_height > page.viewport['height']:
|
||||
lock_start = time.time()
|
||||
if page_height > page.viewport['height']:
|
||||
# Lock all element dimensions BEFORE screenshot to prevent CSS media queries from resizing
|
||||
# capture_full_page() changes viewport height which triggers @media (min-height) rules
|
||||
lock_elements_js_path = os.path.join(os.path.dirname(__file__), 'res', 'lock-elements-sizing.js')
|
||||
file_read_start = time.time()
|
||||
with open(lock_elements_js_path, 'r') as f:
|
||||
lock_elements_js = f.read()
|
||||
file_read_time = time.time() - file_read_start
|
||||
|
||||
evaluate_start = time.time()
|
||||
await page.evaluate(lock_elements_js)
|
||||
evaluate_time = time.time() - evaluate_start
|
||||
|
||||
elements_locked = True
|
||||
lock_time = time.time() - lock_start
|
||||
logger.debug(f"{watch_info}Viewport element locking enabled - File read: {file_read_time:.3f}s, Browser evaluate: {evaluate_time:.2f}s, Total: {lock_time:.2f}s")
|
||||
logger.debug("Element dimensions locked before screenshot capture")
|
||||
|
||||
if page_height > page.viewport['height']:
|
||||
if page_height < step_size:
|
||||
step_size = page_height # Incase page is bigger than default viewport but smaller than proposed step size
|
||||
viewport_start = time.time()
|
||||
await page.setViewport({'width': page.viewport['width'], 'height': step_size})
|
||||
viewport_time = time.time() - viewport_start
|
||||
logger.debug(f"{watch_info}Viewport changed to {page.viewport['width']}x{step_size} (took {viewport_time:.2f}s)")
|
||||
|
||||
capture_start = time.time()
|
||||
chunk_times = []
|
||||
while y < min(page_height, SCREENSHOT_MAX_TOTAL_HEIGHT):
|
||||
# better than scrollTo incase they override it in the page
|
||||
await page.evaluate(
|
||||
"""(y) => {
|
||||
const el = document.scrollingElement;
|
||||
if (el) el.scrollTop = y;
|
||||
document.documentElement.scrollTop = y;
|
||||
document.body.scrollTop = y;
|
||||
}""",
|
||||
y
|
||||
)
|
||||
@@ -101,11 +82,7 @@ async def capture_full_page(page, screenshot_format='JPEG', watch_uuid=None, loc
|
||||
if screenshot_type == 'jpeg':
|
||||
screenshot_kwargs['quality'] = screenshot_quality
|
||||
|
||||
chunk_start = time.time()
|
||||
screenshot_chunks.append(await page.screenshot(**screenshot_kwargs))
|
||||
chunk_time = time.time() - chunk_start
|
||||
chunk_times.append(chunk_time)
|
||||
logger.debug(f"{watch_info}Chunk {len(screenshot_chunks)} captured in {chunk_time:.2f}s")
|
||||
y += step_size
|
||||
|
||||
await page.setViewport({'width': original_viewport['width'], 'height': original_viewport['height']})
|
||||
@@ -116,58 +93,35 @@ async def capture_full_page(page, screenshot_format='JPEG', watch_uuid=None, loc
|
||||
with open(unlock_elements_js_path, 'r') as f:
|
||||
unlock_elements_js = f.read()
|
||||
await page.evaluate(unlock_elements_js)
|
||||
logger.debug(f"{watch_info}Element dimensions unlocked after screenshot capture")
|
||||
|
||||
capture_time = time.time() - capture_start
|
||||
total_capture_time = sum(chunk_times)
|
||||
logger.debug(f"{watch_info}All {len(screenshot_chunks)} chunks captured in {capture_time:.2f}s (total chunk time: {total_capture_time:.2f}s)")
|
||||
logger.debug("Element dimensions unlocked after screenshot capture")
|
||||
|
||||
if len(screenshot_chunks) > 1:
|
||||
stitch_start = time.time()
|
||||
logger.debug(f"{watch_info}Starting stitching of {len(screenshot_chunks)} chunks")
|
||||
|
||||
# Always use spawn subprocess for ANY stitching (2+ chunks)
|
||||
# PIL allocates at C level and Python GC never releases it - subprocess exit forces OS to reclaim
|
||||
# Trade-off: 35MB resource_tracker vs 500MB+ PIL leak in main process
|
||||
from changedetectionio.content_fetchers.screenshot_handler import stitch_images_worker_raw_bytes
|
||||
import multiprocessing
|
||||
import struct
|
||||
|
||||
# Always use spawn for thread safety - consistent behavior in tests and production
|
||||
from changedetectionio.content_fetchers.screenshot_handler import stitch_images_worker
|
||||
logger.debug(f"Screenshot stitching {len(screenshot_chunks)} chunks together")
|
||||
ctx = multiprocessing.get_context('spawn')
|
||||
parent_conn, child_conn = ctx.Pipe()
|
||||
p = ctx.Process(target=stitch_images_worker_raw_bytes, args=(child_conn, page_height, SCREENSHOT_MAX_TOTAL_HEIGHT))
|
||||
p = ctx.Process(target=stitch_images_worker, args=(child_conn, screenshot_chunks, page_height, SCREENSHOT_MAX_TOTAL_HEIGHT))
|
||||
p.start()
|
||||
|
||||
# Send via raw bytes (no pickle)
|
||||
parent_conn.send_bytes(struct.pack('I', len(screenshot_chunks)))
|
||||
for chunk in screenshot_chunks:
|
||||
parent_conn.send_bytes(chunk)
|
||||
|
||||
screenshot = parent_conn.recv_bytes()
|
||||
p.join()
|
||||
|
||||
parent_conn.close()
|
||||
child_conn.close()
|
||||
del p, parent_conn, child_conn
|
||||
|
||||
stitch_time = time.time() - stitch_start
|
||||
total_time = time.time() - start
|
||||
setup_time = total_time - capture_time - stitch_time
|
||||
logger.debug(
|
||||
f"{watch_info}Screenshot complete - Page height: {page_height}px, Capture height: {SCREENSHOT_MAX_TOTAL_HEIGHT}px | "
|
||||
f"Setup: {setup_time:.2f}s, Capture: {capture_time:.2f}s, Stitching: {stitch_time:.2f}s, Total: {total_time:.2f}s")
|
||||
f"Screenshot (chunked/stitched) - Page height: {page_height} Capture height: {SCREENSHOT_MAX_TOTAL_HEIGHT} - Stitched together in {time.time() - start:.2f}s")
|
||||
|
||||
screenshot_chunks = None
|
||||
return screenshot
|
||||
|
||||
total_time = time.time() - start
|
||||
setup_time = total_time - capture_time
|
||||
logger.debug(
|
||||
f"{watch_info}Screenshot complete - Page height: {page_height}px, Capture height: {SCREENSHOT_MAX_TOTAL_HEIGHT}px | "
|
||||
f"Setup: {setup_time:.2f}s, Single chunk: {capture_time:.2f}s, Total: {total_time:.2f}s")
|
||||
f"Screenshot Page height: {page_height} Capture height: {SCREENSHOT_MAX_TOTAL_HEIGHT} - Stitched together in {time.time() - start:.2f}s")
|
||||
return screenshot_chunks[0]
|
||||
|
||||
|
||||
class fetcher(Fetcher):
|
||||
fetcher_description = "Puppeteer Chromium"
|
||||
fetcher_description = "Puppeteer/direct {}/Javascript".format(
|
||||
os.getenv("PLAYWRIGHT_BROWSER_TYPE", 'chromium').capitalize()
|
||||
)
|
||||
if os.getenv("PLAYWRIGHT_DRIVER_URL"):
|
||||
fetcher_description += " via '{}'".format(os.getenv("PLAYWRIGHT_DRIVER_URL"))
|
||||
|
||||
browser = None
|
||||
browser_type = ''
|
||||
@@ -179,10 +133,14 @@ class fetcher(Fetcher):
|
||||
supports_screenshots = True
|
||||
supports_xpath_element_data = True
|
||||
|
||||
status_icon = {'filename': 'google-chrome-icon.png', 'alt': 'Using a Chrome browser', 'title': 'Using a Chrome browser'}
|
||||
|
||||
def disk_cleanup_after_fetch(self):
|
||||
self.delete_browser_steps_screenshots()
|
||||
@classmethod
|
||||
def get_status_icon_data(cls):
|
||||
"""Return Chrome browser icon data for Puppeteer fetcher."""
|
||||
return {
|
||||
'filename': 'google-chrome-icon.png',
|
||||
'alt': 'Using a Chrome browser',
|
||||
'title': 'Using a Chrome browser'
|
||||
}
|
||||
|
||||
def __init__(self, proxy_override=None, custom_browser_connection_url=None, **kwargs):
|
||||
super().__init__(**kwargs)
|
||||
@@ -215,36 +173,19 @@ class fetcher(Fetcher):
|
||||
self.browser_connection_url += f"{r}--proxy-server={proxy_url}"
|
||||
|
||||
async def quit(self, watch=None):
|
||||
watch_uuid = watch.get('uuid') if watch else 'unknown'
|
||||
|
||||
# Close page
|
||||
try:
|
||||
if hasattr(self, 'page') and self.page:
|
||||
await asyncio.wait_for(self.page.close(), timeout=5.0)
|
||||
logger.debug(f"[{watch_uuid}] Page closed successfully")
|
||||
except asyncio.TimeoutError:
|
||||
logger.warning(f"[{watch_uuid}] Timed out closing page (5s)")
|
||||
await self.page.close()
|
||||
del self.page
|
||||
except Exception as e:
|
||||
logger.warning(f"[{watch_uuid}] Error closing page: {e}")
|
||||
finally:
|
||||
self.page = None
|
||||
pass
|
||||
|
||||
# Close browser connection
|
||||
try:
|
||||
if hasattr(self, 'browser') and self.browser:
|
||||
await asyncio.wait_for(self.browser.close(), timeout=5.0)
|
||||
logger.debug(f"[{watch_uuid}] Browser closed successfully")
|
||||
except asyncio.TimeoutError:
|
||||
logger.warning(f"[{watch_uuid}] Timed out closing browser (5s)")
|
||||
await self.browser.close()
|
||||
del self.browser
|
||||
except Exception as e:
|
||||
logger.warning(f"[{watch_uuid}] Error closing browser: {e}")
|
||||
finally:
|
||||
self.browser = None
|
||||
pass
|
||||
|
||||
logger.info(f"[{watch_uuid}] Cleanup puppeteer complete")
|
||||
|
||||
# Force garbage collection to release resources
|
||||
gc.collect()
|
||||
logger.info("Cleanup puppeteer complete.")
|
||||
|
||||
async def fetch_page(self,
|
||||
current_include_filters,
|
||||
@@ -263,7 +204,7 @@ class fetcher(Fetcher):
|
||||
import re
|
||||
self.delete_browser_steps_screenshots()
|
||||
|
||||
n = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 12)) + self.render_extract_delay
|
||||
n = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay
|
||||
extra_wait = min(n, 15)
|
||||
|
||||
logger.debug(f"Extra wait set to {extra_wait}s, requested was {n}s.")
|
||||
@@ -274,11 +215,9 @@ class fetcher(Fetcher):
|
||||
# Connect directly using the specified browser_ws_endpoint
|
||||
# @todo timeout
|
||||
try:
|
||||
logger.debug(f"[{watch_uuid}] Connecting to browser at {self.browser_connection_url}")
|
||||
self.browser = await pyppeteer_instance.connect(browserWSEndpoint=self.browser_connection_url,
|
||||
ignoreHTTPSErrors=True
|
||||
)
|
||||
logger.debug(f"[{watch_uuid}] Browser connected successfully")
|
||||
except websockets.exceptions.InvalidStatusCode as e:
|
||||
raise BrowserConnectError(msg=f"Error while trying to connect the browser, Code {e.status_code} (check your access, whitelist IP, password etc)")
|
||||
except websockets.exceptions.InvalidURI:
|
||||
@@ -287,20 +226,7 @@ class fetcher(Fetcher):
|
||||
raise BrowserConnectError(msg=f"Error connecting to the browser - Exception '{str(e)}'")
|
||||
|
||||
# more reliable is to just request a new page
|
||||
try:
|
||||
logger.debug(f"[{watch_uuid}] Creating new page")
|
||||
self.page = await self.browser.newPage()
|
||||
logger.debug(f"[{watch_uuid}] Page created successfully")
|
||||
except Exception as e:
|
||||
logger.error(f"[{watch_uuid}] Failed to create new page: {e}")
|
||||
# Browser is connected but page creation failed - must cleanup browser
|
||||
try:
|
||||
await asyncio.wait_for(self.browser.close(), timeout=3.0)
|
||||
except Exception as cleanup_error:
|
||||
logger.error(f"[{watch_uuid}] Failed to cleanup browser after page creation failure: {cleanup_error}")
|
||||
finally:
|
||||
self.browser = None
|
||||
raise
|
||||
self.page = await self.browser.newPage()
|
||||
|
||||
# Add console handler to capture console.log from favicon fetcher
|
||||
#self.page.on('console', lambda msg: logger.debug(f"Browser console [{msg.type}]: {msg.text}"))
|
||||
@@ -362,33 +288,28 @@ class fetcher(Fetcher):
|
||||
# Enable Network domain to detect when first bytes arrive
|
||||
await self.page._client.send('Network.enable')
|
||||
|
||||
# Now set up the frame navigation handlers
|
||||
async def handle_frame_navigation(event=None):
|
||||
# Wait n seconds after the frameStartedLoading, not from any frameStartedLoading/frameStartedNavigating
|
||||
logger.debug(f"Frame navigated: {event}")
|
||||
w = extra_wait - 2 if extra_wait > 4 else 2
|
||||
logger.debug(f"Waiting {w} seconds before calling Page.stopLoading...")
|
||||
await asyncio.sleep(w)
|
||||
|
||||
# Check if page still exists (might have been closed due to error during sleep)
|
||||
if not self.page or not hasattr(self.page, '_client'):
|
||||
logger.debug("Page already closed, skipping stopLoading")
|
||||
return
|
||||
|
||||
logger.debug("Issuing stopLoading command...")
|
||||
await self.page._client.send('Page.stopLoading')
|
||||
logger.debug("stopLoading command sent!")
|
||||
|
||||
async def setup_frame_handlers_on_first_response(event):
|
||||
# Only trigger for the main document response
|
||||
if event.get('type') == 'Document':
|
||||
logger.debug("First response received, setting up frame handlers for forced page stop load.")
|
||||
|
||||
# De-register this listener - we only need it once
|
||||
self.page._client.remove_listener('Network.responseReceived', setup_frame_handlers_on_first_response)
|
||||
|
||||
# Now set up the frame navigation handlers
|
||||
async def handle_frame_navigation(event):
|
||||
# Wait n seconds after the frameStartedLoading, not from any frameStartedLoading/frameStartedNavigating
|
||||
logger.debug(f"Frame navigated: {event}")
|
||||
w = extra_wait - 2 if extra_wait > 4 else 2
|
||||
logger.debug(f"Waiting {w} seconds before calling Page.stopLoading...")
|
||||
await asyncio.sleep(w)
|
||||
logger.debug("Issuing stopLoading command...")
|
||||
await self.page._client.send('Page.stopLoading')
|
||||
logger.debug("stopLoading command sent!")
|
||||
|
||||
self.page._client.on('Page.frameStartedNavigating', lambda e: asyncio.create_task(handle_frame_navigation(e)))
|
||||
self.page._client.on('Page.frameStartedLoading', lambda e: asyncio.create_task(handle_frame_navigation(e)))
|
||||
self.page._client.on('Page.frameStoppedLoading', lambda e: logger.debug(f"Frame stopped loading: {e}"))
|
||||
logger.debug("First response received, setting up frame handlers for forced page stop load DONE SETUP")
|
||||
# De-register this listener - we only need it once
|
||||
self.page._client.remove_listener('Network.responseReceived', setup_frame_handlers_on_first_response)
|
||||
|
||||
# Listen for first response to trigger frame handler setup
|
||||
self.page._client.on('Network.responseReceived', setup_frame_handlers_on_first_response)
|
||||
@@ -397,13 +318,8 @@ class fetcher(Fetcher):
|
||||
attempt=0
|
||||
while not response:
|
||||
logger.debug(f"Attempting page fetch {url} attempt {attempt}")
|
||||
asyncio.create_task(handle_frame_navigation())
|
||||
response = await self.page.goto(url, timeout=0)
|
||||
await asyncio.sleep(1 + extra_wait)
|
||||
# Check if page still exists before sending command
|
||||
if self.page and hasattr(self.page, '_client'):
|
||||
await self.page._client.send('Page.stopLoading')
|
||||
|
||||
if response:
|
||||
break
|
||||
if not response:
|
||||
@@ -439,7 +355,7 @@ class fetcher(Fetcher):
|
||||
logger.error(f"Error fetching FavIcon info {str(e)}, continuing.")
|
||||
|
||||
if self.status_code != 200 and not ignore_status_codes:
|
||||
screenshot = await capture_full_page(page=self.page, screenshot_format=self.screenshot_format, watch_uuid=watch_uuid, lock_viewport_elements=self.lock_viewport_elements)
|
||||
screenshot = await capture_full_page(page=self.page, screenshot_format=self.screenshot_format)
|
||||
|
||||
raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, screenshot=screenshot)
|
||||
|
||||
@@ -451,7 +367,7 @@ class fetcher(Fetcher):
|
||||
|
||||
# Run Browser Steps here
|
||||
# @todo not yet supported, we switch to playwright in this case
|
||||
# if self.browser_steps:
|
||||
# if self.browser_steps_get_valid_steps():
|
||||
# self.iterate_browser_steps()
|
||||
|
||||
|
||||
@@ -469,11 +385,7 @@ class fetcher(Fetcher):
|
||||
|
||||
# Now take screenshot (scrolling may trigger layout changes, but measurements are already captured)
|
||||
logger.debug(f"Screenshot format {self.screenshot_format}")
|
||||
self.screenshot = await capture_full_page(page=self.page, screenshot_format=self.screenshot_format, watch_uuid=watch_uuid, lock_viewport_elements=self.lock_viewport_elements)
|
||||
|
||||
# Force garbage collection - pyppeteer base64 decode creates temporary buffers
|
||||
import gc
|
||||
gc.collect()
|
||||
self.screenshot = await capture_full_page(page=self.page, screenshot_format=self.screenshot_format)
|
||||
self.xpath_data = await self.page.evaluate(XPATH_ELEMENT_JS, {
|
||||
"visualselector_xpath_selectors": visualselector_xpath_selectors,
|
||||
"max_height": MAX_TOTAL_HEIGHT
|
||||
@@ -527,24 +439,15 @@ class fetcher(Fetcher):
|
||||
)
|
||||
except asyncio.TimeoutError:
|
||||
raise (BrowserFetchTimedOut(msg=f"Browser connected but was unable to process the page in {max_time} seconds."))
|
||||
finally:
|
||||
# Internal cleanup on any exception/timeout - call quit() immediately
|
||||
# This prevents connection leaks during exception bursts
|
||||
# Worker.py's quit() call becomes a redundant safety net (idempotent)
|
||||
try:
|
||||
await self.quit(watch={'uuid': watch_uuid} if watch_uuid else None)
|
||||
except Exception as cleanup_error:
|
||||
logger.error(f"[{watch_uuid}] Error during internal quit() cleanup: {cleanup_error}")
|
||||
|
||||
|
||||
# Plugin registration for built-in fetcher
|
||||
class PuppeteerFetcherPlugin:
|
||||
"""Plugin class that registers the Puppeteer fetcher as a built-in plugin."""
|
||||
|
||||
@hookimpl
|
||||
def register_content_fetcher(self):
|
||||
"""Register the Puppeteer fetcher"""
|
||||
return ('puppeteer', fetcher)
|
||||
return ('html_webdriver', fetcher)
|
||||
|
||||
|
||||
# Create module-level instance for plugin registration
|
||||
|
||||
@@ -1,15 +1,12 @@
|
||||
from loguru import logger
|
||||
from urllib.parse import urljoin, urlparse
|
||||
import hashlib
|
||||
import os
|
||||
import re
|
||||
import asyncio
|
||||
|
||||
from functools import partial
|
||||
from changedetectionio import strtobool
|
||||
from changedetectionio.content_fetchers.exceptions import BrowserStepsInUnsupportedFetcher, EmptyReply, Non200ErrorCodeReceived
|
||||
from changedetectionio.content_fetchers.base import Fetcher
|
||||
from changedetectionio.pluggy_interface import hookimpl
|
||||
from changedetectionio.validate_url import is_private_hostname
|
||||
|
||||
|
||||
# "html_requests" is listed as the default fetcher in store.py!
|
||||
@@ -39,7 +36,7 @@ class fetcher(Fetcher):
|
||||
import requests
|
||||
from requests.exceptions import ProxyError, ConnectionError, RequestException
|
||||
|
||||
if self.browser_steps:
|
||||
if self.browser_steps_get_valid_steps():
|
||||
raise BrowserStepsInUnsupportedFetcher(url=url)
|
||||
|
||||
proxies = {}
|
||||
@@ -58,72 +55,18 @@ class fetcher(Fetcher):
|
||||
|
||||
session = requests.Session()
|
||||
|
||||
# Configure retry adapter for low-level network errors only
|
||||
# Retries connection timeouts, read timeouts, connection resets - not HTTP status codes
|
||||
# Especially helpful in parallel test execution when servers are slow/overloaded
|
||||
# Configurable via REQUESTS_RETRY_MAX_COUNT (default: 6 retries)
|
||||
from requests.adapters import HTTPAdapter
|
||||
from urllib3.util.retry import Retry
|
||||
|
||||
max_retries = int(os.getenv("REQUESTS_RETRY_MAX_COUNT", "6"))
|
||||
retry_strategy = Retry(
|
||||
total=max_retries,
|
||||
connect=max_retries, # Retry connection timeouts
|
||||
read=max_retries, # Retry read timeouts
|
||||
status=0, # Don't retry on HTTP status codes
|
||||
backoff_factor=0.5, # Exponential backoff between retries: each delay doubles, scaled by this 0.5s factor
|
||||
allowed_methods=["HEAD", "GET", "OPTIONS", "POST"],
|
||||
raise_on_status=False
|
||||
)
|
||||
adapter = HTTPAdapter(max_retries=retry_strategy)
|
||||
session.mount("http://", adapter)
|
||||
session.mount("https://", adapter)
|
||||
|
||||
if strtobool(os.getenv('ALLOW_FILE_URI', 'false')) and url.startswith('file://'):
|
||||
from requests_file import FileAdapter
|
||||
session.mount('file://', FileAdapter())
|
||||
|
||||
allow_iana_restricted = strtobool(os.getenv('ALLOW_IANA_RESTRICTED_ADDRESSES', 'false'))
|
||||
|
||||
try:
|
||||
# Fresh DNS check at fetch time — catches DNS rebinding regardless of add-time cache.
|
||||
if not allow_iana_restricted:
|
||||
parsed_initial = urlparse(url)
|
||||
if parsed_initial.hostname and is_private_hostname(parsed_initial.hostname):
|
||||
raise Exception(f"Fetch blocked: '{url}' resolves to a private/reserved IP address. "
|
||||
f"Set ALLOW_IANA_RESTRICTED_ADDRESSES=true to allow.")
|
||||
|
||||
r = session.request(method=request_method,
|
||||
data=request_body.encode('utf-8') if type(request_body) is str else request_body,
|
||||
url=url,
|
||||
headers=request_headers,
|
||||
timeout=timeout,
|
||||
proxies=proxies,
|
||||
verify=False,
|
||||
allow_redirects=False)
|
||||
|
||||
# Manually follow redirects so each hop's resolved IP can be validated,
|
||||
# preventing SSRF via an open redirect on a public host.
|
||||
current_url = url
|
||||
for _ in range(10):
|
||||
if not r.is_redirect:
|
||||
break
|
||||
location = r.headers.get('Location', '')
|
||||
redirect_url = urljoin(current_url, location)
|
||||
if not allow_iana_restricted:
|
||||
parsed_redirect = urlparse(redirect_url)
|
||||
if parsed_redirect.hostname and is_private_hostname(parsed_redirect.hostname):
|
||||
raise Exception(f"Redirect blocked: '{redirect_url}' resolves to a private/reserved IP address.")
|
||||
current_url = redirect_url
|
||||
r = session.request('GET', redirect_url,
|
||||
headers=request_headers,
|
||||
timeout=timeout,
|
||||
proxies=proxies,
|
||||
verify=False,
|
||||
allow_redirects=False)
|
||||
else:
|
||||
raise Exception("Too many redirects")
|
||||
|
||||
verify=False)
|
||||
except Exception as e:
|
||||
msg = str(e)
|
||||
if proxies and 'SOCKSHTTPSConnectionPool' in msg:
|
||||
@@ -149,32 +92,10 @@ class fetcher(Fetcher):
|
||||
# Default to UTF-8 for XML if no encoding found
|
||||
r.encoding = 'utf-8'
|
||||
else:
|
||||
# No charset in HTTP header - sniff encoding in priority order matching browsers
|
||||
# (WHATWG encoding sniffing algorithm):
|
||||
# 1. BOM - highest confidence, check before anything else
|
||||
# 2. <meta charset> in first 2kb
|
||||
# 3. chardet statistical detection - last resort
|
||||
# See: https://github.com/dgtlmoon/changedetection.io/issues/3952
|
||||
boms = [
|
||||
(b'\xef\xbb\xbf', 'utf-8-sig'),
|
||||
(b'\xff\xfe', 'utf-16-le'),
|
||||
(b'\xfe\xff', 'utf-16-be'),
|
||||
]
|
||||
bom_encoding = next((enc for bom, enc in boms if r.content.startswith(bom)), None)
|
||||
if bom_encoding:
|
||||
logger.info(f"URL: {url} Using encoding '{bom_encoding}' detected from BOM")
|
||||
r.encoding = bom_encoding
|
||||
else:
|
||||
meta_charset_match = re.search(rb'<meta[^>]+charset\s*=\s*["\']?\s*([^"\'\s;>]+)', r.content[:2000], re.IGNORECASE)
|
||||
if meta_charset_match:
|
||||
encoding = meta_charset_match.group(1).decode('ascii', errors='ignore')
|
||||
logger.info(f"URL: {url} No content-type encoding in HTTP headers - Using encoding '{encoding}' from HTML meta charset tag")
|
||||
r.encoding = encoding
|
||||
else:
|
||||
encoding = chardet.detect(r.content)['encoding']
|
||||
logger.warning(f"URL: {url} No charset in headers or meta tag, guessed encoding as '{encoding}' via chardet")
|
||||
if encoding:
|
||||
r.encoding = encoding
|
||||
# For other content types, use chardet
|
||||
encoding = chardet.detect(r.content)['encoding']
|
||||
if encoding:
|
||||
r.encoding = encoding
|
||||
|
||||
self.headers = r.headers
|
||||
|
||||
@@ -221,11 +142,10 @@ class fetcher(Fetcher):
|
||||
watch_uuid=None,
|
||||
):
|
||||
"""Async wrapper that runs the synchronous requests code in a thread pool"""
|
||||
|
||||
|
||||
loop = asyncio.get_event_loop()
|
||||
|
||||
|
||||
# Run the synchronous _run_sync in a thread pool to avoid blocking the event loop
|
||||
# Retry logic is handled by requests' HTTPAdapter (see _run_sync for configuration)
|
||||
await loop.run_in_executor(
|
||||
None, # Use default ThreadPoolExecutor
|
||||
lambda: self._run_sync(
|
||||
@@ -243,6 +163,7 @@ class fetcher(Fetcher):
|
||||
)
|
||||
|
||||
async def quit(self, watch=None):
|
||||
|
||||
# In case they switched to `requests` fetcher from something else
|
||||
# Then the screenshot could be old, in any case, it's not used here.
|
||||
# REMOVE_REQUESTS_OLD_SCREENSHOTS - Mainly used for testing
|
||||
@@ -259,10 +180,9 @@ class fetcher(Fetcher):
|
||||
class RequestsFetcherPlugin:
|
||||
"""Plugin class that registers the requests fetcher as a built-in plugin."""
|
||||
|
||||
@hookimpl
|
||||
def register_content_fetcher(self):
|
||||
"""Register the requests fetcher"""
|
||||
return ('requests', fetcher)
|
||||
return ('html_requests', fetcher)
|
||||
|
||||
|
||||
# Create module-level instance for plugin registration
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/**
|
||||
* Lock Element Dimensions for Screenshot Capture (First Viewport Only)
|
||||
* Lock Element Dimensions for Screenshot Capture
|
||||
*
|
||||
* THE PROBLEM:
|
||||
* When taking full-page screenshots of tall pages, Chrome/Puppeteer/Playwright need to:
|
||||
@@ -10,31 +10,40 @@
|
||||
* However, changing the viewport height triggers CSS media queries like:
|
||||
* @media (min-height: 860px) { .ad { height: 250px; } }
|
||||
*
|
||||
* This causes elements (especially ads/headers) to resize during screenshot capture.
|
||||
* This causes elements (especially ads) to resize during screenshot capture, creating a mismatch:
|
||||
* - Screenshot shows element at NEW size (after media query triggered)
|
||||
* - xpath element coordinates measured at OLD size (before viewport change)
|
||||
* - Visual selector overlays don't align with screenshot
|
||||
*
|
||||
* EXAMPLE BUG:
|
||||
* - Initial viewport: 1280x800, ad height: 138px, article position: 279px ✓
|
||||
* - Viewport changes to 1280x3809 for screenshot
|
||||
* - Media query triggers: ad expands to 250px
|
||||
* - All content below shifts down by 112px (250-138)
|
||||
* - Article now at position: 391px (279+112)
|
||||
* - But xpath data says 279px → 112px mismatch! ✗
|
||||
*
|
||||
* THE SOLUTION:
|
||||
* Lock element dimensions in the FIRST VIEWPORT ONLY with !important inline styles.
|
||||
* This prevents headers, navigation, and top ads from resizing when viewport changes.
|
||||
* We only lock the visible portion because:
|
||||
* - Most layout shifts happen in headers/navbars/top ads
|
||||
* - Locking only visible elements is 100x+ faster (100-200 elements vs 10,000+)
|
||||
* - Below-fold content shifts don't affect visual comparison accuracy
|
||||
* Before changing viewport, lock ALL element dimensions with !important inline styles.
|
||||
* Inline styles with !important override media query CSS, preventing layout changes.
|
||||
*
|
||||
* WHAT THIS SCRIPT DOES:
|
||||
* 1. Gets current viewport height
|
||||
* 2. Finds elements within first viewport (top of page to bottom of screen)
|
||||
* 3. Locks their dimensions with !important inline styles
|
||||
* 1. Iterates through every element on the page
|
||||
* 2. Captures current computed dimensions (width, height)
|
||||
* 3. Sets inline styles with !important to freeze those dimensions
|
||||
* 4. Disables ResizeObserver API (for JS-based resizing)
|
||||
* 5. When viewport changes for screenshot, media queries can't resize anything
|
||||
* 6. Layout remains consistent → xpath coordinates match screenshot ✓
|
||||
*
|
||||
* USAGE:
|
||||
* Execute this script BEFORE calling capture_full_page() / screenshot functions.
|
||||
* Only enabled for image_ssim_diff processor (visual comparison).
|
||||
* Default: OFF for performance.
|
||||
* The page must be fully loaded and settled at its initial viewport size.
|
||||
* No need to restore state afterward - page is closed after screenshot.
|
||||
*
|
||||
* PERFORMANCE:
|
||||
* - Only processes 100-300 elements (first viewport) vs 10,000+ (entire page)
|
||||
* - Typically completes in 10-50ms
|
||||
* - 100x+ faster than locking entire page
|
||||
* - Iterates all DOM elements (can be 1000s on complex pages)
|
||||
* - Typically completes in 50-200ms
|
||||
* - One-time cost before screenshot, well worth it for coordinate accuracy
|
||||
*
|
||||
* @see https://github.com/dgtlmoon/changedetection.io/issues/XXXX
|
||||
*/
|
||||
@@ -43,34 +52,11 @@
|
||||
// Store original styles in a global WeakMap for later restoration
|
||||
window.__elementSizingRestore = new WeakMap();
|
||||
|
||||
const start = performance.now();
|
||||
|
||||
// Get current viewport height (visible portion of page)
|
||||
const viewportHeight = window.innerHeight;
|
||||
|
||||
// Get all elements and filter to FIRST VIEWPORT ONLY
|
||||
// This dramatically reduces elements to process (100-300 vs 10,000+)
|
||||
const allElements = Array.from(document.querySelectorAll('*'));
|
||||
|
||||
// BATCH READ PHASE: Get bounding rects and filter to viewport
|
||||
const measurements = allElements.map(el => {
|
||||
const rect = el.getBoundingClientRect();
|
||||
// Lock ALL element dimensions to prevent media query layout changes
|
||||
document.querySelectorAll('*').forEach(el => {
|
||||
const computed = window.getComputedStyle(el);
|
||||
const rect = el.getBoundingClientRect();
|
||||
|
||||
// Only lock elements in the first viewport (visible on initial page load)
|
||||
// rect.top < viewportHeight means element starts within visible area
|
||||
const inViewport = rect.top < viewportHeight && rect.top >= 0;
|
||||
const hasSize = rect.height > 0 && rect.width > 0;
|
||||
|
||||
return inViewport && hasSize ? { el, computed, rect } : null;
|
||||
}).filter(Boolean); // Remove null entries
|
||||
|
||||
const elapsed = performance.now() - start;
|
||||
console.log(`Locked first viewport elements: ${measurements.length} of ${allElements.length} total elements (viewport height: ${viewportHeight}px, took ${elapsed.toFixed(0)}ms)`);
|
||||
|
||||
// BATCH WRITE PHASE: Apply all inline styles without triggering layout
|
||||
// No interleaved reads means browser can optimize style application
|
||||
measurements.forEach(({el, computed, rect}) => {
|
||||
// Save original inline style values BEFORE locking
|
||||
const properties = ['height', 'min-height', 'max-height', 'width', 'min-width', 'max-width'];
|
||||
const originalStyles = {};
|
||||
@@ -103,5 +89,5 @@
|
||||
disconnect() {}
|
||||
};
|
||||
|
||||
console.log(`✓ Element dimensions locked (${measurements.length} elements) to prevent media query changes during screenshot`);
|
||||
console.log('✓ Element dimensions locked to prevent media query changes during screenshot');
|
||||
})();
|
||||
|
||||
@@ -8,42 +8,92 @@ from loguru import logger
|
||||
|
||||
from changedetectionio.content_fetchers import SCREENSHOT_MAX_HEIGHT_DEFAULT, SCREENSHOT_DEFAULT_QUALITY
|
||||
|
||||
def stitch_images_worker_raw_bytes(pipe_conn, original_page_height, capture_height):
|
||||
# Cache font to avoid loading on every stitch
|
||||
_cached_font = None
|
||||
|
||||
def _get_caption_font():
    """
    Return the font used for the "screenshot was trimmed" caption.

    The font is loaded on the first call and kept in the module-level
    ``_cached_font`` so later calls reuse it instead of loading it again.
    """
    global _cached_font

    # Fast path: a previous call already loaded the font
    if _cached_font is not None:
        return _cached_font

    from PIL import ImageFont

    try:
        _cached_font = ImageFont.truetype("arial.ttf", 35)
    except IOError:
        # truetype() could not load arial.ttf - fall back to PIL's built-in font
        _cached_font = ImageFont.load_default()

    return _cached_font
|
||||
|
||||
|
||||
def stitch_images_inline(chunks_bytes, original_page_height, capture_height):
    """
    Stitch image chunks together inline (no multiprocessing).

    The chunks are pasted one below the other, in list order, onto a single
    RGB canvas. When the page was taller than what was captured, a red
    warning caption on a white strip is drawn across the top of the result.

    Args:
        chunks_bytes: List of encoded image chunks (bytes), top to bottom
        original_page_height: Original page height in pixels
        capture_height: Maximum capture height

    Returns:
        bytes: Stitched JPEG image
    """
    import os
    import io
    from PIL import Image, ImageDraw

    # Decode every chunk first so the final canvas size can be computed
    tiles = [Image.open(io.BytesIO(raw)) for raw in chunks_bytes]
    canvas_height = sum(tile.height for tile in tiles)
    canvas_width = max(tile.width for tile in tiles)

    # Paste the chunks top-to-bottom, closing each one as soon as it is copied
    canvas = Image.new('RGB', (canvas_width, canvas_height))
    cursor_y = 0
    for tile in tiles:
        canvas.paste(tile, (0, cursor_y))
        cursor_y += tile.height
        tile.close()

    # Only annotate the image when the page was trimmed
    if original_page_height > capture_height:
        pen = ImageDraw.Draw(canvas)
        caption_text = f"WARNING: Screenshot was {original_page_height}px but trimmed to {capture_height}px because it was too long"
        padding = 10
        font = _get_caption_font()

        left, top, right, bottom = pen.textbbox((0, 0), caption_text, font=font)
        text_width = right - left
        text_height = bottom - top

        # White strip across the full width as the caption background
        pen.rectangle([(0, 0), (canvas_width, text_height + 2 * padding)], fill=(255, 255, 255))

        # Caption text, horizontally centred
        text_x = (canvas_width - text_width) // 2
        pen.text((text_x, padding), caption_text, font=font, fill=(255, 0, 0))

    # Encode to JPEG; quality comes from SCREENSHOT_QUALITY when set
    buffer = io.BytesIO()
    jpeg_quality = int(os.getenv("SCREENSHOT_QUALITY", SCREENSHOT_DEFAULT_QUALITY))
    canvas.save(buffer, format="JPEG", quality=jpeg_quality, optimize=True)
    encoded = buffer.getvalue()

    # Release the canvas before handing back the encoded bytes
    canvas.close()

    return encoded
|
||||
|
||||
|
||||
def stitch_images_worker(pipe_conn, chunks_bytes, original_page_height, capture_height):
|
||||
"""
|
||||
Stitch image chunks together in a separate process.
|
||||
Used for large number of chunks (4+) to avoid blocking the main event loop.
|
||||
"""
|
||||
import os
|
||||
import io
|
||||
import struct
|
||||
from PIL import Image, ImageDraw, ImageFont
|
||||
|
||||
try:
|
||||
# Receive chunk count as 4-byte integer (no pickle!)
|
||||
count_bytes = pipe_conn.recv_bytes()
|
||||
chunk_count = struct.unpack('I', count_bytes)[0]
|
||||
|
||||
# Receive each chunk as raw bytes (no pickle!)
|
||||
chunks_bytes = []
|
||||
for _ in range(chunk_count):
|
||||
chunks_bytes.append(pipe_conn.recv_bytes())
|
||||
|
||||
# Load images from byte chunks
|
||||
images = [Image.open(io.BytesIO(b)) for b in chunks_bytes]
|
||||
del chunks_bytes
|
||||
|
||||
total_height = sum(im.height for im in images)
|
||||
max_width = max(im.width for im in images)
|
||||
|
||||
@@ -53,14 +103,15 @@ def stitch_images_worker_raw_bytes(pipe_conn, original_page_height, capture_heig
|
||||
for im in images:
|
||||
stitched.paste(im, (0, y_offset))
|
||||
y_offset += im.height
|
||||
im.close()
|
||||
del images
|
||||
im.close() # Close immediately after pasting
|
||||
|
||||
# Draw caption only if page was trimmed
|
||||
if original_page_height > capture_height:
|
||||
draw = ImageDraw.Draw(stitched)
|
||||
caption_text = f"WARNING: Screenshot was {original_page_height}px but trimmed to {capture_height}px because it was too long"
|
||||
padding = 10
|
||||
|
||||
# Try to load font
|
||||
try:
|
||||
font = ImageFont.truetype("arial.ttf", 35)
|
||||
except IOError:
|
||||
@@ -69,26 +120,23 @@ def stitch_images_worker_raw_bytes(pipe_conn, original_page_height, capture_heig
|
||||
bbox = draw.textbbox((0, 0), caption_text, font=font)
|
||||
text_width = bbox[2] - bbox[0]
|
||||
text_height = bbox[3] - bbox[1]
|
||||
|
||||
# Draw white background rectangle
|
||||
draw.rectangle([(0, 0), (max_width, text_height + 2 * padding)], fill=(255, 255, 255))
|
||||
|
||||
# Draw text centered
|
||||
text_x = (max_width - text_width) // 2
|
||||
draw.text((text_x, padding), caption_text, font=font, fill=(255, 0, 0))
|
||||
|
||||
# Encode and send
|
||||
# Encode and send image with optimization
|
||||
output = io.BytesIO()
|
||||
stitched.save(output, format="JPEG", quality=int(os.getenv("SCREENSHOT_QUALITY", SCREENSHOT_DEFAULT_QUALITY)), optimize=True)
|
||||
result_bytes = output.getvalue()
|
||||
pipe_conn.send_bytes(output.getvalue())
|
||||
|
||||
stitched.close()
|
||||
del stitched
|
||||
output.close()
|
||||
del output
|
||||
|
||||
pipe_conn.send_bytes(result_bytes)
|
||||
del result_bytes
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error in stitch_images_worker_raw_bytes: {e}")
|
||||
error_msg = f"error:{e}".encode('utf-8')
|
||||
pipe_conn.send_bytes(error_msg)
|
||||
pipe_conn.send(f"error:{e}")
|
||||
finally:
|
||||
pipe_conn.close()
|
||||
|
||||
|
||||
|
||||
@@ -3,21 +3,30 @@ import time
|
||||
|
||||
from loguru import logger
|
||||
from changedetectionio.content_fetchers.base import Fetcher
|
||||
from changedetectionio.pluggy_interface import hookimpl
|
||||
|
||||
|
||||
class fetcher(Fetcher):
|
||||
fetcher_description = "Selenium WebDriver Chrome"
|
||||
if os.getenv("WEBDRIVER_URL"):
|
||||
fetcher_description = f"WebDriver Chrome/Javascript via \"{os.getenv('WEBDRIVER_URL', '')}\""
|
||||
else:
|
||||
fetcher_description = "WebDriver Chrome/Javascript"
|
||||
|
||||
proxy = None
|
||||
proxy_url = None
|
||||
|
||||
# Capability flags
|
||||
supports_browser_steps = False
|
||||
supports_browser_steps = True
|
||||
supports_screenshots = True
|
||||
supports_xpath_element_data = True
|
||||
|
||||
status_icon = {'filename': 'google-chrome-icon.png', 'alt': 'Using a Chrome browser', 'title': 'Using a Chrome browser'}
|
||||
@classmethod
|
||||
def get_status_icon_data(cls):
|
||||
"""Return Chrome browser icon data for WebDriver fetcher."""
|
||||
return {
|
||||
'filename': 'google-chrome-icon.png',
|
||||
'alt': 'Using a Chrome browser',
|
||||
'title': 'Using a Chrome browser'
|
||||
}
|
||||
|
||||
def __init__(self, proxy_override=None, custom_browser_connection_url=None, **kwargs):
|
||||
super().__init__(**kwargs)
|
||||
@@ -147,19 +156,6 @@ class fetcher(Fetcher):
|
||||
from PIL import Image
|
||||
import io
|
||||
img = Image.open(io.BytesIO(screenshot_png))
|
||||
# Convert to RGB if needed (JPEG doesn't support transparency)
|
||||
# Always convert non-RGB modes to RGB to ensure JPEG compatibility
|
||||
if img.mode in ('RGBA', 'LA', 'P', 'PA'):
|
||||
# Handle transparency by compositing onto white background
|
||||
if img.mode == 'P':
|
||||
img = img.convert('RGBA')
|
||||
background = Image.new('RGB', img.size, (255, 255, 255))
|
||||
if img.mode in ('RGBA', 'LA', 'PA'):
|
||||
background.paste(img, mask=img.split()[-1]) # Use alpha channel as mask
|
||||
img = background
|
||||
elif img.mode != 'RGB':
|
||||
# For other modes, direct conversion
|
||||
img = img.convert('RGB')
|
||||
jpeg_buffer = io.BytesIO()
|
||||
img.save(jpeg_buffer, format='JPEG', quality=int(os.getenv("SCREENSHOT_QUALITY", 72)))
|
||||
self.screenshot = jpeg_buffer.getvalue()
|
||||
@@ -181,10 +177,9 @@ class fetcher(Fetcher):
|
||||
class WebDriverSeleniumFetcherPlugin:
|
||||
"""Plugin class that registers the WebDriver Selenium fetcher as a built-in plugin."""
|
||||
|
||||
@hookimpl
|
||||
def register_content_fetcher(self):
|
||||
"""Register the WebDriver Selenium fetcher"""
|
||||
return ('selenium', fetcher)
|
||||
return ('html_webdriver', fetcher)
|
||||
|
||||
|
||||
# Create module-level instance for plugin registration
|
||||
|
||||
@@ -57,15 +57,14 @@ class SignalPriorityQueue(queue.PriorityQueue):
|
||||
def put(self, item, block=True, timeout=None):
|
||||
# Call the parent's put method first
|
||||
super().put(item, block, timeout)
|
||||
|
||||
|
||||
# After putting the item in the queue, check if it has a UUID and emit signal
|
||||
if hasattr(item, 'item') and isinstance(item.item, dict) and 'uuid' in item.item:
|
||||
uuid = item.item['uuid']
|
||||
# Get the signal and send it if it exists
|
||||
watch_check_update = signal('watch_check_update')
|
||||
if watch_check_update:
|
||||
# NOTE: This would block other workers from .put/.get while this signal sends
|
||||
# Signal handlers may iterate the queue/datastore while holding locks
|
||||
# Send the watch_uuid parameter
|
||||
watch_check_update.send(watch_uuid=uuid)
|
||||
|
||||
# Send queue_length signal with current queue size
|
||||
@@ -313,15 +312,14 @@ class AsyncSignalPriorityQueue(asyncio.PriorityQueue):
|
||||
async def put(self, item):
|
||||
# Call the parent's put method first
|
||||
await super().put(item)
|
||||
|
||||
|
||||
# After putting the item in the queue, check if it has a UUID and emit signal
|
||||
if hasattr(item, 'item') and isinstance(item.item, dict) and 'uuid' in item.item:
|
||||
uuid = item.item['uuid']
|
||||
# Get the signal and send it if it exists
|
||||
watch_check_update = signal('watch_check_update')
|
||||
if watch_check_update:
|
||||
# NOTE: This would block other workers from .put/.get while this signal sends
|
||||
# Signal handlers may iterate the queue/datastore while holding locks
|
||||
# Send the watch_uuid parameter
|
||||
watch_check_update.send(watch_uuid=uuid)
|
||||
|
||||
# Send queue_length signal with current queue size
|
||||
|
||||
@@ -1,43 +0,0 @@
|
||||
"""
|
||||
Favicon utilities for changedetection.io
|
||||
Handles favicon MIME type detection with caching
|
||||
"""
|
||||
|
||||
from functools import lru_cache
|
||||
|
||||
|
||||
@lru_cache(maxsize=1000)
def get_favicon_mime_type(filepath):
    """
    Work out the MIME type of a favicon file.

    Detection is attempted in three stages, stopping at the first that
    yields a value:
      1. puremagic content sniffing on the first 200 bytes of the file
      2. ``mimetypes.guess_type`` on the file path
      3. an extension check: ``.ico`` -> 'image/x-icon', anything else -> 'image/png'

    Results are memoised per ``filepath`` by ``lru_cache``.

    Args:
        filepath: Full path to the favicon file

    Returns:
        MIME type string (e.g., 'image/png')
    """
    detected = None

    # Stage 1: content-based detection. Any failure here (puremagic not
    # importable, file unreadable, ...) is deliberately ignored so the
    # path-based fallbacks below still run.
    try:
        import puremagic
        with open(filepath, 'rb') as handle:
            header = handle.read(200)  # Only the leading bytes are inspected

        matches = puremagic.magic_string(header)
        if matches:
            detected = matches[0].mime_type
    except Exception:
        pass

    if detected:
        return detected

    # Stage 2: guess from the path via the mimetypes module
    import mimetypes
    guessed, _encoding = mimetypes.guess_type(filepath)
    if guessed:
        return guessed

    # Stage 3: last-resort default based on the file extension
    return 'image/x-icon' if filepath.endswith('.ico') else 'image/png'
|
||||
@@ -4,18 +4,16 @@ import flask_login
|
||||
import locale
|
||||
import os
|
||||
import queue
|
||||
import re
|
||||
import sys
|
||||
import threading
|
||||
import time
|
||||
import timeago
|
||||
from blinker import signal
|
||||
from pathlib import Path
|
||||
|
||||
from changedetectionio.strtobool import strtobool
|
||||
from threading import Event
|
||||
from changedetectionio.queue_handlers import RecheckPriorityQueue, NotificationQueue
|
||||
from changedetectionio import worker_pool
|
||||
from changedetectionio import worker_handler
|
||||
|
||||
from flask import (
|
||||
Flask,
|
||||
@@ -28,6 +26,8 @@ from flask import (
|
||||
session,
|
||||
url_for,
|
||||
)
|
||||
from flask_compress import Compress as FlaskCompress
|
||||
from flask_login import current_user
|
||||
from flask_restful import abort, Api
|
||||
from flask_cors import CORS
|
||||
|
||||
@@ -40,13 +40,10 @@ from loguru import logger
|
||||
|
||||
from changedetectionio import __version__
|
||||
from changedetectionio import queuedWatchMetaData
|
||||
from changedetectionio.api import Watch, WatchHistory, WatchSingleHistory, WatchHistoryDiff, CreateWatch, Import, SystemInfo, Tag, Tags, Notifications, WatchFavicon, Spec
|
||||
from changedetectionio.api import Watch, WatchHistory, WatchSingleHistory, WatchHistoryDiff, CreateWatch, Import, SystemInfo, Tag, Tags, Notifications, WatchFavicon
|
||||
from changedetectionio.api.Search import Search
|
||||
from .time_handler import is_within_schedule
|
||||
from changedetectionio.languages import get_available_languages, get_language_codes, get_flag_for_locale, get_timeago_locale
|
||||
from changedetectionio.favicon_utils import get_favicon_mime_type
|
||||
|
||||
IN_PYTEST = "pytest" in sys.modules or "PYTEST_CURRENT_TEST" in os.environ
|
||||
|
||||
datastore = None
|
||||
|
||||
@@ -57,7 +54,7 @@ extra_stylesheets = []
|
||||
# Use bulletproof janus-based queues for sync/async reliability
|
||||
update_q = RecheckPriorityQueue()
|
||||
notification_q = NotificationQueue()
|
||||
MAX_QUEUE_SIZE = 5000
|
||||
MAX_QUEUE_SIZE = 2000
|
||||
|
||||
app = Flask(__name__,
|
||||
static_url_path="",
|
||||
@@ -69,43 +66,11 @@ socketio_server = None
|
||||
|
||||
# Enable CORS, especially useful for the Chrome extension to operate from anywhere
|
||||
CORS(app)
|
||||
from werkzeug.routing import BaseConverter, ValidationError
|
||||
from uuid import UUID
|
||||
|
||||
class StrictUUIDConverter(BaseConverter):
    """
    URL converter that only accepts canonical, hyphenated UUID strings
    (compared case-insensitively), plus a small set of sentinel words.
    """

    # Non-UUID placeholder values that are passed through untouched
    _ALLOWED_SENTINELS = frozenset({'first'})

    def to_python(self, value: str) -> str:
        """
        Validate the URL segment and return it as a string.

        Sentinels are returned as-is; a valid UUID is returned in its
        canonical ``str(UUID)`` form. Anything else raises ValidationError.
        """
        if value not in self._ALLOWED_SENTINELS:
            try:
                canonical = str(UUID(value))
            except ValueError as e:
                raise ValidationError() from e

            # UUID() also parses braces, URNs and un-hyphenated hex; reject
            # those by requiring the input to already be the canonical form
            if canonical != value.lower():
                raise ValidationError()
            return canonical

        return value

    def to_url(self, value) -> str:
        """Render the value back into a URL segment."""
        return str(value)
|
||||
|
||||
# app setup (once)
|
||||
app.url_map.converters["uuid_str"] = StrictUUIDConverter
|
||||
|
||||
# Flask-Compress handles HTTP compression, Socket.IO compression disabled to prevent memory leak.
|
||||
# There's also a bug between flask compress and socketio that causes some kind of slow memory leak
|
||||
# It's better to use compression on your reverse proxy (nginx etc) instead.
|
||||
if strtobool(os.getenv("FLASK_ENABLE_COMPRESSION")):
|
||||
from flask_compress import Compress as FlaskCompress
|
||||
app.config['COMPRESS_MIN_SIZE'] = 2096
|
||||
app.config['COMPRESS_MIMETYPES'] = ['text/html', 'text/css', 'text/javascript', 'application/json', 'application/javascript', 'image/svg+xml']
|
||||
# Use gzip only - smaller memory footprint than zstd/brotli (4-8KB vs 200-500KB contexts)
|
||||
app.config['COMPRESS_ALGORITHM'] = ['gzip']
|
||||
compress = FlaskCompress()
|
||||
compress.init_app(app)
|
||||
|
||||
# Super handy for compressing large BrowserSteps responses and others
|
||||
FlaskCompress(app)
|
||||
app.config['COMPRESS_MIN_SIZE'] = 4096
|
||||
app.config['COMPRESS_MIMETYPES'] = ['text/html', 'text/css', 'text/javascript', 'application/json', 'application/javascript', 'image/svg+xml']
|
||||
app.config['TEMPLATES_AUTO_RELOAD'] = False
|
||||
|
||||
|
||||
@@ -118,18 +83,6 @@ app.config['NEW_VERSION_AVAILABLE'] = False
|
||||
if os.getenv('FLASK_SERVER_NAME'):
|
||||
app.config['SERVER_NAME'] = os.getenv('FLASK_SERVER_NAME')
|
||||
|
||||
# Babel/i18n configuration
|
||||
app.config['BABEL_TRANSLATION_DIRECTORIES'] = str(Path(__file__).parent / 'translations')
|
||||
app.config['BABEL_DEFAULT_LOCALE'] = 'en_GB'
|
||||
|
||||
# Session configuration
|
||||
# NOTE: Flask session (for locale, etc.) is separate from Flask-Login's remember-me cookie
|
||||
# - Flask session stores data like session['locale'] in a signed cookie
|
||||
# - Flask-Login's remember=True creates a separate authentication cookie
|
||||
# - Setting PERMANENT_SESSION_LIFETIME controls how long the Flask session cookie lasts
|
||||
from datetime import timedelta
|
||||
app.config['PERMANENT_SESSION_LIFETIME'] = timedelta(days=3650) # ~10 years (effectively unlimited)
|
||||
|
||||
#app.config["EXPLAIN_TEMPLATE_LOADING"] = True
|
||||
|
||||
|
||||
@@ -218,16 +171,12 @@ def _jinja2_filter_format_number_locale(value: float) -> str:
|
||||
"Formats for example 4000.10 to the local locale default of 4,000.10"
|
||||
# Format the number with two decimal places (locale format string will return 6 decimal)
|
||||
formatted_value = locale.format_string("%.2f", value, grouping=True)
|
||||
return formatted_value
|
||||
|
||||
@app.template_filter('regex_search')
|
||||
def _jinja2_filter_regex_search(value, pattern):
|
||||
import re
|
||||
return re.search(pattern, str(value)) is not None
|
||||
return formatted_value
|
||||
|
||||
@app.template_global('is_checking_now')
|
||||
def _watch_is_checking_now(watch_obj, format="%Y-%m-%d %H:%M:%S"):
|
||||
return worker_pool.is_watch_running(watch_obj['uuid'])
|
||||
return worker_handler.is_watch_running(watch_obj['uuid'])
|
||||
|
||||
@app.template_global('get_watch_queue_position')
|
||||
def _get_watch_queue_position(watch_obj):
|
||||
@@ -238,13 +187,13 @@ def _get_watch_queue_position(watch_obj):
|
||||
@app.template_global('get_current_worker_count')
|
||||
def _get_current_worker_count():
|
||||
"""Get the current number of operational workers"""
|
||||
return worker_pool.get_worker_count()
|
||||
return worker_handler.get_worker_count()
|
||||
|
||||
@app.template_global('get_worker_status_info')
|
||||
def _get_worker_status_info():
|
||||
"""Get detailed worker status information for display"""
|
||||
status = worker_pool.get_worker_status()
|
||||
running_uuids = worker_pool.get_running_uuids()
|
||||
status = worker_handler.get_worker_status()
|
||||
running_uuids = worker_handler.get_running_uuids()
|
||||
|
||||
return {
|
||||
'count': status['worker_count'],
|
||||
@@ -298,99 +247,54 @@ def _jinja2_filter_seconds_precise(timestamp):
|
||||
|
||||
return format(int(time.time()-timestamp), ',d')
|
||||
|
||||
@app.template_filter('format_duration')
|
||||
def _jinja2_filter_format_duration(seconds):
|
||||
"""Format a duration in seconds into human readable string like '5 days, 3 hours, 30 minutes'"""
|
||||
from datetime import timedelta
|
||||
|
||||
if not seconds or seconds < 0:
|
||||
return gettext('0 seconds')
|
||||
|
||||
td = timedelta(seconds=int(seconds))
|
||||
|
||||
# Calculate components
|
||||
years = td.days // 365
|
||||
remaining_days = td.days % 365
|
||||
months = remaining_days // 30
|
||||
remaining_days = remaining_days % 30
|
||||
weeks = remaining_days // 7
|
||||
days = remaining_days % 7
|
||||
|
||||
hours = td.seconds // 3600
|
||||
minutes = (td.seconds % 3600) // 60
|
||||
secs = td.seconds % 60
|
||||
|
||||
# Build parts list
|
||||
parts = []
|
||||
if years > 0:
|
||||
parts.append(f"{years} {gettext('year') if years == 1 else gettext('years')}")
|
||||
if months > 0:
|
||||
parts.append(f"{months} {gettext('month') if months == 1 else gettext('months')}")
|
||||
if weeks > 0:
|
||||
parts.append(f"{weeks} {gettext('week') if weeks == 1 else gettext('weeks')}")
|
||||
if days > 0:
|
||||
parts.append(f"{days} {gettext('day') if days == 1 else gettext('days')}")
|
||||
if hours > 0:
|
||||
parts.append(f"{hours} {gettext('hour') if hours == 1 else gettext('hours')}")
|
||||
if minutes > 0:
|
||||
parts.append(f"{minutes} {gettext('minute') if minutes == 1 else gettext('minutes')}")
|
||||
if secs > 0 or not parts:
|
||||
parts.append(f"{secs} {gettext('second') if secs == 1 else gettext('seconds')}")
|
||||
|
||||
return ", ".join(parts)
|
||||
|
||||
@app.template_filter('fetcher_status_icons')
|
||||
def _jinja2_filter_fetcher_status_icons(fetcher_name):
|
||||
"""Return status icon HTML for a fetcher, or empty string if none.
|
||||
"""Get status icon HTML for a given fetcher.
|
||||
|
||||
Built-in fetchers declare their icon via the ``status_icon`` class attribute
|
||||
on their ``Fetcher`` subclass. Plugin fetchers may still use the pluggy
|
||||
``collect_fetcher_status_icons`` hook as a fallback.
|
||||
This filter checks both built-in fetchers and plugin fetchers for status icons.
|
||||
|
||||
Args:
|
||||
fetcher_name: The fetcher name (e.g., 'html_webdriver', 'html_js_zyte')
|
||||
|
||||
Returns:
|
||||
str: HTML string containing status icon elements
|
||||
"""
|
||||
from changedetectionio import content_fetchers
|
||||
from changedetectionio.pluggy_interface import collect_fetcher_status_icons
|
||||
from markupsafe import Markup
|
||||
from flask import url_for
|
||||
|
||||
icon_data = None
|
||||
|
||||
fetcher_class = content_fetchers.get_fetcher(fetcher_name)
|
||||
if fetcher_class is not None:
|
||||
icon_data = getattr(fetcher_class, 'status_icon', None)
|
||||
if not icon_data and callable(getattr(fetcher_class, 'get_status_icon_data', None)):
|
||||
# First check if it's a plugin fetcher (plugins have priority)
|
||||
plugin_icon_data = collect_fetcher_status_icons(fetcher_name)
|
||||
if plugin_icon_data:
|
||||
icon_data = plugin_icon_data
|
||||
# Check if it's a built-in fetcher
|
||||
elif hasattr(content_fetchers, fetcher_name):
|
||||
fetcher_class = getattr(content_fetchers, fetcher_name)
|
||||
if hasattr(fetcher_class, 'get_status_icon_data'):
|
||||
icon_data = fetcher_class.get_status_icon_data()
|
||||
|
||||
# Fallback: pluggy hook for plugins that implement fetcher_status_icon
|
||||
if not icon_data:
|
||||
from changedetectionio.pluggy_interface import collect_fetcher_status_icons
|
||||
icon_data = collect_fetcher_status_icons(fetcher_name)
|
||||
# Build HTML from icon data
|
||||
if icon_data and isinstance(icon_data, dict):
|
||||
# Use 'group' from icon_data if specified, otherwise default to 'images'
|
||||
group = icon_data.get('group', 'images')
|
||||
|
||||
if not icon_data:
|
||||
return ''
|
||||
# Try to use url_for, but fall back to manual URL building if endpoint not registered yet
|
||||
try:
|
||||
icon_url = url_for('static_content', group=group, filename=icon_data['filename'])
|
||||
except:
|
||||
# Fallback: build URL manually respecting APPLICATION_ROOT
|
||||
from flask import request
|
||||
app_root = request.script_root if hasattr(request, 'script_root') else ''
|
||||
icon_url = f"{app_root}/static/{group}/{icon_data['filename']}"
|
||||
|
||||
group = icon_data.get('group', 'images')
|
||||
icon_url = url_for('static_content', group=group, filename=icon_data['filename'])
|
||||
style_attr = f' style="{icon_data["style"]}"' if icon_data.get('style') else ''
|
||||
return Markup(f'<img class="status-icon" src="{icon_url}" alt="{icon_data["alt"]}" title="{icon_data["title"]}"{style_attr}>')
|
||||
style_attr = f' style="{icon_data["style"]}"' if icon_data.get('style') else ''
|
||||
html = f'<img class="status-icon" src="{icon_url}" alt="{icon_data["alt"]}" title="{icon_data["title"]}"{style_attr}>'
|
||||
return Markup(html)
|
||||
|
||||
_RE_SANITIZE_TAG = re.compile(r'[^a-zA-Z0-9]')
|
||||
|
||||
@app.template_filter('sanitize_tag_class')
|
||||
def _jinja2_filter_sanitize_tag_class(tag_title):
|
||||
"""Sanitize a tag title to create a valid CSS class name.
|
||||
Removes all non-alphanumeric characters and converts to lowercase.
|
||||
|
||||
Args:
|
||||
tag_title: The tag title string
|
||||
|
||||
Returns:
|
||||
str: A sanitized string suitable for use as a CSS class name
|
||||
"""
|
||||
# Remove all non-alphanumeric characters and convert to lowercase
|
||||
sanitized = _RE_SANITIZE_TAG.sub('', tag_title).lower()
|
||||
# Ensure it starts with a letter (CSS requirement)
|
||||
if sanitized and not sanitized[0].isalpha():
|
||||
sanitized = 'tag' + sanitized
|
||||
return sanitized if sanitized else 'tag'
|
||||
return ''
|
||||
|
||||
# Import login_optionally_required from auth_decorator
|
||||
from changedetectionio.auth_decorator import login_optionally_required
|
||||
@@ -446,23 +350,10 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
global datastore, socketio_server
|
||||
datastore = datastore_o
|
||||
|
||||
# Set datastore reference in notification queue for all_muted checking
|
||||
notification_q.set_datastore(datastore)
|
||||
|
||||
# Import and create a wrapper for is_safe_url that has access to app
|
||||
from changedetectionio.is_safe_url import is_safe_url as _is_safe_url
|
||||
|
||||
def is_safe_url(target):
|
||||
"""Wrapper for is_safe_url that passes the app instance"""
|
||||
return _is_safe_url(target, app)
|
||||
|
||||
# so far just for read-only via tests, but this will be moved eventually to be the main source
|
||||
# (instead of the global var)
|
||||
app.config['DATASTORE'] = datastore_o
|
||||
|
||||
# Store batch mode flag to skip background threads when running in batch mode
|
||||
app.config['batch_mode'] = config.get('batch_mode', False) if config else False
|
||||
|
||||
|
||||
# Store the signal in the app config to ensure it's accessible everywhere
|
||||
app.config['watch_check_update_SIGNAL'] = watch_check_update
|
||||
|
||||
@@ -474,21 +365,16 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
available_languages = get_available_languages()
|
||||
language_codes = get_language_codes()
|
||||
|
||||
_locale_aliases = {
|
||||
'zh-TW': 'zh_Hant_TW', # Traditional Chinese: browser sends zh-TW, we use zh_Hant_TW
|
||||
'zh_TW': 'zh_Hant_TW', # Also handle underscore variant
|
||||
}
|
||||
_locale_match_list = language_codes + list(_locale_aliases.keys())
|
||||
|
||||
def get_locale():
|
||||
# 1. Try to get locale from session (user explicitly selected)
|
||||
if 'locale' in session:
|
||||
return session['locale']
|
||||
|
||||
locale = session['locale']
|
||||
logger.trace(f"DEBUG: get_locale() returning from session: {locale}")
|
||||
return locale
|
||||
# 2. Fall back to Accept-Language header
|
||||
browser_locale = request.accept_languages.best_match(_locale_match_list)
|
||||
# 3. Map browser locale to our internal locale if needed
|
||||
return _locale_aliases.get(browser_locale, browser_locale)
|
||||
locale = request.accept_languages.best_match(language_codes)
|
||||
logger.trace(f"DEBUG: get_locale() returning from Accept-Language: {locale}")
|
||||
return locale
|
||||
|
||||
# Initialize Babel with locale selector
|
||||
babel = Babel(app, locale_selector=get_locale)
|
||||
@@ -540,22 +426,22 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
|
||||
|
||||
watch_api.add_resource(WatchHistoryDiff,
|
||||
'/api/v1/watch/<uuid_str:uuid>/difference/<string:from_timestamp>/<string:to_timestamp>',
|
||||
'/api/v1/watch/<string:uuid>/difference/<string:from_timestamp>/<string:to_timestamp>',
|
||||
resource_class_kwargs={'datastore': datastore})
|
||||
watch_api.add_resource(WatchSingleHistory,
|
||||
'/api/v1/watch/<uuid_str:uuid>/history/<string:timestamp>',
|
||||
'/api/v1/watch/<string:uuid>/history/<string:timestamp>',
|
||||
resource_class_kwargs={'datastore': datastore, 'update_q': update_q})
|
||||
watch_api.add_resource(WatchFavicon,
|
||||
'/api/v1/watch/<uuid_str:uuid>/favicon',
|
||||
'/api/v1/watch/<string:uuid>/favicon',
|
||||
resource_class_kwargs={'datastore': datastore})
|
||||
watch_api.add_resource(WatchHistory,
|
||||
'/api/v1/watch/<uuid_str:uuid>/history',
|
||||
'/api/v1/watch/<string:uuid>/history',
|
||||
resource_class_kwargs={'datastore': datastore})
|
||||
|
||||
watch_api.add_resource(CreateWatch, '/api/v1/watch',
|
||||
resource_class_kwargs={'datastore': datastore, 'update_q': update_q})
|
||||
|
||||
watch_api.add_resource(Watch, '/api/v1/watch/<uuid_str:uuid>',
|
||||
watch_api.add_resource(Watch, '/api/v1/watch/<string:uuid>',
|
||||
resource_class_kwargs={'datastore': datastore, 'update_q': update_q})
|
||||
|
||||
watch_api.add_resource(SystemInfo, '/api/v1/systeminfo',
|
||||
@@ -568,7 +454,7 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
watch_api.add_resource(Tags, '/api/v1/tags',
|
||||
resource_class_kwargs={'datastore': datastore})
|
||||
|
||||
watch_api.add_resource(Tag, '/api/v1/tag', '/api/v1/tag/<uuid_str:uuid>',
|
||||
watch_api.add_resource(Tag, '/api/v1/tag', '/api/v1/tag/<string:uuid>',
|
||||
resource_class_kwargs={'datastore': datastore, 'update_q': update_q})
|
||||
|
||||
watch_api.add_resource(Search, '/api/v1/search',
|
||||
@@ -577,8 +463,6 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
watch_api.add_resource(Notifications, '/api/v1/notifications',
|
||||
resource_class_kwargs={'datastore': datastore})
|
||||
|
||||
watch_api.add_resource(Spec, '/api/v1/full-spec')
|
||||
|
||||
@login_manager.user_loader
|
||||
def user_loader(email):
|
||||
user = User()
|
||||
@@ -587,76 +471,36 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
|
||||
@login_manager.unauthorized_handler
|
||||
def unauthorized_handler():
|
||||
# Pass the current request path so users are redirected back after login
|
||||
return redirect(url_for('login', redirect=request.path))
|
||||
return redirect(url_for('login', next=url_for('watchlist.index')))
|
||||
|
||||
@app.route('/logout')
|
||||
def logout():
|
||||
flask_login.logout_user()
|
||||
|
||||
# Check if there's a redirect parameter to return to after re-login
|
||||
redirect_url = request.args.get('redirect')
|
||||
|
||||
# If redirect is provided and safe, pass it to login page
|
||||
if redirect_url and is_safe_url(redirect_url):
|
||||
return redirect(url_for('login', redirect=redirect_url))
|
||||
|
||||
# Otherwise just go to watchlist
|
||||
return redirect(url_for('watchlist.index'))
|
||||
|
||||
@app.route('/set-language/<locale>')
|
||||
def set_language(locale):
|
||||
"""Set the user's preferred language in the session"""
|
||||
if not request.cookies:
|
||||
logger.error("Cannot set language without session cookie")
|
||||
flash("Cannot set language without session cookie", 'error')
|
||||
return redirect(url_for('watchlist.index'))
|
||||
|
||||
# Validate the locale against available languages
|
||||
if locale in language_codes:
|
||||
# Make session permanent so language preference persists across browser sessions
|
||||
# NOTE: This is the Flask session cookie (separate from Flask-Login's remember-me auth cookie)
|
||||
session.permanent = True
|
||||
session['locale'] = locale
|
||||
|
||||
# CRITICAL: Flask-Babel caches the locale in the request context (ctx.babel_locale)
|
||||
# We must refresh to clear this cache so the new locale takes effect immediately
|
||||
# This is especially important for tests where multiple requests happen rapidly
|
||||
from flask_babel import refresh
|
||||
refresh()
|
||||
else:
|
||||
logger.error(f"Invalid locale {locale}, available: {language_codes}")
|
||||
|
||||
# Check if there's a redirect parameter to return to the same page
|
||||
redirect_url = request.args.get('redirect')
|
||||
|
||||
# If redirect is provided and safe, use it
|
||||
if redirect_url and is_safe_url(redirect_url):
|
||||
return redirect(redirect_url)
|
||||
|
||||
# Otherwise redirect to watchlist
|
||||
# Redirect back to the page they came from, or home
|
||||
return redirect(url_for('watchlist.index'))
|
||||
|
||||
# https://github.com/pallets/flask/blob/93dd1709d05a1cf0e886df6223377bdab3b077fb/examples/tutorial/flaskr/__init__.py#L39
|
||||
# You can divide up the stuff like this
|
||||
@app.route('/login', methods=['GET', 'POST'])
|
||||
def login():
|
||||
# Extract and validate the redirect parameter
|
||||
redirect_url = request.args.get('redirect') or request.form.get('redirect')
|
||||
|
||||
# Validate the redirect URL - default to watchlist if invalid
|
||||
if redirect_url and is_safe_url(redirect_url):
|
||||
validated_redirect = redirect_url
|
||||
else:
|
||||
validated_redirect = url_for('watchlist.index')
|
||||
|
||||
if request.method == 'GET':
|
||||
if flask_login.current_user.is_authenticated:
|
||||
# Already logged in - redirect immediately to the target
|
||||
flash(gettext("Already logged in"))
|
||||
return redirect(validated_redirect)
|
||||
return redirect(url_for("watchlist.index"))
|
||||
flash(gettext("You must be logged in, please log in."), 'error')
|
||||
output = render_template("login.html", redirect_url=validated_redirect)
|
||||
output = render_template("login.html")
|
||||
return output
|
||||
|
||||
user = User()
|
||||
@@ -666,13 +510,23 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
|
||||
if (user.check_password(password)):
|
||||
flask_login.login_user(user, remember=True)
|
||||
# Redirect to the validated URL after successful login
|
||||
return redirect(validated_redirect)
|
||||
|
||||
# For now there's nothing else interesting here other than the index/list page
|
||||
# It's more reliable and safe to ignore the 'next' redirect
|
||||
# When we used...
|
||||
# next = request.args.get('next')
|
||||
# return redirect(next or url_for('watchlist.index'))
|
||||
# We would sometimes get login loop errors on sites hosted in sub-paths
|
||||
|
||||
# note for the future:
|
||||
# if not is_safe_valid_url(next):
|
||||
# return flask.abort(400)
|
||||
return redirect(url_for('watchlist.index'))
|
||||
|
||||
else:
|
||||
flash(gettext('Incorrect password'), 'error')
|
||||
|
||||
return redirect(url_for('login', redirect=redirect_url if redirect_url else None))
|
||||
return redirect(url_for('login'))
|
||||
|
||||
@app.before_request
|
||||
def before_request_handle_cookie_x_settings():
|
||||
@@ -720,14 +574,8 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
def static_content(group, filename):
|
||||
from flask import make_response
|
||||
import re
|
||||
|
||||
# Strict sanitization: only allow a-z, 0-9, and underscore (blocks .. and other traversal)
|
||||
group = re.sub(r'[^a-z0-9_-]+', '', group.lower())
|
||||
filename = filename
|
||||
|
||||
# Additional safety: reject if sanitization resulted in empty strings
|
||||
if not group or not filename:
|
||||
abort(404)
|
||||
group = re.sub(r'[^\w.-]+', '', group.lower())
|
||||
filename = re.sub(r'[^\w.-]+', '', filename.lower())
|
||||
|
||||
if group == 'screenshot':
|
||||
# Could be sensitive, follow password requirements
|
||||
@@ -761,11 +609,18 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
|
||||
favicon_filename = watch.get_favicon_filename()
|
||||
if favicon_filename:
|
||||
# Use cached MIME type detection
|
||||
filepath = os.path.join(watch.data_dir, favicon_filename)
|
||||
mime = get_favicon_mime_type(filepath)
|
||||
try:
|
||||
import magic
|
||||
mime = magic.from_file(
|
||||
os.path.join(watch.watch_data_dir, favicon_filename),
|
||||
mime=True
|
||||
)
|
||||
except ImportError:
|
||||
# Fallback, no python-magic
|
||||
import mimetypes
|
||||
mime, encoding = mimetypes.guess_type(favicon_filename)
|
||||
|
||||
response = make_response(send_from_directory(watch.data_dir, favicon_filename))
|
||||
response = make_response(send_from_directory(watch.watch_data_dir, favicon_filename))
|
||||
response.headers['Content-type'] = mime
|
||||
response.headers['Cache-Control'] = 'max-age=300, must-revalidate' # Cache for 5 minutes, then revalidate
|
||||
return response
|
||||
@@ -860,15 +715,13 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
|
||||
# watchlist UI buttons etc
|
||||
import changedetectionio.blueprint.ui as ui
|
||||
app.register_blueprint(ui.construct_blueprint(datastore, update_q, worker_pool, queuedWatchMetaData, watch_check_update))
|
||||
app.register_blueprint(ui.construct_blueprint(datastore, update_q, worker_handler, queuedWatchMetaData, watch_check_update))
|
||||
|
||||
import changedetectionio.blueprint.watchlist as watchlist
|
||||
app.register_blueprint(watchlist.construct_blueprint(datastore=datastore, update_q=update_q, queuedWatchMetaData=queuedWatchMetaData), url_prefix='')
|
||||
|
||||
# Initialize Socket.IO server conditionally based on settings
|
||||
socket_io_enabled = datastore.data['settings']['application'].get('ui', {}).get('socket_io_enabled', True)
|
||||
if socket_io_enabled and app.config.get('batch_mode'):
|
||||
socket_io_enabled = False
|
||||
socket_io_enabled = datastore.data['settings']['application']['ui'].get('socket_io_enabled', True)
|
||||
if socket_io_enabled:
|
||||
from changedetectionio.realtime.socket_server import init_socketio
|
||||
global socketio_server
|
||||
@@ -897,10 +750,10 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
expected_workers = int(os.getenv("FETCH_WORKERS", datastore.data['settings']['requests']['workers']))
|
||||
|
||||
# Get basic status
|
||||
status = worker_pool.get_worker_status()
|
||||
status = worker_handler.get_worker_status()
|
||||
|
||||
# Perform health check
|
||||
health_result = worker_pool.check_worker_health(
|
||||
health_result = worker_handler.check_worker_health(
|
||||
expected_count=expected_workers,
|
||||
update_q=update_q,
|
||||
notification_q=notification_q,
|
||||
@@ -964,31 +817,16 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
# Can be overridden by ENV or use the default settings
|
||||
n_workers = int(os.getenv("FETCH_WORKERS", datastore.data['settings']['requests']['workers']))
|
||||
logger.info(f"Starting {n_workers} workers during app initialization")
|
||||
worker_pool.start_workers(n_workers, update_q, notification_q, app, datastore)
|
||||
worker_handler.start_workers(n_workers, update_q, notification_q, app, datastore)
|
||||
|
||||
# Skip background threads in batch mode (just process queue and exit)
|
||||
batch_mode = app.config.get('batch_mode', False)
|
||||
if not batch_mode:
|
||||
# @todo handle ctrl break
|
||||
ticker_thread = threading.Thread(target=ticker_thread_check_time_launch_checks, daemon=True, name="TickerThread-ScheduleChecker").start()
|
||||
# @todo handle ctrl break
|
||||
ticker_thread = threading.Thread(target=ticker_thread_check_time_launch_checks).start()
|
||||
threading.Thread(target=notification_runner).start()
|
||||
|
||||
# Start configurable number of notification workers (default 1)
|
||||
notification_workers = int(os.getenv("NOTIFICATION_WORKERS", "1"))
|
||||
for i in range(notification_workers):
|
||||
threading.Thread(
|
||||
target=notification_runner,
|
||||
args=(i,),
|
||||
daemon=True,
|
||||
name=f"NotificationRunner-{i}"
|
||||
).start()
|
||||
logger.info(f"Started {notification_workers} notification worker(s)")
|
||||
|
||||
in_pytest = "pytest" in sys.modules or "PYTEST_CURRENT_TEST" in os.environ
|
||||
# Check for new release version, but not when running in test/build or pytest
|
||||
if not os.getenv("GITHUB_REF", False) and not strtobool(os.getenv('DISABLE_VERSION_CHECK', 'no')) and not in_pytest:
|
||||
threading.Thread(target=check_for_new_version, daemon=True, name="VersionChecker").start()
|
||||
else:
|
||||
logger.info("Batch mode: Skipping ticker thread, notification runner, and version checker")
|
||||
in_pytest = "pytest" in sys.modules or "PYTEST_CURRENT_TEST" in os.environ
|
||||
# Check for new release version, but not when running in test/build or pytest
|
||||
if not os.getenv("GITHUB_REF", False) and not strtobool(os.getenv('DISABLE_VERSION_CHECK', 'no')) and not in_pytest:
|
||||
threading.Thread(target=check_for_new_version).start()
|
||||
|
||||
# Return the Flask app - the Socket.IO will be attached to it but initialized separately
|
||||
# This avoids circular dependencies
|
||||
@@ -1001,16 +839,15 @@ def check_for_new_version():
|
||||
import urllib3
|
||||
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
||||
|
||||
session = requests.Session()
|
||||
session.verify = False
|
||||
|
||||
while not app.config.exit.is_set():
|
||||
try:
|
||||
r = session.post("https://changedetection.io/check-ver.php",
|
||||
r = requests.post("https://changedetection.io/check-ver.php",
|
||||
data={'version': __version__,
|
||||
'app_guid': datastore.data['app_guid'],
|
||||
'watch_count': len(datastore.data['watching'])
|
||||
})
|
||||
},
|
||||
|
||||
verify=False)
|
||||
except:
|
||||
pass
|
||||
|
||||
@@ -1024,17 +861,17 @@ def check_for_new_version():
|
||||
app.config.exit.wait(86400)
|
||||
|
||||
|
||||
def notification_runner(worker_id=0):
|
||||
def notification_runner():
|
||||
global notification_debug_log
|
||||
from datetime import datetime
|
||||
import json
|
||||
with app.app_context():
|
||||
while not app.config.exit.is_set():
|
||||
try:
|
||||
# Multiple workers can run concurrently (configurable via NOTIFICATION_WORKERS)
|
||||
# At the moment only one thread runs (single runner)
|
||||
n_object = notification_q.get(block=False)
|
||||
except queue.Empty:
|
||||
app.config.exit.wait(1)
|
||||
time.sleep(1)
|
||||
|
||||
else:
|
||||
|
||||
@@ -1057,7 +894,7 @@ def notification_runner(worker_id=0):
|
||||
sent_obj = process_notification(n_object, datastore)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Notification worker {worker_id} - Watch URL: {n_object['watch_url']} Error {str(e)}")
|
||||
logger.error(f"Watch URL: {n_object['watch_url']} Error {str(e)}")
|
||||
|
||||
# UUID wont be present when we submit a 'test' from the global settings
|
||||
if 'uuid' in n_object:
|
||||
@@ -1071,7 +908,7 @@ def notification_runner(worker_id=0):
|
||||
app.config['watch_check_update_SIGNAL'].send(app_context=app, watch_uuid=n_object.get('uuid'))
|
||||
|
||||
# Process notifications
|
||||
notification_debug_log+= ["{} - SENDING - {}".format(now.strftime("%c"), json.dumps(sent_obj))]
|
||||
notification_debug_log+= ["{} - SENDING - {}".format(now.strftime("%Y/%m/%d %H:%M:%S,000"), json.dumps(sent_obj))]
|
||||
# Trim the log length
|
||||
notification_debug_log = notification_debug_log[-100:]
|
||||
|
||||
@@ -1087,10 +924,6 @@ def ticker_thread_check_time_launch_checks():
|
||||
logger.debug(f"System env MINIMUM_SECONDS_RECHECK_TIME {recheck_time_minimum_seconds}")
|
||||
|
||||
# Workers are now started during app initialization, not here
|
||||
WAIT_TIME_BETWEEN_LOOP = 1.0 if not IN_PYTEST else 0.01
|
||||
if IN_PYTEST:
|
||||
# The time between loops should be less than the first .sleep/wait in def wait_for_all_checks() of tests/util.py
|
||||
logger.warning(f"Looks like we're in PYTEST! Setting time between searching for items to add to the queue to {WAIT_TIME_BETWEEN_LOOP}s")
|
||||
|
||||
while not app.config.exit.is_set():
|
||||
|
||||
@@ -1098,7 +931,7 @@ def ticker_thread_check_time_launch_checks():
|
||||
now = time.time()
|
||||
if now - last_health_check > 60:
|
||||
expected_workers = int(os.getenv("FETCH_WORKERS", datastore.data['settings']['requests']['workers']))
|
||||
health_result = worker_pool.check_worker_health(
|
||||
health_result = worker_handler.check_worker_health(
|
||||
expected_count=expected_workers,
|
||||
update_q=update_q,
|
||||
notification_q=notification_q,
|
||||
@@ -1108,19 +941,11 @@ def ticker_thread_check_time_launch_checks():
|
||||
|
||||
if health_result['status'] != 'healthy':
|
||||
logger.warning(f"Worker health check: {health_result['message']}")
|
||||
|
||||
|
||||
last_health_check = now
|
||||
|
||||
# Check if all checks are paused
|
||||
if datastore.data['settings']['application'].get('all_paused', False):
|
||||
app.config.exit.wait(1)
|
||||
continue
|
||||
|
||||
# Get a list of watches by UUID that are currently fetching data
|
||||
running_uuids = worker_pool.get_running_uuids()
|
||||
|
||||
# Build set of queued UUIDs once for O(1) lookup instead of O(n) per watch
|
||||
queued_uuids = {q_item.item['uuid'] for q_item in update_q.queue}
|
||||
running_uuids = worker_handler.get_running_uuids()
|
||||
|
||||
# Re #232 - Deepcopy the data incase it changes while we're iterating through it all
|
||||
watch_uuid_list = []
|
||||
@@ -1138,17 +963,16 @@ def ticker_thread_check_time_launch_checks():
|
||||
else:
|
||||
break
|
||||
|
||||
# Re #438 - Don't place more watches in the queue to be checked if the queue is already large
|
||||
while update_q.qsize() >= 2000:
|
||||
logger.warning(f"Recheck watches queue size limit reached ({MAX_QUEUE_SIZE}), skipping adding more items")
|
||||
time.sleep(3)
|
||||
|
||||
|
||||
recheck_time_system_seconds = int(datastore.threshold_seconds)
|
||||
|
||||
# Check for watches outside of the time threshold to put in the thread queue.
|
||||
for watch_index, uuid in enumerate(watch_uuid_list):
|
||||
# Re #438 - Check queue size every 100 watches for CPU efficiency (not every watch)
|
||||
if watch_index % 100 == 0:
|
||||
current_queue_size = update_q.qsize()
|
||||
if current_queue_size >= MAX_QUEUE_SIZE:
|
||||
logger.debug(f"Queue size limit reached ({current_queue_size}/{MAX_QUEUE_SIZE}), stopping scheduler this iteration.")
|
||||
break
|
||||
|
||||
for uuid in watch_uuid_list:
|
||||
now = time.time()
|
||||
watch = datastore.data['watching'].get(uuid)
|
||||
if not watch:
|
||||
@@ -1198,7 +1022,7 @@ def ticker_thread_check_time_launch_checks():
|
||||
seconds_since_last_recheck = now - watch['last_checked']
|
||||
|
||||
if seconds_since_last_recheck >= (threshold + watch.jitter_seconds) and seconds_since_last_recheck >= recheck_time_minimum_seconds:
|
||||
if not uuid in running_uuids and uuid not in queued_uuids:
|
||||
if not uuid in running_uuids and uuid not in [q_uuid.item['uuid'] for q_uuid in update_q.queue]:
|
||||
|
||||
# Proxies can be set to have a limit on seconds between which they can be called
|
||||
watch_proxy = datastore.get_preferred_proxy_for_watch(uuid=uuid)
|
||||
@@ -1223,7 +1047,7 @@ def ticker_thread_check_time_launch_checks():
|
||||
priority = int(time.time())
|
||||
|
||||
# Into the queue with you
|
||||
queued_successfully = worker_pool.queue_item_async_safe(update_q,
|
||||
queued_successfully = worker_handler.queue_item_async_safe(update_q,
|
||||
queuedWatchMetaData.PrioritizedItem(priority=priority,
|
||||
item={'uuid': uuid})
|
||||
)
|
||||
@@ -1240,5 +1064,8 @@ def ticker_thread_check_time_launch_checks():
|
||||
# Reset for next time
|
||||
watch.jitter_seconds = 0
|
||||
|
||||
# Wait before checking the list again - saves CPU
|
||||
time.sleep(1)
|
||||
|
||||
# Should be low so we can break this out in testing
|
||||
app.config.exit.wait(WAIT_TIME_BETWEEN_LOOP)
|
||||
app.config.exit.wait(1)
|
||||
|
||||
@@ -7,6 +7,8 @@ from flask_babel import lazy_gettext as _l, gettext
|
||||
from changedetectionio.blueprint.rss import RSS_FORMAT_TYPES, RSS_TEMPLATE_TYPE_OPTIONS, RSS_TEMPLATE_HTML_DEFAULT
|
||||
from changedetectionio.conditions.form import ConditionFormRow
|
||||
from changedetectionio.notification_service import NotificationContextData
|
||||
from changedetectionio.processors.image_ssim_diff import SCREENSHOT_COMPARISON_THRESHOLD_OPTIONS, \
|
||||
SCREENSHOT_COMPARISON_THRESHOLD_OPTIONS_DEFAULT
|
||||
from changedetectionio.strtobool import strtobool
|
||||
from changedetectionio import processors
|
||||
|
||||
@@ -35,7 +37,7 @@ from changedetectionio.widgets import TernaryNoneBooleanField
|
||||
|
||||
# default
|
||||
# each select <option data-enabled="enabled-0-0"
|
||||
from changedetectionio.browser_steps.browser_steps import browser_step_ui_config
|
||||
from changedetectionio.blueprint.browser_steps.browser_steps import browser_step_ui_config
|
||||
|
||||
from changedetectionio import html_tools, content_fetchers
|
||||
|
||||
@@ -492,6 +494,7 @@ class ValidateJinja2Template(object):
|
||||
Validates that a {token} is from a valid set
|
||||
"""
|
||||
def __call__(self, form, field):
|
||||
from changedetectionio import notification
|
||||
from changedetectionio.jinja2_custom import create_jinja_env
|
||||
from jinja2 import BaseLoader, TemplateSyntaxError, UndefinedError
|
||||
from jinja2.meta import find_undeclared_variables
|
||||
@@ -608,12 +611,13 @@ class ValidateCSSJSONXPATHInput(object):
|
||||
raise ValidationError("XPath not permitted in this field!")
|
||||
from lxml import etree, html
|
||||
import elementpath
|
||||
from changedetectionio.html_tools import SafeXPath3Parser
|
||||
# xpath 2.0-3.1
|
||||
from elementpath.xpath3 import XPath3Parser
|
||||
tree = html.fromstring("<html></html>")
|
||||
line = line.replace('xpath:', '')
|
||||
|
||||
try:
|
||||
elementpath.select(tree, line.strip(), parser=SafeXPath3Parser)
|
||||
elementpath.select(tree, line.strip(), parser=XPath3Parser)
|
||||
except elementpath.ElementPathError as e:
|
||||
message = field.gettext('\'%s\' is not a valid XPath expression. (%s)')
|
||||
raise ValidationError(message % (line, str(e)))
|
||||
@@ -723,10 +727,10 @@ class ValidateStartsWithRegex(object):
|
||||
raise ValidationError(self.message or _l("Invalid value."))
|
||||
|
||||
class quickWatchForm(Form):
|
||||
url = fields.URLField(_l('URL'), validators=[validateURL()])
|
||||
tags = StringTagUUID(_l('Group tag'), validators=[validators.Optional()])
|
||||
url = fields.URLField('URL', validators=[validateURL()])
|
||||
tags = StringTagUUID('Group tag', [validators.Optional()])
|
||||
watch_submit_button = SubmitField(_l('Watch'), render_kw={"class": "pure-button pure-button-primary"})
|
||||
processor = RadioField(_l('Processor'), choices=lambda: processors.available_processors(), default=processors.get_default_processor)
|
||||
processor = RadioField(_l('Processor'), choices=lambda: processors.available_processors(), default="text_json_diff")
|
||||
edit_and_watch_submit_button = SubmitField(_l('Edit > Watch'), render_kw={"class": "pure-button pure-button-primary"})
|
||||
|
||||
|
||||
@@ -740,11 +744,12 @@ class commonSettingsForm(Form):
|
||||
self.notification_title.extra_notification_tokens = kwargs.get('extra_notification_tokens', {})
|
||||
self.notification_urls.extra_notification_tokens = kwargs.get('extra_notification_tokens', {})
|
||||
|
||||
fetch_backend = RadioField(_l('Fetch Method'), choices=content_fetchers.available_fetchers(), validators=[ValidateContentFetcherIsReady()])
|
||||
notification_body = TextAreaField(_l('Notification Body'), default='{{ watch_url }} had a change.', validators=[validators.Optional(), ValidateJinja2Template()])
|
||||
notification_format = SelectField(_l('Notification format'), choices=list(valid_notification_formats.items()))
|
||||
notification_title = StringField(_l('Notification Title'), default='ChangeDetection.io Notification - {{ watch_url }}', validators=[validators.Optional(), ValidateJinja2Template()])
|
||||
notification_urls = StringListField(_l('Notification URL List'), validators=[validators.Optional(), ValidateAppRiseServers(), ValidateJinja2Template()])
|
||||
processor = RadioField( label=_l("Processor - What do you want to achieve?"), choices=lambda: processors.available_processors(), default=processors.get_default_processor)
|
||||
processor = RadioField( label=_l("Processor - What do you want to achieve?"), choices=lambda: processors.available_processors(), default="text_json_diff")
|
||||
scheduler_timezone_default = StringField(_l("Default timezone for watch check scheduler"), render_kw={"list": "timezones"}, validators=[validateTimeZoneName()])
|
||||
webdriver_delay = IntegerField(_l('Wait seconds before extracting text'), validators=[validators.Optional(), validators.NumberRange(min=1, message=_l("Should contain one or more seconds"))])
|
||||
|
||||
@@ -758,7 +763,7 @@ class commonSettingsForm(Form):
|
||||
|
||||
|
||||
class importForm(Form):
|
||||
processor = RadioField(_l('Processor'), choices=lambda: processors.available_processors(), default=processors.get_default_processor)
|
||||
processor = RadioField(_l('Processor'), choices=lambda: processors.available_processors(), default="text_json_diff")
|
||||
urls = TextAreaField(_l('URLs'))
|
||||
xlsx_file = FileField(_l('Upload .xlsx file'), validators=[FileAllowed(['xlsx'], _l('Must be .xlsx file!'))])
|
||||
file_mapping = SelectField(_l('File mapping'), [validators.DataRequired()], choices={('wachete', 'Wachete mapping'), ('custom','Custom mapping')})
|
||||
@@ -776,13 +781,11 @@ class SingleBrowserStep(Form):
|
||||
|
||||
class processor_text_json_diff_form(commonSettingsForm):
|
||||
|
||||
browser_profile = RadioField(_l('Browser / Fetch method'), choices=[]) # populated at runtime in edit.py
|
||||
url = fields.URLField('Web Page URL', validators=[validateURL()])
|
||||
tags = StringTagUUID('Group Tag', [validators.Optional()], default='')
|
||||
url = fields.URLField('URL', validators=[validateURL()])
|
||||
tags = StringTagUUID('Group tag', [validators.Optional()], default='')
|
||||
|
||||
time_between_check = EnhancedFormField(
|
||||
TimeBetweenCheckForm,
|
||||
label=_l('Time Between Check'),
|
||||
conditional_field='time_between_check_use_default',
|
||||
conditional_message=REQUIRE_ATLEAST_ONE_TIME_PART_WHEN_NOT_GLOBAL_DEFAULT,
|
||||
conditional_test_function=validate_time_between_check_has_values
|
||||
@@ -816,7 +819,8 @@ class processor_text_json_diff_form(commonSettingsForm):
|
||||
filter_text_removed = BooleanField(_l('Removed lines'), default=True)
|
||||
|
||||
trigger_text = StringListField(_l('Keyword triggers - Trigger/wait for text'), [validators.Optional(), ValidateListRegex()])
|
||||
browser_steps = FieldList(FormField(SingleBrowserStep), min_entries=10)
|
||||
if os.getenv("PLAYWRIGHT_DRIVER_URL"):
|
||||
browser_steps = FieldList(FormField(SingleBrowserStep), min_entries=10)
|
||||
text_should_not_be_present = StringListField(_l('Block change-detection while text matches'), [validators.Optional(), ValidateListRegex()])
|
||||
webdriver_js_execute_code = TextAreaField(_l('Execute JavaScript before change detection'), render_kw={"rows": "5"}, validators=[validators.Optional()])
|
||||
|
||||
@@ -832,8 +836,6 @@ class processor_text_json_diff_form(commonSettingsForm):
|
||||
conditions = FieldList(FormField(ConditionFormRow), min_entries=1) # Add rule logic here
|
||||
use_page_title_in_list = TernaryNoneBooleanField(_l('Use page <title> in list'), default=None)
|
||||
|
||||
history_snapshot_max_length = IntegerField(_l('Number of history items per watch to keep'), render_kw={"style": "width: 5em;"}, validators=[validators.Optional(), validators.NumberRange(min=2)])
|
||||
|
||||
def extra_tab_content(self):
|
||||
return None
|
||||
|
||||
@@ -938,68 +940,14 @@ class SingleExtraBrowser(Form):
|
||||
ValidateSimpleURL()
|
||||
], render_kw={"placeholder": "wss://brightdata... wss://oxylabs etc", "size":50})
|
||||
|
||||
|
||||
class BrowserProfileForm(Form):
|
||||
"""Create or edit a named BrowserProfile stored in settings.application.browser_profiles."""
|
||||
|
||||
name = StringField(
|
||||
_l('Profile name'),
|
||||
[validators.DataRequired(), validators.Length(max=100)],
|
||||
render_kw={"placeholder": _l("e.g. Mobile Chrome, Bright Data CDP"), "maxlength": "100"}
|
||||
)
|
||||
fetch_backend = SelectField(
|
||||
_l('Fetch method'),
|
||||
choices=[], # populated at runtime from available_fetchers()
|
||||
)
|
||||
browser_connection_url = StringField(
|
||||
_l('Browser connection URL'),
|
||||
[
|
||||
validators.Optional(),
|
||||
ValidateStartsWithRegex(
|
||||
regex=r'^(wss?|ws)://',
|
||||
flags=re.IGNORECASE,
|
||||
message=_l('Browser connection URL must start with ws:// or wss://')
|
||||
),
|
||||
ValidateSimpleURL(),
|
||||
],
|
||||
render_kw={"placeholder": "ws://my-chrome:3000", "size": 50}
|
||||
)
|
||||
viewport_width = IntegerField(
|
||||
_l('Viewport width (px)'),
|
||||
[validators.Optional(), validators.NumberRange(min=100, max=7680)],
|
||||
default=1280,
|
||||
render_kw={"style": "width:5em;"}
|
||||
)
|
||||
viewport_height = IntegerField(
|
||||
_l('Viewport height (px)'),
|
||||
[validators.Optional(), validators.NumberRange(min=100, max=4320)],
|
||||
default=1000,
|
||||
render_kw={"style": "width:5em;"}
|
||||
)
|
||||
block_images = BooleanField(_l('Block images (faster loads)'), default=False)
|
||||
block_fonts = BooleanField(_l('Block web fonts'), default=False)
|
||||
ignore_https_errors = BooleanField(_l('Ignore HTTPS/TLS errors'), default=False)
|
||||
user_agent = StringField(
|
||||
_l('User-Agent override'),
|
||||
[validators.Optional(), validators.Length(max=500)],
|
||||
render_kw={"placeholder": _l("Leave blank to use fetcher default"), "size": 60}
|
||||
)
|
||||
locale = StringField(
|
||||
_l('Locale'),
|
||||
[validators.Optional(), validators.Length(max=20)],
|
||||
render_kw={"placeholder": "en-US, de-DE, fr-FR …", "size": 15}
|
||||
)
|
||||
|
||||
class DefaultUAInputForm(Form):
|
||||
requests = StringField(_l('Plaintext requests'), validators=[validators.Optional()], render_kw={"placeholder": "<default>"})
|
||||
html_requests = StringField(_l('Plaintext requests'), validators=[validators.Optional()], render_kw={"placeholder": "<default>"})
|
||||
if os.getenv("PLAYWRIGHT_DRIVER_URL") or os.getenv("WEBDRIVER_URL"):
|
||||
playwright = StringField(_l('Chrome/Playwright requests'), validators=[validators.Optional()], render_kw={"placeholder": "<default>"})
|
||||
selenium = StringField(_l('Chrome/Selenium requests'), validators=[validators.Optional()], render_kw={"placeholder": "<default>"})
|
||||
puppeteer = StringField(_l('Chrome/Puppeteer requests'), validators=[validators.Optional()], render_kw={"placeholder": "<default>"})
|
||||
html_webdriver = StringField(_l('Chrome requests'), validators=[validators.Optional()], render_kw={"placeholder": "<default>"})
|
||||
|
||||
# datastore.data['settings']['requests']..
|
||||
class globalSettingsRequestForm(Form):
|
||||
time_between_check = RequiredFormField(TimeBetweenCheckForm, label=_l('Time Between Check'))
|
||||
time_between_check = RequiredFormField(TimeBetweenCheckForm)
|
||||
time_schedule_limit = FormField(ScheduleLimitForm)
|
||||
proxy = RadioField(_l('Default proxy'))
|
||||
jitter_seconds = IntegerField(_l('Random jitter seconds ± check'),
|
||||
@@ -1043,6 +991,7 @@ class globalSettingsApplicationForm(commonSettingsForm):
|
||||
render_kw={"placeholder": os.getenv('BASE_URL', 'Not set')}
|
||||
)
|
||||
empty_pages_are_a_change = BooleanField(_l('Treat empty pages as a change?'), default=False)
|
||||
fetch_backend = RadioField(_l('Fetch Method'), default="html_requests", choices=content_fetchers.available_fetchers(), validators=[ValidateContentFetcherIsReady()])
|
||||
global_ignore_text = StringListField(_l('Ignore Text'), [ValidateListRegex()])
|
||||
global_subtractive_selectors = StringListField(_l('Remove elements'), [ValidateCSSJSONXPATHInput(allow_json=False)])
|
||||
ignore_whitespace = BooleanField(_l('Ignore whitespace'))
|
||||
@@ -1058,7 +1007,7 @@ class globalSettingsApplicationForm(commonSettingsForm):
|
||||
render_kw={"placeholder": "0.1", "style": "width: 8em;"}
|
||||
)
|
||||
|
||||
password = SaltyPasswordField(_l('Password'), render_kw={"autocomplete": "new-password"})
|
||||
password = SaltyPasswordField()
|
||||
pager_size = IntegerField(_l('Pager size'),
|
||||
render_kw={"style": "width: 5em;"},
|
||||
validators=[validators.NumberRange(min=0,
|
||||
@@ -1084,8 +1033,6 @@ class globalSettingsApplicationForm(commonSettingsForm):
|
||||
render_kw={"style": "width: 5em;"},
|
||||
validators=[validators.NumberRange(min=0,
|
||||
message=_l("Should contain zero or more attempts"))])
|
||||
|
||||
history_snapshot_max_length = IntegerField(_l('Number of history items per watch to keep'), render_kw={"style": "width: 5em;"}, validators=[validators.Optional(), validators.NumberRange(min=2)])
|
||||
ui = FormField(globalSettingsApplicationUIForm)
|
||||
|
||||
|
||||
|
||||
@@ -23,53 +23,6 @@ class JSONNotFound(ValueError):
|
||||
def __init__(self, msg):
|
||||
ValueError.__init__(self, msg)
|
||||
|
||||
|
||||
_DEFAULT_UNSAFE_XPATH3_FUNCTIONS = [
|
||||
'unparsed-text',
|
||||
'unparsed-text-lines',
|
||||
'unparsed-text-available',
|
||||
'doc',
|
||||
'doc-available',
|
||||
'environment-variable',
|
||||
'available-environment-variables',
|
||||
]
|
||||
|
||||
|
||||
def _build_safe_xpath3_parser():
|
||||
"""Return an XPath3Parser subclass with filesystem/environment access functions removed.
|
||||
|
||||
XPath 3.0 includes functions that can read arbitrary files or environment variables:
|
||||
- unparsed-text / unparsed-text-lines / unparsed-text-available (file read)
|
||||
- doc / doc-available (XML fetch from URI)
|
||||
- environment-variable / available-environment-variables (env var leakage)
|
||||
|
||||
Subclassing gives us an independent symbol_table copy (not shared with the parent class),
|
||||
so removing entries here does not affect XPath3Parser itself.
|
||||
|
||||
Override the blocked list via the XPATH_BLOCKED_FUNCTIONS environment variable
|
||||
(comma-separated, e.g. "unparsed-text,doc,environment-variable").
|
||||
"""
|
||||
import os
|
||||
from elementpath.xpath3 import XPath3Parser
|
||||
|
||||
class SafeXPath3Parser(XPath3Parser):
|
||||
pass
|
||||
|
||||
env_override = os.getenv('XPATH_BLOCKED_FUNCTIONS')
|
||||
if env_override is not None:
|
||||
blocked = [f.strip() for f in env_override.split(',') if f.strip()]
|
||||
else:
|
||||
blocked = _DEFAULT_UNSAFE_XPATH3_FUNCTIONS
|
||||
|
||||
for _fn in blocked:
|
||||
SafeXPath3Parser.symbol_table.pop(_fn, None)
|
||||
|
||||
return SafeXPath3Parser
|
||||
|
||||
|
||||
# Module-level singleton — built once, reused everywhere.
|
||||
SafeXPath3Parser = _build_safe_xpath3_parser()
|
||||
|
||||
# Doesn't look like python supports forward slash auto enclosure in re.findall
|
||||
# So convert it to inline flag "(?i)foobar" type configuration
|
||||
@lru_cache(maxsize=100)
|
||||
@@ -230,6 +183,8 @@ def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False
|
||||
"""
|
||||
from lxml import etree, html
|
||||
import elementpath
|
||||
# xpath 2.0-3.1
|
||||
from elementpath.xpath3 import XPath3Parser
|
||||
|
||||
parser = etree.HTMLParser()
|
||||
tree = None
|
||||
@@ -255,7 +210,7 @@ def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False
|
||||
# This allows //title to match elements in the default namespace
|
||||
namespaces[''] = tree.nsmap[None]
|
||||
|
||||
r = elementpath.select(tree, xpath_filter.strip(), namespaces=namespaces, parser=SafeXPath3Parser)
|
||||
r = elementpath.select(tree, xpath_filter.strip(), namespaces=namespaces, parser=XPath3Parser)
|
||||
#@note: //title/text() now works with default namespaces (fixed by registering '' prefix)
|
||||
#@note: //title/text() wont work where <title>CDATA.. (use cdata_in_document_to_text first)
|
||||
|
||||
@@ -280,9 +235,6 @@ def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False
|
||||
else:
|
||||
html_block += elementpath_tostring(element)
|
||||
|
||||
# Drop element references before the finally block so tree.clear() can release
|
||||
# the libxml2 document immediately (elements pin the C-level doc via refcount).
|
||||
del r
|
||||
return html_block
|
||||
finally:
|
||||
# Explicitly clear the tree to free memory
|
||||
@@ -487,25 +439,13 @@ def extract_json_as_string(content, json_filter, ensure_is_ldjson_info_type=None
|
||||
except json.JSONDecodeError as e:
|
||||
logger.warning(f"Error processing JSON {content[:20]}...{str(e)})")
|
||||
else:
|
||||
# Check for JSONP wrapper: someCallback({...}) or some.namespace({...})
|
||||
# Server may claim application/json but actually return JSONP
|
||||
jsonp_match = re.match(r'^\w[\w.]*\s*\((.+)\)\s*;?\s*$', content.lstrip("\ufeff").strip(), re.DOTALL)
|
||||
if jsonp_match:
|
||||
try:
|
||||
inner = jsonp_match.group(1).strip()
|
||||
logger.warning(f"Content looks like JSONP, attempting to extract inner JSON for filter '{json_filter}'")
|
||||
stripped_text_from_html = _parse_json(json.loads(inner), json_filter)
|
||||
except json.JSONDecodeError as e:
|
||||
logger.warning(f"Error processing JSONP inner content {content[:20]}...{str(e)})")
|
||||
|
||||
if not stripped_text_from_html:
|
||||
# Probably something else, go fish inside for it
|
||||
try:
|
||||
stripped_text_from_html = extract_json_blob_from_html(content=content,
|
||||
ensure_is_ldjson_info_type=ensure_is_ldjson_info_type,
|
||||
json_filter=json_filter)
|
||||
except json.JSONDecodeError as e:
|
||||
logger.warning(f"Error processing JSON while extracting JSON from HTML blob {content[:20]}...{str(e)})")
|
||||
# Probably something else, go fish inside for it
|
||||
try:
|
||||
stripped_text_from_html = extract_json_blob_from_html(content=content,
|
||||
ensure_is_ldjson_info_type=ensure_is_ldjson_info_type,
|
||||
json_filter=json_filter )
|
||||
except json.JSONDecodeError as e:
|
||||
logger.warning(f"Error processing JSON while extracting JSON from HTML blob {content[:20]}...{str(e)})")
|
||||
|
||||
if not stripped_text_from_html:
|
||||
# Re 265 - Just return an empty string when filter not found
|
||||
@@ -599,18 +539,6 @@ def cdata_in_document_to_text(html_content: str, render_anchor_tag_content=False
|
||||
|
||||
|
||||
def html_to_text(html_content: str, render_anchor_tag_content=False, is_rss=False, timeout=10) -> str:
|
||||
"""
|
||||
Convert HTML content to plain text using inscriptis.
|
||||
|
||||
Thread-Safety: This function uses inscriptis.get_text() which internally calls
|
||||
lxml.html.fromstring() with the default parser. Testing with 50 concurrent threads
|
||||
confirms this approach is thread-safe and produces deterministic output.
|
||||
|
||||
Alternative Approach Rejected: An explicit HTMLParser instance (thread-local or fresh)
|
||||
would also be thread-safe, but was found to break change detection logic in subtle ways
|
||||
(test_check_basic_change_detection_functionality). The default parser provides correct
|
||||
and reliable behavior.
|
||||
"""
|
||||
from inscriptis import get_text
|
||||
from inscriptis.model.config import ParserConfig
|
||||
|
||||
@@ -621,33 +549,10 @@ def html_to_text(html_content: str, render_anchor_tag_content=False, is_rss=Fals
|
||||
)
|
||||
else:
|
||||
parser_config = None
|
||||
|
||||
if is_rss:
|
||||
html_content = re.sub(r'<title([\s>])', r'<h1\1', html_content)
|
||||
html_content = re.sub(r'</title>', r'</h1>', html_content)
|
||||
else:
|
||||
# Use BS4 html.parser to strip bloat — SPA's often dump 10MB+ of CSS/JS into <head>,
|
||||
# causing inscriptis to silently give up. Regex-based stripping is unsafe because tags
|
||||
# can appear inside JSON data attributes with JS-escaped closing tags (e.g. <\/script>),
|
||||
# causing the regex to scan past the intended close and eat real page content.
|
||||
from bs4 import BeautifulSoup
|
||||
soup = BeautifulSoup(html_content, 'html.parser')
|
||||
# Strip tags that inscriptis cannot render as meaningful text and which can be very large.
|
||||
# svg/math: produce path-data/MathML garbage; canvas/iframe/template: no inscriptis handlers.
|
||||
# video/audio/picture are kept — they may contain meaningful fallback text or captions.
|
||||
for tag in soup.find_all(['head', 'script', 'style', 'noscript', 'svg',
|
||||
'math', 'canvas', 'iframe', 'template']):
|
||||
tag.decompose()
|
||||
|
||||
# SPAs often use <body style="display:none"> to hide content until JS loads.
|
||||
# inscriptis respects CSS display rules, so strip hiding styles from the body tag.
|
||||
body_tag = soup.find('body')
|
||||
if body_tag and body_tag.get('style'):
|
||||
style = body_tag['style']
|
||||
if re.search(r'\b(?:display\s*:\s*none|visibility\s*:\s*hidden)\b', style, re.IGNORECASE):
|
||||
logger.debug(f"html_to_text: Removing hiding styles from body tag (found: '{style}')")
|
||||
del body_tag['style']
|
||||
|
||||
html_content = str(soup)
|
||||
|
||||
text_content = get_text(html_content, config=parser_config)
|
||||
return text_content
|
||||
|
||||
@@ -1,113 +0,0 @@
|
||||
"""
|
||||
URL redirect validation module for preventing open redirect vulnerabilities.
|
||||
|
||||
This module provides functionality to safely validate redirect URLs, ensuring they:
|
||||
1. Point to internal routes only (no external redirects)
|
||||
2. Are properly normalized (preventing browser parsing differences)
|
||||
3. Match registered Flask routes (no fake/non-existent pages)
|
||||
4. Are fully logged for security monitoring
|
||||
|
||||
References:
|
||||
- https://flask-login.readthedocs.io/ (safe redirect patterns)
|
||||
- https://blog.miguelgrinberg.com/post/the-flask-mega-tutorial-part-v-user-logins
|
||||
- https://www.pythonkitchen.com/how-prevent-open-redirect-vulnerab-flask/
|
||||
"""
|
||||
|
||||
from urllib.parse import urlparse, urljoin
|
||||
from flask import request
|
||||
from loguru import logger
|
||||
|
||||
|
||||
def is_safe_url(target, app):
|
||||
"""
|
||||
Validate that a redirect URL is safe to prevent open redirect vulnerabilities.
|
||||
|
||||
This follows Flask/Werkzeug best practices by ensuring the redirect URL:
|
||||
1. Is a relative path starting with exactly one '/'
|
||||
2. Does not start with '//' (double-slash attack)
|
||||
3. Has no external protocol handlers
|
||||
4. Points to a valid registered route in the application
|
||||
5. Is properly normalized to prevent browser parsing differences
|
||||
|
||||
Args:
|
||||
target: The URL to validate (e.g., '/settings', '/login#top')
|
||||
app: The Flask application instance (needed for route validation)
|
||||
|
||||
Returns:
|
||||
bool: True if the URL is safe for redirection, False otherwise
|
||||
|
||||
Examples:
|
||||
>>> is_safe_url('/settings', app)
|
||||
True
|
||||
>>> is_safe_url('//evil.com', app)
|
||||
False
|
||||
>>> is_safe_url('/settings#general', app)
|
||||
True
|
||||
>>> is_safe_url('/fake-page', app)
|
||||
False
|
||||
"""
|
||||
if not target:
|
||||
return False
|
||||
|
||||
# Normalize the URL to prevent browser parsing differences
|
||||
# Strip whitespace and replace backslashes (which some browsers interpret as forward slashes)
|
||||
target = target.strip()
|
||||
target = target.replace('\\', '/')
|
||||
|
||||
# First, check if it starts with // or more (double-slash attack)
|
||||
if target.startswith('//'):
|
||||
logger.warning(f"Blocked redirect attempt with double-slash: {target}")
|
||||
return False
|
||||
|
||||
# Parse the URL to check for scheme and netloc
|
||||
parsed = urlparse(target)
|
||||
|
||||
# Block any URL with a scheme (http://, https://, javascript:, etc.)
|
||||
if parsed.scheme:
|
||||
logger.warning(f"Blocked redirect attempt with scheme: {target}")
|
||||
return False
|
||||
|
||||
# Block any URL with a network location (netloc)
|
||||
# This catches patterns like //evil.com, user@host, etc.
|
||||
if parsed.netloc:
|
||||
logger.warning(f"Blocked redirect attempt with netloc: {target}")
|
||||
return False
|
||||
|
||||
# At this point, we have a relative URL with no scheme or netloc
|
||||
# Use urljoin to resolve it and verify it points to the same host
|
||||
ref_url = urlparse(request.host_url)
|
||||
test_url = urlparse(urljoin(request.host_url, target))
|
||||
|
||||
# Check: ensure the resolved URL has the same netloc as current host
|
||||
if not (test_url.scheme in ('http', 'https') and ref_url.netloc == test_url.netloc):
|
||||
logger.warning(f"Blocked redirect attempt with mismatched netloc: {target}")
|
||||
return False
|
||||
|
||||
# Additional validation: Check if the URL matches a registered route
|
||||
# This prevents redirects to non-existent pages or unintended endpoints
|
||||
try:
|
||||
# Get the path without query string and fragment
|
||||
# Fragments (like #general) are automatically stripped by urlparse
|
||||
path = parsed.path
|
||||
|
||||
# Create a URL adapter bound to the server name
|
||||
adapter = app.url_map.bind(ref_url.netloc)
|
||||
|
||||
# Try to match the path to a registered route
|
||||
# This will raise NotFound if the route doesn't exist
|
||||
endpoint, values = adapter.match(path, return_rule=False)
|
||||
|
||||
# Block redirects to static file endpoints - these are catch-all routes
|
||||
# that would match arbitrary paths, potentially allowing unintended redirects
|
||||
if endpoint in ('static_content', 'static', 'static_flags'):
|
||||
logger.warning(f"Blocked redirect to static endpoint: {target}")
|
||||
return False
|
||||
|
||||
# Successfully matched a valid route
|
||||
logger.debug(f"Validated safe redirect to endpoint '{endpoint}': {target}")
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
# Route doesn't exist or can't be matched
|
||||
logger.warning(f"Blocked redirect to non-existent route: {target} (error: {e})")
|
||||
return False
|
||||
@@ -52,13 +52,7 @@ def render(template_str, **args: t.Any) -> str:
|
||||
return output[:JINJA2_MAX_RETURN_PAYLOAD_SIZE]
|
||||
|
||||
def render_fully_escaped(content):
|
||||
"""
|
||||
Escape HTML content safely.
|
||||
|
||||
MEMORY LEAK FIX: Use markupsafe.escape() directly instead of creating
|
||||
Jinja2 environments (was causing 1M+ compilations per page load).
|
||||
Simpler, faster, and no concerns about environment state.
|
||||
"""
|
||||
from markupsafe import escape
|
||||
return str(escape(content))
|
||||
env = jinja2.sandbox.ImmutableSandboxedEnvironment(autoescape=True)
|
||||
template = env.from_string("{{ some_html|e }}")
|
||||
return template.render(some_html=content)
|
||||
|
||||
|
||||
@@ -29,25 +29,18 @@ def get_timeago_locale(flask_locale):
|
||||
"""
|
||||
locale_map = {
|
||||
'zh': 'zh_CN', # Chinese Simplified
|
||||
# timeago library just hasn't been updated to use the more modern locale naming convention, before BCP 47 / RFC 5646.
|
||||
'zh_TW': 'zh_TW', # Chinese Traditional (timeago uses zh_TW)
|
||||
'zh_Hant_TW': 'zh_TW', # Flask-Babel normalizes zh_TW to zh_Hant_TW, map back to timeago's zh_TW
|
||||
'pt': 'pt_PT', # Portuguese (Portugal)
|
||||
'sv': 'sv_SE', # Swedish
|
||||
'no': 'nb_NO', # Norwegian Bokmål
|
||||
'hi': 'in_HI', # Hindi
|
||||
'cs': 'en', # Czech not supported by timeago, fallback to English
|
||||
'uk': 'uk', # Ukrainian
|
||||
'en_GB': 'en', # British English - timeago uses 'en'
|
||||
'en_US': 'en', # American English - timeago uses 'en'
|
||||
}
|
||||
return locale_map.get(flask_locale, flask_locale)
|
||||
|
||||
# Language metadata: flag icon CSS class and native name
|
||||
# Using flag-icons library: https://flagicons.lipis.dev/
|
||||
LANGUAGE_DATA = {
|
||||
'en_GB': {'flag': 'fi fi-gb fis', 'name': 'English (UK)'},
|
||||
'en_US': {'flag': 'fi fi-us fis', 'name': 'English (US)'},
|
||||
'en': {'flag': 'fi fi-gb fis', 'name': 'English'},
|
||||
'de': {'flag': 'fi fi-de fis', 'name': 'Deutsch'},
|
||||
'fr': {'flag': 'fi fi-fr fis', 'name': 'Français'},
|
||||
'ko': {'flag': 'fi fi-kr fis', 'name': '한국어'},
|
||||
@@ -57,7 +50,7 @@ LANGUAGE_DATA = {
|
||||
'it': {'flag': 'fi fi-it fis', 'name': 'Italiano'},
|
||||
'ja': {'flag': 'fi fi-jp fis', 'name': '日本語'},
|
||||
'zh': {'flag': 'fi fi-cn fis', 'name': '中文 (简体)'},
|
||||
'zh_Hant_TW': {'flag': 'fi fi-tw fis', 'name': '繁體中文'},
|
||||
'zh_TW': {'flag': 'fi fi-tw fis', 'name': '繁體中文'},
|
||||
'ru': {'flag': 'fi fi-ru fis', 'name': 'Русский'},
|
||||
'pl': {'flag': 'fi fi-pl fis', 'name': 'Polski'},
|
||||
'nl': {'flag': 'fi fi-nl fis', 'name': 'Nederlands'},
|
||||
@@ -68,7 +61,6 @@ LANGUAGE_DATA = {
|
||||
'tr': {'flag': 'fi fi-tr fis', 'name': 'Türkçe'},
|
||||
'ar': {'flag': 'fi fi-sa fis', 'name': 'العربية'},
|
||||
'hi': {'flag': 'fi fi-in fis', 'name': 'हिन्दी'},
|
||||
'uk': {'flag': 'fi fi-ua fis', 'name': 'Українська'},
|
||||
}
|
||||
|
||||
|
||||
@@ -79,7 +71,10 @@ def get_available_languages():
|
||||
"""
|
||||
translations_dir = Path(__file__).parent / 'translations'
|
||||
|
||||
available = {}
|
||||
# Always include English as base language
|
||||
available = {
|
||||
'en': LANGUAGE_DATA['en']
|
||||
}
|
||||
|
||||
# Scan for translation directories
|
||||
if translations_dir.exists():
|
||||
@@ -90,10 +85,6 @@ def get_available_languages():
|
||||
if po_file.exists():
|
||||
available[lang_dir.name] = LANGUAGE_DATA[lang_dir.name]
|
||||
|
||||
# If no English variants found, fall back to adding en_GB as default
|
||||
if 'en_GB' not in available and 'en_US' not in available:
|
||||
available['en_GB'] = LANGUAGE_DATA['en_GB']
|
||||
|
||||
return available
|
||||
|
||||
|
||||
|
||||
@@ -2,7 +2,6 @@ from os import getenv
|
||||
from copy import deepcopy
|
||||
|
||||
from changedetectionio.blueprint.rss import RSS_FORMAT_TYPES, RSS_CONTENT_FORMAT_DEFAULT
|
||||
from changedetectionio.model.Tags import TagsDict
|
||||
|
||||
from changedetectionio.notification import (
|
||||
default_notification_body,
|
||||
@@ -30,28 +29,21 @@ class model(dict):
|
||||
'proxy': None, # Preferred proxy connection
|
||||
'time_between_check': {'weeks': None, 'days': None, 'hours': 3, 'minutes': None, 'seconds': None},
|
||||
'timeout': int(getenv("DEFAULT_SETTINGS_REQUESTS_TIMEOUT", "45")), # Default 45 seconds
|
||||
'workers': int(getenv("DEFAULT_SETTINGS_REQUESTS_WORKERS", "5")), # Number of threads, lower is better for slow connections
|
||||
'workers': int(getenv("DEFAULT_SETTINGS_REQUESTS_WORKERS", "10")), # Number of threads, lower is better for slow connections
|
||||
'default_ua': {
|
||||
'requests': getenv("DEFAULT_SETTINGS_HEADERS_USERAGENT", DEFAULT_SETTINGS_HEADERS_USERAGENT),
|
||||
'playwright': None,
|
||||
'selenium': None,
|
||||
'puppeteer': None,
|
||||
'html_requests': getenv("DEFAULT_SETTINGS_HEADERS_USERAGENT", DEFAULT_SETTINGS_HEADERS_USERAGENT),
|
||||
'html_webdriver': None,
|
||||
}
|
||||
},
|
||||
'application': {
|
||||
# Custom notification content
|
||||
'all_paused': False,
|
||||
'all_muted': False,
|
||||
'api_access_token_enabled': True,
|
||||
'base_url' : None,
|
||||
'empty_pages_are_a_change': False,
|
||||
'browser_profile': None, # machine-name of the system-default BrowserProfile
|
||||
'browser_profiles': {}, # user-defined profiles keyed by machine name
|
||||
'fetch_backend': getenv("DEFAULT_FETCH_BACKEND", "requests"),
|
||||
'fetch_backend': getenv("DEFAULT_FETCH_BACKEND", "html_requests"),
|
||||
'filter_failure_notification_threshold_attempts': _FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT,
|
||||
'global_ignore_text': [], # List of text to ignore when calculating the comparison checksum
|
||||
'global_subtractive_selectors': [],
|
||||
'history_snapshot_max_length': None,
|
||||
'ignore_whitespace': True,
|
||||
'ignore_status_codes': False, #@todo implement, as ternary.
|
||||
'ssim_threshold': '0.96', # Default SSIM threshold for screenshot comparison
|
||||
@@ -73,7 +65,7 @@ class model(dict):
|
||||
'schema_version' : 0,
|
||||
'shared_diff_access': False,
|
||||
'strip_ignored_lines': False,
|
||||
'tags': None, # Initialized in __init__ with real datastore_path
|
||||
'tags': {}, #@todo use Tag.model initialisers
|
||||
'webdriver_delay': None , # Extra delay in seconds before extracting text
|
||||
'ui': {
|
||||
'use_page_title_in_list': True,
|
||||
@@ -85,16 +77,10 @@ class model(dict):
|
||||
}
|
||||
}
|
||||
|
||||
def __init__(self, *arg, datastore_path=None, **kw):
|
||||
def __init__(self, *arg, **kw):
|
||||
super(model, self).__init__(*arg, **kw)
|
||||
# Capture any tags data passed in before base_config overwrites the structure
|
||||
existing_tags = self.get('settings', {}).get('application', {}).get('tags') or {}
|
||||
# CRITICAL: deepcopy to avoid sharing mutable objects between instances
|
||||
self.update(deepcopy(self.base_config))
|
||||
# TagsDict requires the real datastore_path at runtime (cannot be set at class-definition time)
|
||||
if datastore_path is None:
|
||||
raise ValueError("App.model() requires 'datastore_path' keyword argument")
|
||||
self['settings']['application']['tags'] = TagsDict(existing_tags, datastore_path=datastore_path)
|
||||
|
||||
|
||||
def parse_headers_from_text_file(filepath):
|
||||
|
||||
@@ -1,48 +1,10 @@
|
||||
"""
|
||||
Tag/Group domain model for organizing and overriding watch settings.
|
||||
|
||||
ARCHITECTURE NOTE: Configuration Override Hierarchy
|
||||
===================================================
|
||||
|
||||
Tags can override Watch settings when overrides_watch=True.
|
||||
Current implementation requires manual checking in processors:
|
||||
|
||||
for tag_uuid in watch.get('tags'):
|
||||
tag = datastore['settings']['application']['tags'][tag_uuid]
|
||||
if tag.get('overrides_watch'):
|
||||
restock_settings = tag.get('restock_settings', {})
|
||||
break
|
||||
|
||||
With Pydantic, this would be automatic via chain resolution:
|
||||
Watch → Tag (first with overrides_watch) → Global
|
||||
|
||||
See: Watch.py model docstring for full Pydantic architecture explanation
|
||||
See: processors/restock_diff/processor.py:184-192 for current manual implementation
|
||||
"""
|
||||
|
||||
from changedetectionio.model import watch_base
|
||||
from changedetectionio.model.persistence import EntityPersistenceMixin
|
||||
|
||||
class model(EntityPersistenceMixin, watch_base):
|
||||
"""
|
||||
Tag domain model - groups watches and can override their settings.
|
||||
|
||||
Tags inherit from watch_base to reuse all the same fields as Watch.
|
||||
When overrides_watch=True, tag settings take precedence over watch settings
|
||||
for all watches in this tag/group.
|
||||
|
||||
Fields:
|
||||
overrides_watch (bool): If True, this tag's settings override watch settings
|
||||
title (str): Display name for this tag/group
|
||||
uuid (str): Unique identifier
|
||||
... (all fields from watch_base can be set as tag-level overrides)
|
||||
|
||||
Resolution order when overrides_watch=True:
|
||||
Watch.field → Tag.field (if overrides_watch) → Global.field
|
||||
"""
|
||||
class model(watch_base):
|
||||
|
||||
def __init__(self, *arg, **kw):
|
||||
# Parent class (watch_base) handles __datastore and __datastore_path
|
||||
super(model, self).__init__(*arg, **kw)
|
||||
|
||||
self['overrides_watch'] = kw.get('default', {}).get('overrides_watch')
|
||||
@@ -50,7 +12,3 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
if kw.get('default'):
|
||||
self.update(kw['default'])
|
||||
del kw['default']
|
||||
|
||||
# _save_to_disk() method provided by EntityPersistenceMixin
|
||||
# commit() and _get_commit_data() methods inherited from watch_base
|
||||
# Tag uses default _get_commit_data() (includes all keys)
|
||||
|
||||
@@ -1,39 +0,0 @@
|
||||
import os
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
from loguru import logger
|
||||
|
||||
_SENTINEL = object()
|
||||
|
||||
|
||||
class TagsDict(dict):
|
||||
"""Dict subclass that removes the corresponding tag.json file when a tag is deleted."""
|
||||
|
||||
def __init__(self, *args, datastore_path: str | os.PathLike, **kwargs) -> None:
|
||||
self._datastore_path = Path(datastore_path)
|
||||
super().__init__(*args, **kwargs)
|
||||
|
||||
def __delitem__(self, key: str) -> None:
|
||||
super().__delitem__(key)
|
||||
tag_dir = self._datastore_path / key
|
||||
tag_json_file = tag_dir / "tag.json"
|
||||
if not os.path.exists(tag_json_file):
|
||||
logger.critical(f"Aborting deletion of directory '{tag_dir}' because '{tag_json_file}' does not exist.")
|
||||
return
|
||||
try:
|
||||
shutil.rmtree(tag_dir)
|
||||
logger.info(f"Deleted tag directory for tag {key!r}")
|
||||
except FileNotFoundError:
|
||||
pass
|
||||
except OSError as e:
|
||||
logger.error(f"Failed to delete tag directory for tag {key!r}: {e}")
|
||||
|
||||
def pop(self, key: str, default=_SENTINEL):
|
||||
"""Remove and return tag, deleting its tag.json file. Raises KeyError if missing and no default given."""
|
||||
if key in self:
|
||||
value = self[key]
|
||||
del self[key]
|
||||
return value
|
||||
if default is _SENTINEL:
|
||||
raise KeyError(key)
|
||||
return default
|
||||
@@ -1,66 +1,63 @@
|
||||
"""
|
||||
Watch domain model for change detection monitoring.
|
||||
|
||||
ARCHITECTURE NOTE: Configuration Override Hierarchy
|
||||
===================================================
|
||||
|
||||
This module implements Watch objects that inherit from dict (technical debt).
|
||||
The dream architecture would use Pydantic for:
|
||||
|
||||
1. CHAIN RESOLUTION (Watch → Tag → Global Settings)
|
||||
- Current: Manual resolution scattered across codebase
|
||||
- Future: @computed_field properties with automatic resolution
|
||||
- Examples: resolved_fetch_backend, resolved_restock_settings, etc.
|
||||
|
||||
2. DATABASE BACKEND ABSTRACTION
|
||||
- Current: Domain model tightly coupled to file-based JSON storage
|
||||
- Future: Domain model (Pydantic) separate from persistence layer
|
||||
- Enables: Easy migration to PostgreSQL, MongoDB, etc.
|
||||
|
||||
3. TYPE SAFETY & VALIDATION
|
||||
- Current: Dict access with no compile-time checks
|
||||
- Future: Type hints, IDE autocomplete, validation at boundaries
|
||||
|
||||
See class model docstring for detailed explanation and examples.
|
||||
See: processors/restock_diff/processor.py:184-192 for manual resolution example
|
||||
"""
|
||||
|
||||
from blinker import signal
|
||||
from changedetectionio.validate_url import is_safe_valid_url
|
||||
|
||||
from changedetectionio.strtobool import strtobool
|
||||
from changedetectionio.jinja2_custom import render as jinja_render
|
||||
from . import watch_base
|
||||
from .persistence import EntityPersistenceMixin
|
||||
import os
|
||||
import re
|
||||
from pathlib import Path
|
||||
from loguru import logger
|
||||
|
||||
from .. import jinja2_custom as safe_jinja
|
||||
from ..diff import ADDED_PLACEMARKER_OPEN
|
||||
from ..html_tools import TRANSLATE_WHITESPACE_TABLE
|
||||
|
||||
FAVICON_RESAVE_THRESHOLD_SECONDS=86400
|
||||
BROTLI_COMPRESS_SIZE_THRESHOLD = int(os.getenv('SNAPSHOT_BROTLI_COMPRESSION_THRESHOLD', 1024*20))
|
||||
|
||||
# Module-level favicon filename cache: data_dir → basename (or None)
|
||||
# Keyed by data_dir so it survives Watch object recreation, deepcopy, and concurrent requests.
|
||||
# Invalidated explicitly in bump_favicon() when a new favicon is saved.
|
||||
_FAVICON_FILENAME_CACHE: dict = {}
|
||||
|
||||
minimum_seconds_recheck_time = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 3))
|
||||
mtable = {'seconds': 1, 'minutes': 60, 'hours': 3600, 'days': 86400, 'weeks': 86400 * 7}
|
||||
|
||||
def _brotli_save(contents, filepath, mode=None, fallback_uncompressed=False):
|
||||
def _brotli_compress_worker(conn, filepath, mode=None):
|
||||
"""
|
||||
Save compressed data using native brotli with streaming compression.
|
||||
Uses chunked compression to minimize peak memory usage and malloc_trim()
|
||||
to force release of C-level memory back to the OS.
|
||||
Worker function to compress data with brotli in a separate process.
|
||||
This isolates memory - when process exits, OS reclaims all memory.
|
||||
|
||||
Args:
|
||||
conn: multiprocessing.Pipe connection to receive data
|
||||
filepath: destination file path
|
||||
mode: brotli compression mode (e.g., brotli.MODE_TEXT)
|
||||
"""
|
||||
import brotli
|
||||
|
||||
try:
|
||||
# Receive data from parent process via pipe (avoids pickle overhead)
|
||||
contents = conn.recv()
|
||||
|
||||
if mode is not None:
|
||||
compressed_data = brotli.compress(contents, mode=mode)
|
||||
else:
|
||||
compressed_data = brotli.compress(contents)
|
||||
|
||||
with open(filepath, 'wb') as f:
|
||||
f.write(compressed_data)
|
||||
|
||||
# Send success status back
|
||||
conn.send(True)
|
||||
# No need for explicit cleanup - process exit frees all memory
|
||||
except Exception as e:
|
||||
logger.error(f"Brotli compression worker failed: {e}")
|
||||
conn.send(False)
|
||||
finally:
|
||||
conn.close()
|
||||
|
||||
|
||||
def _brotli_subprocess_save(contents, filepath, mode=None, timeout=30, fallback_uncompressed=False):
|
||||
"""
|
||||
Save compressed data using subprocess to isolate memory.
|
||||
Uses Pipe to avoid pickle overhead for large data.
|
||||
|
||||
Args:
|
||||
contents: data to compress (str or bytes)
|
||||
filepath: destination file path
|
||||
mode: brotli compression mode (e.g., brotli.MODE_TEXT)
|
||||
timeout: subprocess timeout in seconds
|
||||
fallback_uncompressed: if True, save uncompressed on failure; if False, raise exception
|
||||
|
||||
Returns:
|
||||
@@ -70,175 +67,100 @@ def _brotli_save(contents, filepath, mode=None, fallback_uncompressed=False):
|
||||
Exception: if compression fails and fallback_uncompressed is False
|
||||
"""
|
||||
import brotli
|
||||
import gc
|
||||
import ctypes
|
||||
import multiprocessing
|
||||
import sys
|
||||
|
||||
# Ensure contents are bytes
|
||||
if isinstance(contents, str):
|
||||
contents = contents.encode('utf-8')
|
||||
|
||||
# Use explicit spawn context for thread safety (avoids fork() with multi-threaded parent)
|
||||
# Always use spawn - consistent behavior in tests and production
|
||||
ctx = multiprocessing.get_context('spawn')
|
||||
parent_conn, child_conn = ctx.Pipe()
|
||||
|
||||
# Run compression in subprocess using spawn (not fork)
|
||||
proc = ctx.Process(target=_brotli_compress_worker, args=(child_conn, filepath, mode))
|
||||
|
||||
# Windows-safe: Set daemon=False explicitly to avoid issues with process cleanup
|
||||
proc.daemon = False
|
||||
proc.start()
|
||||
|
||||
try:
|
||||
original_size = len(contents)
|
||||
logger.debug(f"Starting brotli streaming compression of {original_size} bytes.")
|
||||
# Send data to subprocess via pipe (avoids pickle)
|
||||
parent_conn.send(contents)
|
||||
|
||||
# Create streaming compressor
|
||||
compressor = brotli.Compressor(quality=6, mode=mode if mode is not None else brotli.MODE_GENERIC)
|
||||
# Wait for result with timeout
|
||||
if parent_conn.poll(timeout):
|
||||
success = parent_conn.recv()
|
||||
else:
|
||||
success = False
|
||||
logger.warning(f"Brotli compression subprocess timed out after {timeout}s")
|
||||
# Graceful termination with platform-aware cleanup
|
||||
try:
|
||||
proc.terminate()
|
||||
except Exception as term_error:
|
||||
logger.debug(f"Process termination issue (may be normal on Windows): {term_error}")
|
||||
|
||||
# Stream compress in chunks to minimize memory usage
|
||||
chunk_size = 65536 # 64KB chunks
|
||||
total_compressed_size = 0
|
||||
parent_conn.close()
|
||||
proc.join(timeout=5)
|
||||
|
||||
with open(filepath, 'wb') as f:
|
||||
# Process data in chunks
|
||||
offset = 0
|
||||
while offset < len(contents):
|
||||
chunk = contents[offset:offset + chunk_size]
|
||||
compressed_chunk = compressor.process(chunk)
|
||||
if compressed_chunk:
|
||||
f.write(compressed_chunk)
|
||||
total_compressed_size += len(compressed_chunk)
|
||||
offset += chunk_size
|
||||
# Force kill if still alive after graceful termination
|
||||
if proc.is_alive():
|
||||
try:
|
||||
if sys.platform == 'win32':
|
||||
# Windows: use kill() which is more forceful
|
||||
proc.kill()
|
||||
else:
|
||||
# Unix: terminate() already sent SIGTERM, now try SIGKILL
|
||||
proc.kill()
|
||||
proc.join(timeout=2)
|
||||
except Exception as kill_error:
|
||||
logger.warning(f"Failed to kill brotli compression process: {kill_error}")
|
||||
|
||||
# Finalize compression - critical for proper cleanup
|
||||
final_chunk = compressor.finish()
|
||||
if final_chunk:
|
||||
f.write(final_chunk)
|
||||
total_compressed_size += len(final_chunk)
|
||||
|
||||
logger.debug(f"Finished brotli compression - From {original_size} to {total_compressed_size} bytes.")
|
||||
|
||||
# Cleanup: Delete compressor, force Python GC, then force C-level memory release
|
||||
del compressor
|
||||
gc.collect()
|
||||
|
||||
# Force release of C-level memory back to OS (since brotli is a C library)
|
||||
try:
|
||||
ctypes.CDLL('libc.so.6').malloc_trim(0)
|
||||
except Exception:
|
||||
pass # malloc_trim not available on all systems (e.g., macOS)
|
||||
|
||||
return filepath
|
||||
# Check if file was created successfully
|
||||
if success and os.path.exists(filepath):
|
||||
return filepath
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Brotli compression error: {e}")
|
||||
try:
|
||||
parent_conn.close()
|
||||
except:
|
||||
pass
|
||||
try:
|
||||
proc.terminate()
|
||||
proc.join(timeout=2)
|
||||
except:
|
||||
pass
|
||||
|
||||
# Compression failed
|
||||
if fallback_uncompressed:
|
||||
logger.warning(f"Brotli compression failed for {filepath}, saving uncompressed")
|
||||
fallback_path = filepath.replace('.br', '')
|
||||
with open(fallback_path, 'wb') as f:
|
||||
f.write(contents)
|
||||
return fallback_path
|
||||
else:
|
||||
raise Exception(f"Brotli compression failed for {filepath}: {e}")
|
||||
# Compression failed
|
||||
if fallback_uncompressed:
|
||||
logger.warning(f"Brotli compression failed for {filepath}, saving uncompressed")
|
||||
fallback_path = filepath.replace('.br', '')
|
||||
with open(fallback_path, 'wb') as f:
|
||||
f.write(contents)
|
||||
return fallback_path
|
||||
else:
|
||||
raise Exception(f"Brotli compression subprocess failed for {filepath}")
|
||||
|
||||
FAVICON_RESAVE_THRESHOLD_SECONDS=86400
|
||||
|
||||
|
||||
class model(EntityPersistenceMixin, watch_base):
|
||||
"""
|
||||
Watch domain model for monitoring URL changes.
|
||||
minimum_seconds_recheck_time = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 3))
|
||||
mtable = {'seconds': 1, 'minutes': 60, 'hours': 3600, 'days': 86400, 'weeks': 86400 * 7}
|
||||
|
||||
Inherits from watch_base (which inherits dict) - see watch_base docstring for field documentation.
|
||||
|
||||
## Configuration Override Hierarchy (Chain Resolution)
|
||||
|
||||
The dream architecture uses a 3-level resolution chain:
|
||||
Watch settings → Tag/Group settings → Global settings
|
||||
|
||||
Current implementation is MANUAL (see processor.py:184-192 for example):
|
||||
- Processors manually check watch.get('field')
|
||||
- Then loop through watch.tags to find first tag with overrides_watch=True
|
||||
- Finally fall back to datastore['settings']['application']['field']
|
||||
|
||||
FUTURE: Pydantic-based chain resolution would enable:
|
||||
|
||||
```python
|
||||
# Instead of manual resolution in every processor:
|
||||
restock_settings = watch.get('restock_settings', {})
|
||||
for tag_uuid in watch.get('tags'):
|
||||
tag = datastore['settings']['application']['tags'][tag_uuid]
|
||||
if tag.get('overrides_watch'):
|
||||
restock_settings = tag.get('restock_settings', {})
|
||||
break
|
||||
|
||||
# Clean computed properties with automatic resolution:
|
||||
@computed_field
|
||||
def resolved_restock_settings(self) -> dict:
|
||||
if self.restock_settings:
|
||||
return self.restock_settings
|
||||
for tag_uuid in self.tags:
|
||||
tag = self._datastore.get_tag(tag_uuid)
|
||||
if tag.overrides_watch and tag.restock_settings:
|
||||
return tag.restock_settings
|
||||
return self._datastore.settings.restock_settings or {}
|
||||
|
||||
# Usage: watch.resolved_restock_settings (automatic, type-safe, tested once)
|
||||
```
|
||||
|
||||
Benefits of Pydantic migration:
|
||||
1. Single source of truth for resolution logic (not scattered across processors)
|
||||
2. Type safety + IDE autocomplete (watch.resolved_fetch_backend vs dict navigation)
|
||||
3. Database backend abstraction (domain model separate from persistence)
|
||||
4. Automatic validation at boundaries
|
||||
5. Self-documenting via type hints
|
||||
6. Easy to test resolution independently
|
||||
|
||||
Resolution chain examples that would benefit:
|
||||
- fetch_backend: watch → tag → global (see get_fetch_backend property)
|
||||
- notification_urls: watch → tag → global
|
||||
- time_between_check: watch → global (see threshold_seconds)
|
||||
- restock_settings: watch → tag (see processors/restock_diff/processor.py:184-192)
|
||||
- history_snapshot_max_length: watch → global (see save_history_blob:550-556)
|
||||
- All processor_config_* settings could use tag overrides
|
||||
|
||||
## Database Backend Abstraction with Pydantic
|
||||
|
||||
Current: Watch inherits dict, tightly coupled to file-based JSON storage
|
||||
Future: Domain model (Watch) separate from persistence layer
|
||||
|
||||
```python
|
||||
# Domain model (database-agnostic)
|
||||
class Watch(BaseModel):
|
||||
uuid: str
|
||||
url: str
|
||||
# ... validation, business logic
|
||||
|
||||
# Pluggable backends
|
||||
class DataStoreBackend(ABC):
|
||||
def save_watch(self, watch: Watch): ...
|
||||
def load_watch(self, uuid: str) -> Watch: ...
|
||||
|
||||
# Implementations: FileBackend, MongoBackend, PostgresBackend, etc.
|
||||
```
|
||||
|
||||
This would enable:
|
||||
- Easy migration between storage backends (file → postgres → mongodb)
|
||||
- Pydantic handles serialization/deserialization automatically
|
||||
- Domain logic stays clean (no storage concerns in Watch methods)
|
||||
|
||||
## Migration Path
|
||||
|
||||
Given existing codebase, incremental migration recommended:
|
||||
1. Create Pydantic models alongside existing dict-based models
|
||||
2. Add .to_pydantic() / .from_pydantic() bridge methods
|
||||
3. Gradually migrate code to use Pydantic models
|
||||
4. Remove dict inheritance once migration complete
|
||||
|
||||
See: watch_base docstring for technical debt discussion
|
||||
See: processors/restock_diff/processor.py:184-192 for manual resolution example
|
||||
See: Watch.py:550-556 for nested dict navigation that would become watch.resolved_*
|
||||
"""
|
||||
class model(watch_base):
|
||||
__newest_history_key = None
|
||||
__history_n = 0
|
||||
jitter_seconds = 0
|
||||
|
||||
def __init__(self, *arg, **kw):
|
||||
# Validate __datastore before calling parent (Watch requires it)
|
||||
if not kw.get('__datastore'):
|
||||
raise ValueError("Watch object requires '__datastore' reference - cannot access global settings without it")
|
||||
|
||||
# Parent class (watch_base) handles __datastore and __datastore_path
|
||||
self.__datastore_path = kw.get('datastore_path')
|
||||
if kw.get('datastore_path'):
|
||||
del kw['datastore_path']
|
||||
|
||||
super(model, self).__init__(*arg, **kw)
|
||||
|
||||
if kw.get('default'):
|
||||
self.update(kw['default'])
|
||||
del kw['default']
|
||||
@@ -249,9 +171,6 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
# Be sure the cached timestamp is ready
|
||||
bump = self.history
|
||||
|
||||
# Note: __deepcopy__, __getstate__, and __setstate__ are inherited from watch_base
|
||||
# This prevents memory leaks by sharing __datastore reference instead of copying it
|
||||
|
||||
@property
|
||||
def viewed(self):
|
||||
# Don't return viewed when last_viewed is 0 and newest_key is 0
|
||||
@@ -264,6 +183,11 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
def has_unviewed(self):
|
||||
return int(self.newest_history_key) > int(self['last_viewed']) and self.__history_n >= 2
|
||||
|
||||
def ensure_data_dir_exists(self):
|
||||
if not os.path.isdir(self.watch_data_dir):
|
||||
logger.debug(f"> Creating data dir {self.watch_data_dir}")
|
||||
os.mkdir(self.watch_data_dir)
|
||||
|
||||
@property
|
||||
def link(self):
|
||||
|
||||
@@ -319,8 +243,7 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
|
||||
# JSON Data, Screenshots, Textfiles (history index and snapshots), HTML in the future etc
|
||||
# But preserve processor config files (they're configuration, not history data)
|
||||
# Use glob not rglob here for safety.
|
||||
for item in pathlib.Path(str(self.data_dir)).glob("*.*"):
|
||||
for item in pathlib.Path(str(self.watch_data_dir)).rglob("*.*"):
|
||||
# Skip processor config files
|
||||
if item.name in processor_config_files:
|
||||
continue
|
||||
@@ -340,6 +263,7 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
'last_notification_error': False,
|
||||
'last_viewed': 0,
|
||||
'previous_md5': False,
|
||||
'previous_md5_before_filters': False,
|
||||
'remote_server_reply': None,
|
||||
'track_ldjson_price_data': None
|
||||
})
|
||||
@@ -353,53 +277,25 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
def is_source_type_url(self):
|
||||
return self.get('url', '').startswith('source:')
|
||||
|
||||
@property
|
||||
def effective_browser_profile(self):
|
||||
"""Resolve the effective BrowserProfile for this watch.
|
||||
|
||||
Walks the chain: watch → tag (overrides_watch=True) → global settings → built-in fallback.
|
||||
Never raises. Returns a BrowserProfile instance.
|
||||
"""
|
||||
from changedetectionio.model.browser_profile import resolve_browser_profile, BUILTIN_REQUESTS
|
||||
if not self._datastore:
|
||||
return BUILTIN_REQUESTS
|
||||
try:
|
||||
return resolve_browser_profile(self, self._datastore)
|
||||
except Exception:
|
||||
return BUILTIN_REQUESTS
|
||||
|
||||
@property
|
||||
def get_fetch_backend(self):
|
||||
"""Legacy property — prefer effective_browser_profile.fetch_backend for new code.
|
||||
|
||||
Returns the raw fetch_backend stored on this watch (or 'requests' for PDFs).
|
||||
Does NOT walk the tag/global resolution chain.
|
||||
"""
|
||||
Like just using the `fetch_backend` key but there could be some logic
|
||||
:return:
|
||||
"""
|
||||
# Maybe also if is_image etc?
|
||||
# This is because chrome/playwright wont render the PDF in the browser and we will just fetch it and use pdf2html to see the text.
|
||||
if self.is_pdf:
|
||||
return 'requests'
|
||||
return 'html_requests'
|
||||
|
||||
return self.get('fetch_backend')
|
||||
|
||||
@property
|
||||
def fetcher_supports_screenshots(self):
|
||||
"""Return True if the resolved fetcher for this watch supports screenshots."""
|
||||
from changedetectionio import content_fetchers
|
||||
fetcher_class = content_fetchers.get_fetcher(self.effective_browser_profile.fetch_backend)
|
||||
if fetcher_class is None:
|
||||
return False
|
||||
return bool(getattr(fetcher_class, 'supports_screenshots', False))
|
||||
|
||||
@property
|
||||
def is_pdf(self):
|
||||
url = str(self.get("url") or "").lower()
|
||||
content_type = str(self.get("content-type") or "").lower()
|
||||
|
||||
if content_type in ("none", "null", ""):
|
||||
content_type = ""
|
||||
|
||||
return (
|
||||
url.endswith(".pdf")
|
||||
or content_type.split(";")[0].strip() == "application/pdf"
|
||||
)
|
||||
# content_type field is set in the future
|
||||
# https://github.com/dgtlmoon/changedetection.io/issues/1392
|
||||
# Not sure the best logic here
|
||||
return self.get('url', '').lower().endswith('.pdf') or 'pdf' in self.get('content_type', '').lower()
|
||||
|
||||
@property
|
||||
def label(self):
|
||||
@@ -434,11 +330,11 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
tmp_history = {}
|
||||
|
||||
# In the case we are only using the watch for processing without history
|
||||
if not self.data_dir:
|
||||
if not self.watch_data_dir:
|
||||
return []
|
||||
|
||||
# Read the history file as a dict
|
||||
fname = os.path.join(self.data_dir, self.history_index_filename)
|
||||
fname = os.path.join(self.watch_data_dir, self.history_index_filename)
|
||||
if os.path.isfile(fname):
|
||||
logger.debug(f"Reading watch history index for {self.get('uuid')}")
|
||||
with open(fname, "r", encoding='utf-8') as f:
|
||||
@@ -451,13 +347,13 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
# Cross-platform: check for any path separator (works on Windows and Unix)
|
||||
if os.sep not in v and '/' not in v and '\\' not in v:
|
||||
# Relative filename only, no path separators
|
||||
v = os.path.join(self.data_dir, v)
|
||||
v = os.path.join(self.watch_data_dir, v)
|
||||
else:
|
||||
# It's possible that they moved the datadir on older versions
|
||||
# So the snapshot exists but is in a different path
|
||||
# Cross-platform: use os.path.basename instead of split('/')
|
||||
snapshot_fname = os.path.basename(v)
|
||||
proposed_new_path = os.path.join(self.data_dir, snapshot_fname)
|
||||
proposed_new_path = os.path.join(self.watch_data_dir, snapshot_fname)
|
||||
if not os.path.exists(v) and os.path.exists(proposed_new_path):
|
||||
v = proposed_new_path
|
||||
|
||||
@@ -474,7 +370,7 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
|
||||
@property
|
||||
def has_history(self):
|
||||
fname = os.path.join(self.data_dir, self.history_index_filename)
|
||||
fname = os.path.join(self.watch_data_dir, self.history_index_filename)
|
||||
return os.path.isfile(fname)
|
||||
|
||||
@property
|
||||
@@ -577,49 +473,16 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
with open(filepath, 'r', encoding='utf-8', errors='ignore') as f:
|
||||
return f.read()
|
||||
|
||||
def _write_atomic(self, dest, data, mode='wb'):
|
||||
def _write_atomic(self, dest, data):
|
||||
"""Write data atomically to dest using a temp file"""
|
||||
import tempfile
|
||||
with tempfile.NamedTemporaryFile(mode, delete=False, dir=self.data_dir) as tmp:
|
||||
tmp.write(data)
|
||||
tmp.flush()
|
||||
os.fsync(tmp.fileno())
|
||||
tmp_path = tmp.name
|
||||
os.replace(tmp_path, dest)
|
||||
|
||||
def history_trim(self, newest_n_items):
|
||||
from pathlib import Path
|
||||
import gc
|
||||
# Sort by timestamp (key)
|
||||
sorted_items = sorted(self.history.items(), key=lambda x: int(x[0]))
|
||||
|
||||
keep_part = dict(sorted_items[-newest_n_items:])
|
||||
delete_part = dict(sorted_items[:-newest_n_items])
|
||||
logger.info( f"[{self.get('uuid')}] Trimming history to most recent {newest_n_items} items, keeping {len(keep_part)} items deleting {len(delete_part)} items.")
|
||||
|
||||
if delete_part:
|
||||
for item in delete_part.items():
|
||||
try:
|
||||
Path(item[1]).unlink(missing_ok=True)
|
||||
except Exception as e:
|
||||
logger.critical(f"{str(e)}")
|
||||
finally:
|
||||
logger.debug(f"[{self.get('uuid')}] Deleted {item[1]} history snapshot")
|
||||
try:
|
||||
dest = os.path.join(self.data_dir, self.history_index_filename)
|
||||
output = "\r\n".join(
|
||||
f"{k},{Path(v).name}"
|
||||
for k, v in keep_part.items()
|
||||
)+"\r\n"
|
||||
self._write_atomic(dest=dest, data=output, mode='w')
|
||||
except Exception as e:
|
||||
logger.critical(f"{str(e)}")
|
||||
finally:
|
||||
logger.debug(f"[{self.get('uuid')}] Updated history index {dest}")
|
||||
|
||||
# reimport
|
||||
bump = self.history
|
||||
gc.collect()
|
||||
if not os.path.exists(dest):
|
||||
import tempfile
|
||||
with tempfile.NamedTemporaryFile('wb', delete=False, dir=self.watch_data_dir) as tmp:
|
||||
tmp.write(data)
|
||||
tmp.flush()
|
||||
os.fsync(tmp.fileno())
|
||||
tmp_path = tmp.name
|
||||
os.replace(tmp_path, dest)
|
||||
|
||||
# Save some text file to the appropriate path and bump the history
|
||||
# result_obj from fetch_site_status.run()
|
||||
@@ -628,6 +491,8 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
logger.trace(f"{self.get('uuid')} - Updating {self.history_index_filename} with timestamp {timestamp}")
|
||||
|
||||
self.ensure_data_dir_exists()
|
||||
|
||||
threshold = int(os.getenv('SNAPSHOT_BROTLI_COMPRESSION_THRESHOLD', 1024))
|
||||
skip_brotli = strtobool(os.getenv('DISABLE_BROTLI_TEXT_SNAPSHOT', 'False'))
|
||||
|
||||
# Binary data - detect file type and save without compression
|
||||
@@ -645,37 +510,37 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
ext = 'bin'
|
||||
|
||||
snapshot_fname = f"{snapshot_id}.{ext}"
|
||||
dest = os.path.join(self.data_dir, snapshot_fname)
|
||||
dest = os.path.join(self.watch_data_dir, snapshot_fname)
|
||||
self._write_atomic(dest, contents)
|
||||
logger.trace(f"Saved binary snapshot as {snapshot_fname} ({len(contents)} bytes)")
|
||||
|
||||
# Text data - use brotli compression if enabled and above threshold
|
||||
else:
|
||||
if not skip_brotli and len(contents) > BROTLI_COMPRESS_SIZE_THRESHOLD:
|
||||
if not skip_brotli and len(contents) > threshold:
|
||||
# Compressed text
|
||||
import brotli
|
||||
snapshot_fname = f"{snapshot_id}.txt.br"
|
||||
dest = os.path.join(self.data_dir, snapshot_fname)
|
||||
dest = os.path.join(self.watch_data_dir, snapshot_fname)
|
||||
|
||||
if not os.path.exists(dest):
|
||||
try:
|
||||
actual_dest = _brotli_save(contents, dest, mode=brotli.MODE_TEXT, fallback_uncompressed=True)
|
||||
actual_dest = _brotli_subprocess_save(contents, dest, mode=brotli.MODE_TEXT, fallback_uncompressed=True)
|
||||
if actual_dest != dest:
|
||||
snapshot_fname = os.path.basename(actual_dest)
|
||||
except Exception as e:
|
||||
logger.error(f"{self.get('uuid')} - Brotli compression failed: {e}")
|
||||
# Fallback to uncompressed
|
||||
snapshot_fname = f"{snapshot_id}.txt"
|
||||
dest = os.path.join(self.data_dir, snapshot_fname)
|
||||
dest = os.path.join(self.watch_data_dir, snapshot_fname)
|
||||
self._write_atomic(dest, contents.encode('utf-8'))
|
||||
else:
|
||||
# Plain text
|
||||
snapshot_fname = f"{snapshot_id}.txt"
|
||||
dest = os.path.join(self.data_dir, snapshot_fname)
|
||||
dest = os.path.join(self.watch_data_dir, snapshot_fname)
|
||||
self._write_atomic(dest, contents.encode('utf-8'))
|
||||
|
||||
# Append to history.txt atomically
|
||||
index_fname = os.path.join(self.data_dir, self.history_index_filename)
|
||||
index_fname = os.path.join(self.watch_data_dir, self.history_index_filename)
|
||||
index_line = f"{timestamp},{snapshot_fname}\n"
|
||||
|
||||
with open(index_fname, 'a', encoding='utf-8') as f:
|
||||
@@ -687,17 +552,6 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
self.__newest_history_key = timestamp
|
||||
self.__history_n += 1
|
||||
|
||||
# MANUAL CHAIN RESOLUTION: Watch → Global
|
||||
# With Pydantic, this would become: maxlen = watch.resolved_history_snapshot_max_length
|
||||
# @computed_field def resolved_history_snapshot_max_length(self) -> Optional[int]:
|
||||
# if self.history_snapshot_max_length: return self.history_snapshot_max_length
|
||||
# if tag := self._get_override_tag(): return tag.history_snapshot_max_length
|
||||
# return self._datastore.settings.history_snapshot_max_length
|
||||
maxlen = self.get('history_snapshot_max_length') or self.get_global_setting('application', 'history_snapshot_max_length')
|
||||
|
||||
if maxlen and self.__history_n and self.__history_n > maxlen:
|
||||
self.history_trim(newest_n_items=maxlen)
|
||||
|
||||
# @todo bump static cache of the last timestamp so we dont need to examine the file to set a proper ''viewed'' status
|
||||
return snapshot_fname
|
||||
|
||||
@@ -750,7 +604,7 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
return not local_lines.issubset(existing_history)
|
||||
|
||||
def get_screenshot(self):
|
||||
fname = os.path.join(self.data_dir, "last-screenshot.png")
|
||||
fname = os.path.join(self.watch_data_dir, "last-screenshot.png")
|
||||
if os.path.isfile(fname):
|
||||
return fname
|
||||
|
||||
@@ -765,7 +619,7 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
if not favicon_fname:
|
||||
return True
|
||||
try:
|
||||
fname = next(iter(glob.glob(os.path.join(self.data_dir, "favicon.*"))), None)
|
||||
fname = next(iter(glob.glob(os.path.join(self.watch_data_dir, "favicon.*"))), None)
|
||||
logger.trace(f"Favicon file maybe found at {fname}")
|
||||
if os.path.isfile(fname):
|
||||
file_age = int(time.time() - os.path.getmtime(fname))
|
||||
@@ -798,7 +652,7 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
base = "favicon"
|
||||
extension = "ico"
|
||||
|
||||
fname = os.path.join(self.data_dir, f"favicon.{extension}")
|
||||
fname = os.path.join(self.watch_data_dir, f"favicon.{extension}")
|
||||
|
||||
try:
|
||||
# validate=True makes sure the string only contains valid base64 chars
|
||||
@@ -810,10 +664,6 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
try:
|
||||
with open(fname, 'wb') as f:
|
||||
f.write(decoded)
|
||||
|
||||
# Invalidate module-level favicon filename cache for this watch
|
||||
_FAVICON_FILENAME_CACHE.pop(self.data_dir, None)
|
||||
|
||||
# A signal that could trigger the socket server to update the browser also
|
||||
watch_check_update = signal('watch_favicon_bump')
|
||||
if watch_check_update:
|
||||
@@ -827,23 +677,23 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
|
||||
def get_favicon_filename(self) -> str | None:
|
||||
"""
|
||||
Find any favicon.* file in the watch data directory.
|
||||
|
||||
Uses a module-level cache keyed by data_dir to survive Watch object recreation,
|
||||
deepcopy (which drops instance attrs), and concurrent request races.
|
||||
Invalidated by bump_favicon() when a new favicon is saved.
|
||||
Find any favicon.* file in the current working directory
|
||||
and return the contents of the newest one.
|
||||
|
||||
Returns:
|
||||
str: Basename of the favicon file, or None if not found.
|
||||
bytes: Contents of the newest favicon file, or None if not found.
|
||||
"""
|
||||
if self.data_dir in _FAVICON_FILENAME_CACHE:
|
||||
return _FAVICON_FILENAME_CACHE[self.data_dir]
|
||||
|
||||
import glob
|
||||
files = glob.glob(os.path.join(self.data_dir, "favicon.*"))
|
||||
fname = os.path.basename(files[0]) if files else None
|
||||
_FAVICON_FILENAME_CACHE[self.data_dir] = fname
|
||||
return fname
|
||||
|
||||
# Search for all favicon.* files
|
||||
files = glob.glob(os.path.join(self.watch_data_dir, "favicon.*"))
|
||||
|
||||
if not files:
|
||||
return None
|
||||
|
||||
# Find the newest by modification time
|
||||
newest_file = max(files, key=os.path.getmtime)
|
||||
return os.path.basename(newest_file)
|
||||
|
||||
def get_screenshot_as_thumbnail(self, max_age=3200):
|
||||
"""Return path to a square thumbnail of the most recent screenshot.
|
||||
@@ -859,7 +709,7 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
import os
|
||||
import time
|
||||
|
||||
thumbnail_path = os.path.join(self.data_dir, "thumbnail.jpeg")
|
||||
thumbnail_path = os.path.join(self.watch_data_dir, "thumbnail.jpeg")
|
||||
top_trim = 500 # Pixels from top of screenshot to use
|
||||
|
||||
screenshot_path = self.get_screenshot()
|
||||
@@ -910,7 +760,7 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
return None
|
||||
|
||||
def __get_file_ctime(self, filename):
|
||||
fname = os.path.join(self.data_dir, filename)
|
||||
fname = os.path.join(self.watch_data_dir, filename)
|
||||
if os.path.isfile(fname):
|
||||
return int(os.path.getmtime(fname))
|
||||
return False
|
||||
@@ -935,9 +785,14 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
def snapshot_error_screenshot_ctime(self):
|
||||
return self.__get_file_ctime('last-error-screenshot.png')
|
||||
|
||||
@property
|
||||
def watch_data_dir(self):
|
||||
# The base dir of the watch data
|
||||
return os.path.join(self.__datastore_path, self['uuid']) if self.__datastore_path else None
|
||||
|
||||
def get_error_text(self):
|
||||
"""Return the text saved from a previous request that resulted in a non-200 error"""
|
||||
fname = os.path.join(self.data_dir, "last-error.txt")
|
||||
fname = os.path.join(self.watch_data_dir, "last-error.txt")
|
||||
if os.path.isfile(fname):
|
||||
with open(fname, 'r', encoding='utf-8') as f:
|
||||
return f.read()
|
||||
@@ -945,7 +800,7 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
|
||||
def get_error_snapshot(self):
|
||||
"""Return path to the screenshot that resulted in a non-200 error"""
|
||||
fname = os.path.join(self.data_dir, "last-error-screenshot.png")
|
||||
fname = os.path.join(self.watch_data_dir, "last-error-screenshot.png")
|
||||
if os.path.isfile(fname):
|
||||
return fname
|
||||
return False
|
||||
@@ -969,37 +824,6 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
def toggle_mute(self):
|
||||
self['notification_muted'] ^= True
|
||||
|
||||
def _get_commit_data(self):
|
||||
"""
|
||||
Prepare watch data for commit.
|
||||
|
||||
Excludes processor_config_* keys (stored in separate files).
|
||||
Normalizes browser_steps to empty list if no meaningful steps.
|
||||
"""
|
||||
import copy
|
||||
|
||||
# Get base snapshot with lock
|
||||
lock = self._datastore.lock if self._datastore and hasattr(self._datastore, 'lock') else None
|
||||
|
||||
if lock:
|
||||
with lock:
|
||||
snapshot = dict(self)
|
||||
else:
|
||||
snapshot = dict(self)
|
||||
|
||||
# Exclude processor config keys (stored separately)
|
||||
watch_dict = {k: copy.deepcopy(v) for k, v in snapshot.items() if not k.startswith('processor_config_')}
|
||||
|
||||
# Normalize browser_steps: if no meaningful steps, save as empty list
|
||||
if not self.has_browser_steps:
|
||||
watch_dict['browser_steps'] = []
|
||||
|
||||
return watch_dict
|
||||
|
||||
# _save_to_disk() method provided by EntityPersistenceMixin
|
||||
# commit() method inherited from watch_base
|
||||
|
||||
|
||||
def extra_notification_token_values(self):
|
||||
# Used for providing extra tokens
|
||||
# return {'widget': 555}
|
||||
@@ -1029,7 +853,7 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
if not csv_writer:
|
||||
# A file on the disk can be transferred much faster via flask than a string reply
|
||||
csv_output_filename = f"report-{self.get('uuid')}.csv"
|
||||
f = open(os.path.join(self.data_dir, csv_output_filename), 'w')
|
||||
f = open(os.path.join(self.watch_data_dir, csv_output_filename), 'w')
|
||||
# @todo some headers in the future
|
||||
#fieldnames = ['Epoch seconds', 'Date']
|
||||
csv_writer = csv.writer(f,
|
||||
@@ -1071,7 +895,7 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
|
||||
def save_error_text(self, contents):
|
||||
self.ensure_data_dir_exists()
|
||||
target_path = os.path.join(self.data_dir, "last-error.txt")
|
||||
target_path = os.path.join(self.watch_data_dir, "last-error.txt")
|
||||
with open(target_path, 'w', encoding='utf-8') as f:
|
||||
f.write(contents)
|
||||
|
||||
@@ -1080,9 +904,9 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
import zlib
|
||||
|
||||
if as_error:
|
||||
target_path = os.path.join(str(self.data_dir), "elements-error.deflate")
|
||||
target_path = os.path.join(str(self.watch_data_dir), "elements-error.deflate")
|
||||
else:
|
||||
target_path = os.path.join(str(self.data_dir), "elements.deflate")
|
||||
target_path = os.path.join(str(self.watch_data_dir), "elements.deflate")
|
||||
|
||||
self.ensure_data_dir_exists()
|
||||
|
||||
@@ -1097,9 +921,9 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
def save_screenshot(self, screenshot: bytes, as_error=False):
|
||||
|
||||
if as_error:
|
||||
target_path = os.path.join(self.data_dir, "last-error-screenshot.png")
|
||||
target_path = os.path.join(self.watch_data_dir, "last-error-screenshot.png")
|
||||
else:
|
||||
target_path = os.path.join(self.data_dir, "last-screenshot.png")
|
||||
target_path = os.path.join(self.watch_data_dir, "last-screenshot.png")
|
||||
|
||||
self.ensure_data_dir_exists()
|
||||
|
||||
@@ -1110,7 +934,7 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
|
||||
def get_last_fetched_text_before_filters(self):
|
||||
import brotli
|
||||
filepath = os.path.join(self.data_dir, 'last-fetched.br')
|
||||
filepath = os.path.join(self.watch_data_dir, 'last-fetched.br')
|
||||
|
||||
if not os.path.isfile(filepath) or os.path.getsize(filepath) == 0:
|
||||
# If a previous attempt doesnt yet exist, just snarf the previous snapshot instead
|
||||
@@ -1125,21 +949,21 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
|
||||
def save_last_text_fetched_before_filters(self, contents):
|
||||
import brotli
|
||||
filepath = os.path.join(self.data_dir, 'last-fetched.br')
|
||||
_brotli_save(contents, filepath, mode=brotli.MODE_TEXT, fallback_uncompressed=False)
|
||||
filepath = os.path.join(self.watch_data_dir, 'last-fetched.br')
|
||||
_brotli_subprocess_save(contents, filepath, mode=brotli.MODE_TEXT, fallback_uncompressed=False)
|
||||
|
||||
def save_last_fetched_html(self, timestamp, contents):
|
||||
self.ensure_data_dir_exists()
|
||||
snapshot_fname = f"{timestamp}.html.br"
|
||||
filepath = os.path.join(self.data_dir, snapshot_fname)
|
||||
_brotli_save(contents, filepath, mode=None, fallback_uncompressed=True)
|
||||
filepath = os.path.join(self.watch_data_dir, snapshot_fname)
|
||||
_brotli_subprocess_save(contents, filepath, mode=None, fallback_uncompressed=True)
|
||||
self._prune_last_fetched_html_snapshots()
|
||||
|
||||
def get_fetched_html(self, timestamp):
|
||||
import brotli
|
||||
|
||||
snapshot_fname = f"{timestamp}.html.br"
|
||||
filepath = os.path.join(self.data_dir, snapshot_fname)
|
||||
filepath = os.path.join(self.watch_data_dir, snapshot_fname)
|
||||
if os.path.isfile(filepath):
|
||||
with open(filepath, 'rb') as f:
|
||||
return (brotli.decompress(f.read()).decode('utf-8'))
|
||||
@@ -1154,7 +978,7 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
|
||||
for index, timestamp in enumerate(dates):
|
||||
snapshot_fname = f"{timestamp}.html.br"
|
||||
filepath = os.path.join(self.data_dir, snapshot_fname)
|
||||
filepath = os.path.join(self.watch_data_dir, snapshot_fname)
|
||||
|
||||
# Keep only the first 2
|
||||
if index > 1 and os.path.isfile(filepath):
|
||||
@@ -1165,7 +989,7 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
def get_browsersteps_available_screenshots(self):
|
||||
"For knowing which screenshots are available to show the user in BrowserSteps UI"
|
||||
available = []
|
||||
for f in Path(self.data_dir).glob('step_before-*.jpeg'):
|
||||
for f in Path(self.watch_data_dir).glob('step_before-*.jpeg'):
|
||||
step_n=re.search(r'step_before-(\d+)', f.name)
|
||||
if step_n:
|
||||
available.append(step_n.group(1))
|
||||
@@ -1174,13 +998,18 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
def compile_error_texts(self, has_proxies=None):
|
||||
"""Compile error texts for this watch.
|
||||
Accepts has_proxies parameter to ensure it works even outside app context"""
|
||||
from flask import url_for, has_request_context
|
||||
from flask import url_for
|
||||
from markupsafe import Markup
|
||||
|
||||
output = [] # Initialize as list since we're using append
|
||||
last_error = self.get('last_error','')
|
||||
|
||||
has_app_context = has_request_context()
|
||||
try:
|
||||
url_for('settings.settings_page')
|
||||
except Exception as e:
|
||||
has_app_context = False
|
||||
else:
|
||||
has_app_context = True
|
||||
|
||||
# has app+request context, we can use url_for()
|
||||
if has_app_context:
|
||||
|
||||
@@ -2,175 +2,12 @@ import os
|
||||
import uuid
|
||||
|
||||
from changedetectionio import strtobool
|
||||
from .persistence import EntityPersistenceMixin, _determine_entity_type
|
||||
|
||||
__all__ = ['EntityPersistenceMixin', 'watch_base']
|
||||
|
||||
from ..browser_steps.browser_steps import browser_steps_get_valid_steps
|
||||
|
||||
USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH = 'System default'
|
||||
CONDITIONS_MATCH_LOGIC_DEFAULT = 'ALL'
|
||||
|
||||
|
||||
class watch_base(dict):
|
||||
"""
|
||||
Base watch domain model (inherits from dict for backward compatibility).
|
||||
|
||||
WARNING: This class inherits from dict, which violates proper encapsulation.
|
||||
Dict inheritance is legacy technical debt that should be refactored to a proper
|
||||
domain model (e.g., Pydantic BaseModel) for better type safety and validation.
|
||||
|
||||
TODO: Migrate to Pydantic BaseModel for:
|
||||
- Type safety and IDE autocomplete
|
||||
- Automatic validation
|
||||
- Clear separation between domain model and serialization
|
||||
- Database backend abstraction (file → postgres → mongodb)
|
||||
- Configuration override chain resolution (Watch → Tag → Global)
|
||||
- Immutability options
|
||||
- Better testing
|
||||
- USE https://docs.pydantic.dev/latest/integrations/datamodel_code_generator TO BUILD THE MODEL FROM THE API-SPEC!!!
|
||||
|
||||
CHAIN RESOLUTION ARCHITECTURE:
|
||||
The dream is a 3-level override hierarchy:
|
||||
Watch settings → Tag/Group settings → Global settings
|
||||
|
||||
Current implementation: MANUAL resolution scattered across codebase
|
||||
- Processors manually check watch.get('field')
|
||||
- Loop through tags to find overrides_watch=True
|
||||
- Fall back to datastore['settings']['application']['field']
|
||||
|
||||
Pydantic implementation: AUTOMATIC resolution via @computed_field
|
||||
- Single source of truth for each setting's resolution logic
|
||||
- Type-safe, testable, self-documenting
|
||||
- Example: watch.resolved_fetch_backend (instead of nested dict navigation)
|
||||
|
||||
See: Watch.py model docstring for detailed Pydantic architecture plan
|
||||
See: Tag.py model docstring for tag override explanation
|
||||
See: processors/restock_diff/processor.py:184-192 for current manual example
|
||||
|
||||
Core Fields:
|
||||
uuid (str): Unique identifier for this watch (auto-generated)
|
||||
url (str): Target URL to monitor for changes
|
||||
title (str|None): Custom display name (overrides page_title if set)
|
||||
page_title (str|None): Title extracted from <title> tag of monitored page
|
||||
tags (List[str]): List of tag UUIDs for categorization
|
||||
tag (str): DEPRECATED - Old single-tag system, use tags instead
|
||||
|
||||
Check Configuration:
|
||||
processor (str): Processor type ('text_json_diff', 'restock_diff', etc.)
|
||||
fetch_backend (str): Fetcher to use ('system', 'html_requests', 'playwright', etc.)
|
||||
method (str): HTTP method ('GET', 'POST', etc.)
|
||||
headers (dict): Custom HTTP headers to send
|
||||
proxy (str|None): Preferred proxy server
|
||||
paused (bool): Whether change detection is paused
|
||||
|
||||
Scheduling:
|
||||
time_between_check (dict): Check interval {'weeks': int, 'days': int, 'hours': int, 'minutes': int, 'seconds': int}
|
||||
time_between_check_use_default (bool): Use global default interval if True
|
||||
time_schedule_limit (dict): Weekly schedule limiting when checks can run
|
||||
Structure: {
|
||||
'enabled': bool,
|
||||
'monday/tuesday/.../sunday': {
|
||||
'enabled': bool,
|
||||
'start_time': str ('HH:MM'),
|
||||
'duration': {'hours': str, 'minutes': str}
|
||||
}
|
||||
}
|
||||
|
||||
Content Filtering:
|
||||
include_filters (List[str]): CSS/XPath selectors to extract content
|
||||
subtractive_selectors (List[str]): Selectors to remove from content
|
||||
ignore_text (List[str]): Text patterns to ignore in change detection
|
||||
trigger_text (List[str]): Text/regex that must be present to trigger change
|
||||
text_should_not_be_present (List[str]): Text that should NOT be present
|
||||
extract_text (List[str]): Regex patterns to extract specific text after filtering
|
||||
|
||||
Text Processing:
|
||||
trim_text_whitespace (bool): Strip leading/trailing whitespace
|
||||
sort_text_alphabetically (bool): Sort lines alphabetically before comparison
|
||||
remove_duplicate_lines (bool): Remove duplicate lines
|
||||
check_unique_lines (bool): Compare against all history for unique lines
|
||||
strip_ignored_lines (bool|None): Remove lines matching ignore patterns
|
||||
|
||||
Change Detection Filters:
|
||||
filter_text_added (bool): Include added text in change detection
|
||||
filter_text_removed (bool): Include removed text in change detection
|
||||
filter_text_replaced (bool): Include replaced text in change detection
|
||||
|
||||
Browser Automation:
|
||||
browser_steps (List[dict]): Browser automation steps for JS-heavy sites
|
||||
browser_steps_last_error_step (int|None): Last step that caused error
|
||||
webdriver_delay (int|None): Seconds to wait after page load
|
||||
webdriver_js_execute_code (str|None): JavaScript to execute before extraction
|
||||
|
||||
Restock Detection:
|
||||
in_stock_only (bool): Only trigger on in-stock transitions
|
||||
follow_price_changes (bool): Monitor price changes
|
||||
has_ldjson_price_data (bool|None): Whether page has LD-JSON price data
|
||||
track_ldjson_price_data (str|None): Track LD-JSON price data ('ACCEPT', 'REJECT', None)
|
||||
price_change_threshold_percent (float|None): Minimum price change % to trigger
|
||||
|
||||
Notifications:
|
||||
notification_urls (List[str]): Apprise URLs for notifications
|
||||
notification_title (str|None): Custom notification title template
|
||||
notification_body (str|None): Custom notification body template
|
||||
notification_format (str): Notification format (e.g., 'System default', 'Text', 'HTML')
|
||||
notification_muted (bool): Disable notifications for this watch
|
||||
notification_screenshot (bool): Include screenshot in notifications
|
||||
notification_alert_count (int): Number of notifications sent
|
||||
last_notification_error (str|None): Last notification error message
|
||||
body (str|None): DEPRECATED? Legacy notification body field
|
||||
filter_failure_notification_send (bool): Send notification on filter failures
|
||||
|
||||
History & State:
|
||||
date_created (int|None): Unix timestamp of watch creation
|
||||
last_checked (int): Unix timestamp of last check
|
||||
last_viewed (int): History snapshot key of last user view
|
||||
last_error (str|bool): Last error message or False if no error
|
||||
check_count (int): Total number of checks performed
|
||||
fetch_time (float): Duration of last fetch in seconds
|
||||
consecutive_filter_failures (int): Counter for consecutive filter match failures
|
||||
previous_md5 (str|bool): MD5 hash of previous content
|
||||
history_snapshot_max_length (int|None): Max history snapshots to keep (None = use global)
|
||||
|
||||
Conditions:
|
||||
conditions (dict): Custom conditions for change detection logic
|
||||
conditions_match_logic (str): Logic operator ('ALL', 'ANY') for conditions
|
||||
|
||||
Metadata:
|
||||
content-type (str|None): Content-Type from last fetch
|
||||
remote_server_reply (str|None): Server header from last response
|
||||
ignore_status_codes (List[int]|None): HTTP status codes to ignore
|
||||
use_page_title_in_list (bool|None): Display page title in watch list (None = use system default)
|
||||
|
||||
Instance Attributes (not serialized):
|
||||
__datastore: Reference to parent DataStore (set externally after creation)
|
||||
data_dir: Filesystem path for this watch's data directory
|
||||
|
||||
Notes:
|
||||
- Many fields default to None to distinguish "not set" from "set to default"
|
||||
- When field is None, system-level defaults are used
|
||||
- Processor-specific configs (e.g., processor_config_*) are NOT stored in watch.json
|
||||
They are stored in separate {processor_name}.json files
|
||||
- This class is used for both Watch and Tag objects (tags reuse the structure)
|
||||
"""
|
||||
|
||||
def __init__(self, *arg, **kw):
|
||||
# Store datastore reference (common to Watch and Tag)
|
||||
# Use single underscore to avoid name mangling issues in subclasses
|
||||
self._datastore = kw.get('__datastore')
|
||||
if kw.get('__datastore'):
|
||||
del kw['__datastore']
|
||||
|
||||
# Store datastore_path (common to Watch and Tag)
|
||||
self._datastore_path = kw.get('datastore_path')
|
||||
if kw.get('datastore_path'):
|
||||
del kw['datastore_path']
|
||||
|
||||
# IMPORTANT: Don't initialize __watch_was_edited yet!
|
||||
# We'll initialize it AFTER the initial update() call below
|
||||
# This prevents marking the watch as edited during initialization
|
||||
|
||||
self.update({
|
||||
# Custom notification content
|
||||
# Re #110, so then if this is set to None, we know to use the default value instead
|
||||
@@ -179,7 +16,7 @@ class watch_base(dict):
|
||||
'body': None,
|
||||
'browser_steps': [],
|
||||
'browser_steps_last_error_step': None,
|
||||
'conditions' : [],
|
||||
'conditions' : {},
|
||||
'conditions_match_logic': CONDITIONS_MATCH_LOGIC_DEFAULT,
|
||||
'check_count': 0,
|
||||
'check_unique_lines': False, # On change-detected, compare against all history if its something new
|
||||
@@ -187,7 +24,6 @@ class watch_base(dict):
|
||||
'content-type': None,
|
||||
'date_created': None,
|
||||
'extract_text': [], # Extract text by regex after filters
|
||||
'browser_profile': 'system', # machine-name key of a BrowserProfile; 'system' → resolve via chain
|
||||
'fetch_backend': 'system', # plaintext, playwright etc
|
||||
'fetch_time': 0.0,
|
||||
'filter_failure_notification_send': strtobool(os.getenv('FILTER_FAILURE_NOTIFICATION_SEND_DEFAULT', 'True')),
|
||||
@@ -196,7 +32,6 @@ class watch_base(dict):
|
||||
'filter_text_replaced': True,
|
||||
'follow_price_changes': True,
|
||||
'has_ldjson_price_data': None,
|
||||
'history_snapshot_max_length': None,
|
||||
'headers': {}, # Extra headers to send
|
||||
'ignore_text': [], # List of text to ignore when calculating the comparison checksum
|
||||
'ignore_status_codes': None,
|
||||
@@ -217,6 +52,7 @@ class watch_base(dict):
|
||||
'page_title': None, # <title> from the page
|
||||
'paused': False,
|
||||
'previous_md5': False,
|
||||
'previous_md5_before_filters': False, # Used for skipping changedetection entirely
|
||||
'processor': 'text_json_diff', # could be restock_diff or others from .processors
|
||||
'price_change_threshold_percent': None,
|
||||
'proxy': None, # Preferred proxy connection
|
||||
@@ -302,374 +138,5 @@ class watch_base(dict):
|
||||
|
||||
super(watch_base, self).__init__(*arg, **kw)
|
||||
|
||||
# Check if we're being initialized from an existing watch object
|
||||
# that has was_edited=True, so we can preserve the flag
|
||||
preserve_edited_flag = False
|
||||
if self.get('default'):
|
||||
# When creating a new watch object from an existing one (e.g., changing processor),
|
||||
# preserve the was_edited flag if it was True
|
||||
default_watch = self.get('default')
|
||||
if hasattr(default_watch, 'was_edited') and default_watch.was_edited:
|
||||
preserve_edited_flag = True
|
||||
del self['default']
|
||||
|
||||
# NOW initialize the edited flag after all initial setup is complete
|
||||
# This ensures initialization doesn't trigger the edited flag
|
||||
# But preserve it if the source watch had it set to True
|
||||
self.__watch_was_edited = preserve_edited_flag
|
||||
|
||||
def _mark_field_as_edited(self, key):
|
||||
"""
|
||||
Helper to mark a field as edited if it's writable.
|
||||
|
||||
Internal method used by __setitem__, update(), pop(), etc.
|
||||
"""
|
||||
# Don't track edits during initial load or if already edited
|
||||
if not hasattr(self, '_watch_base__watch_was_edited'):
|
||||
return
|
||||
if self.__watch_was_edited:
|
||||
return # Already marked as edited
|
||||
|
||||
# Import from shared schema utilities (no circular dependency)
|
||||
from .schema_utils import get_readonly_watch_fields
|
||||
readonly_fields = get_readonly_watch_fields()
|
||||
|
||||
# Additional system-managed fields not in OpenAPI spec (yet)
|
||||
# These are set by processors/workers and should not trigger edited flag
|
||||
additional_system_fields = {
|
||||
'last_check_status', # Set by processors
|
||||
'restock', # Set by restock processor
|
||||
'last_viewed', # Set by mark_all_viewed endpoint
|
||||
}
|
||||
|
||||
# Only mark as edited if this is a user-writable field
|
||||
if key not in readonly_fields and key not in additional_system_fields:
|
||||
self.__watch_was_edited = True
|
||||
|
||||
def __setitem__(self, key, value):
|
||||
"""
|
||||
Override dict.__setitem__ to track when writable watch fields are modified.
|
||||
|
||||
This enables skipping reprocessing when:
|
||||
1. HTML content is unchanged (checksumFromPreviousCheckWasTheSame)
|
||||
2. AND watch configuration was not edited
|
||||
|
||||
Only sets the edited flag when field is NOT in readonly_fields (from OpenAPI spec).
|
||||
"""
|
||||
# Set the value first (always)
|
||||
super().__setitem__(key, value)
|
||||
# Mark as edited if writable field
|
||||
self._mark_field_as_edited(key)
|
||||
|
||||
def __delitem__(self, key):
|
||||
"""Override dict.__delitem__ to track deletions of writable fields."""
|
||||
super().__delitem__(key)
|
||||
self._mark_field_as_edited(key)
|
||||
|
||||
def update(self, *args, **kwargs):
|
||||
|
||||
if args and args[0].get('browser_steps'):
|
||||
args[0]['browser_steps'] = browser_steps_get_valid_steps(args[0].get('browser_steps'))
|
||||
|
||||
"""Override dict.update() to track modifications to writable fields."""
|
||||
# Call parent update first
|
||||
super().update(*args, **kwargs)
|
||||
|
||||
# Mark as edited for any writable fields that were updated
|
||||
# Handle both update(dict) and update(key=value) forms
|
||||
if args:
|
||||
for key in args[0].keys():
|
||||
self._mark_field_as_edited(key)
|
||||
for key in kwargs.keys():
|
||||
self._mark_field_as_edited(key)
|
||||
|
||||
|
||||
def pop(self, key, *args):
|
||||
"""Override dict.pop() to track removal of writable fields."""
|
||||
result = super().pop(key, *args)
|
||||
self._mark_field_as_edited(key)
|
||||
return result
|
||||
|
||||
def setdefault(self, key, default=None):
|
||||
"""Override dict.setdefault() to track modifications to writable fields."""
|
||||
# Only marks as edited if key didn't exist (i.e., a new value was set)
|
||||
existed = key in self
|
||||
result = super().setdefault(key, default)
|
||||
if not existed:
|
||||
self._mark_field_as_edited(key)
|
||||
return result
|
||||
|
||||
@property
|
||||
def was_edited(self):
|
||||
"""
|
||||
Check if watch configuration was edited since last processing.
|
||||
|
||||
Returns:
|
||||
bool: True if writable fields were modified, False otherwise
|
||||
"""
|
||||
return getattr(self, '_watch_base__watch_was_edited', False)
|
||||
|
||||
def reset_watch_edited_flag(self):
|
||||
"""
|
||||
Reset the watch edited flag after successful processing.
|
||||
|
||||
Call this after processing completes to allow future content-only change detection.
|
||||
"""
|
||||
self.__watch_was_edited = False
|
||||
|
||||
@classmethod
|
||||
def get_property_names(cls):
|
||||
"""
|
||||
Get all @property attribute names from this model class using introspection.
|
||||
|
||||
This discovers computed/derived properties that are not stored in the datastore.
|
||||
These properties should be filtered out during PUT/POST requests.
|
||||
|
||||
Returns:
|
||||
frozenset: Immutable set of @property attribute names from the model class
|
||||
"""
|
||||
import functools
|
||||
|
||||
# Create a cached version if it doesn't exist
|
||||
if not hasattr(cls, '_cached_get_property_names'):
|
||||
@functools.cache
|
||||
def _get_props():
|
||||
properties = set()
|
||||
# Use introspection to find all @property attributes
|
||||
for name in dir(cls):
|
||||
# Skip private/magic attributes
|
||||
if name.startswith('_'):
|
||||
continue
|
||||
try:
|
||||
attr = getattr(cls, name)
|
||||
# Check if it's a property descriptor
|
||||
if isinstance(attr, property):
|
||||
properties.add(name)
|
||||
except (AttributeError, TypeError):
|
||||
continue
|
||||
return frozenset(properties)
|
||||
|
||||
cls._cached_get_property_names = _get_props
|
||||
|
||||
return cls._cached_get_property_names()
|
||||
|
||||
def __deepcopy__(self, memo):
|
||||
"""
|
||||
Custom deepcopy for all watch_base subclasses (Watch, Tag, etc.).
|
||||
|
||||
CRITICAL FIX: Prevents copying large reference objects like __datastore
|
||||
which would cause exponential memory growth when Watch objects are deepcopied.
|
||||
|
||||
This is called by:
|
||||
- api/Watch.py:76 (API endpoint)
|
||||
- api/Tags.py:28 (Tags API)
|
||||
- processors/base.py:26 (EVERY processor run)
|
||||
- store/__init__.py:544 (clone watch)
|
||||
- And other locations
|
||||
"""
|
||||
from copy import deepcopy
|
||||
|
||||
# Create new instance without calling __init__
|
||||
cls = self.__class__
|
||||
new_obj = cls.__new__(cls)
|
||||
memo[id(self)] = new_obj
|
||||
|
||||
# Copy the dict data (all the settings)
|
||||
for key, value in self.items():
|
||||
new_obj[key] = deepcopy(value, memo)
|
||||
|
||||
# Copy instance attributes dynamically
|
||||
# This handles Watch-specific attrs (like __datastore) and any future subclass attrs
|
||||
for attr_name in dir(self):
|
||||
# Skip methods, special attrs, and dict keys
|
||||
if attr_name.startswith('_') and not attr_name.startswith('__'):
|
||||
# This catches _model__datastore, _model__history_n, etc.
|
||||
try:
|
||||
attr_value = getattr(self, attr_name)
|
||||
|
||||
# Special handling: Share references to large objects instead of copying
|
||||
# Examples: _datastore, __datastore, __app_reference, __global_settings, etc.
|
||||
if (attr_name == '_datastore' or
|
||||
attr_name.endswith('__datastore') or
|
||||
attr_name.endswith('__app')):
|
||||
# Share the reference (don't copy!) to prevent memory leaks
|
||||
setattr(new_obj, attr_name, attr_value)
|
||||
# Skip cache attributes - let them regenerate on demand
|
||||
elif 'cache' in attr_name.lower():
|
||||
pass # Don't copy caches
|
||||
# Copy regular instance attributes
|
||||
elif not callable(attr_value):
|
||||
setattr(new_obj, attr_name, attr_value)
|
||||
except AttributeError:
|
||||
pass # Attribute doesn't exist in this instance
|
||||
|
||||
return new_obj
|
||||
|
||||
def __getstate__(self):
|
||||
"""
|
||||
Custom pickle serialization for all watch_base subclasses.
|
||||
|
||||
Excludes large reference objects (like __datastore) from serialization.
|
||||
"""
|
||||
# Get the dict data
|
||||
state = dict(self)
|
||||
|
||||
# Collect instance attributes (excluding methods and large references)
|
||||
instance_attrs = {}
|
||||
for attr_name in dir(self):
|
||||
if attr_name.startswith('_') and not attr_name.startswith('__'):
|
||||
try:
|
||||
attr_value = getattr(self, attr_name)
|
||||
# Exclude large reference objects and caches from serialization
|
||||
if not (attr_name == '_datastore' or
|
||||
attr_name.endswith('__datastore') or
|
||||
attr_name.endswith('__app') or
|
||||
'cache' in attr_name.lower() or
|
||||
callable(attr_value)):
|
||||
instance_attrs[attr_name] = attr_value
|
||||
except AttributeError:
|
||||
pass
|
||||
|
||||
if instance_attrs:
|
||||
state['__instance_metadata__'] = instance_attrs
|
||||
|
||||
return state
|
||||
|
||||
def __setstate__(self, state):
|
||||
"""
|
||||
Custom pickle deserialization for all watch_base subclasses.
|
||||
|
||||
WARNING: Large reference objects (like __datastore) are NOT restored!
|
||||
Caller must restore these references after unpickling if needed.
|
||||
"""
|
||||
# Extract metadata
|
||||
metadata = state.pop('__instance_metadata__', {})
|
||||
|
||||
# Restore dict data
|
||||
self.update(state)
|
||||
|
||||
# Restore instance attributes
|
||||
for attr_name, attr_value in metadata.items():
|
||||
setattr(self, attr_name, attr_value)
|
||||
|
||||
@property
|
||||
def data_dir(self):
|
||||
"""
|
||||
The base directory for this watch/tag data (property, computed from UUID).
|
||||
|
||||
Common property for both Watch and Tag objects.
|
||||
Returns path like: /datastore/{uuid}/
|
||||
"""
|
||||
return os.path.join(self._datastore_path, self['uuid']) if self._datastore_path else None
|
||||
|
||||
def ensure_data_dir_exists(self):
|
||||
"""
|
||||
Create the data directory if it doesn't exist.
|
||||
|
||||
Common method for both Watch and Tag objects.
|
||||
"""
|
||||
from loguru import logger
|
||||
if not os.path.isdir(self.data_dir):
|
||||
logger.debug(f"> Creating data dir {self.data_dir}")
|
||||
os.mkdir(self.data_dir)
|
||||
|
||||
def get_global_setting(self, *path):
|
||||
"""
|
||||
Get a setting from the global datastore configuration.
|
||||
|
||||
Args:
|
||||
*path: Path to the setting (e.g., 'application', 'history_snapshot_max_length')
|
||||
|
||||
Returns:
|
||||
The setting value, or None if not found
|
||||
|
||||
Example:
|
||||
maxlen = self.get_global_setting('application', 'history_snapshot_max_length')
|
||||
"""
|
||||
if not self._datastore:
|
||||
return None
|
||||
|
||||
try:
|
||||
# _datastore is a ChangeDetectionStore (has .data) or a plain dict (unit tests)
|
||||
store_data = self._datastore.data if hasattr(self._datastore, 'data') else self._datastore
|
||||
value = store_data['settings']
|
||||
for key in path:
|
||||
value = value[key]
|
||||
return value
|
||||
except (KeyError, TypeError):
|
||||
return None
|
||||
|
||||
def _get_commit_data(self):
|
||||
"""
|
||||
Prepare data for commit (can be overridden by subclasses).
|
||||
|
||||
Returns:
|
||||
dict: Data to serialize (filtered as needed by subclass)
|
||||
"""
|
||||
import copy
|
||||
|
||||
# Acquire datastore lock to prevent concurrent modifications during copy
|
||||
lock = self._datastore.lock if self._datastore and hasattr(self._datastore, 'lock') else None
|
||||
|
||||
if lock:
|
||||
with lock:
|
||||
snapshot = dict(self)
|
||||
else:
|
||||
snapshot = dict(self)
|
||||
|
||||
# Deep copy snapshot (slower, but done outside lock to minimize contention)
|
||||
# Subclasses can override to filter keys (e.g., Watch excludes processor_config_*)
|
||||
return {k: copy.deepcopy(v) for k, v in snapshot.items()}
|
||||
|
||||
def _save_to_disk(self, data_dict, uuid):
|
||||
"""
|
||||
Save data to disk (must be implemented by subclasses).
|
||||
|
||||
Args:
|
||||
data_dict: Dictionary to save
|
||||
uuid: UUID for logging
|
||||
|
||||
Raises:
|
||||
NotImplementedError: If subclass doesn't implement
|
||||
"""
|
||||
raise NotImplementedError("Subclass must implement _save_to_disk()")
|
||||
|
||||
def commit(self):
    """
    Write this entity to disk right away.

    The data comes from ``_get_commit_data()`` and is handed to
    ``_save_to_disk()``; subclasses customise those two hooks.

    Fire-and-forget: every failure is logged and swallowed, nothing is
    raised and nothing is returned. The in-memory data is left untouched,
    so a later ``commit()`` can try again.
    """
    from loguru import logger

    class_name = self.__class__.__name__

    # Guard: without a data directory there is nowhere to write to
    if not self.data_dir:
        logger.error(f"Cannot commit {class_name} {self.get('uuid')} without datastore_path")
        return

    # Guard: the UUID is passed on to the save hook
    uuid = self.get('uuid')
    if not uuid:
        logger.error(f"Cannot commit {class_name} without UUID")
        return

    # Step 1: collect the data (the subclass hook may filter keys)
    try:
        payload = self._get_commit_data()
    except Exception as e:
        logger.error(f"Failed to prepare commit data for {uuid}: {e}")
        return

    # Step 2: hand it to the subclass' persistence hook
    try:
        # entity_type/filename are only used for the debug message below
        entity_type = _determine_entity_type(self.__class__)
        filename = f"{entity_type}.json"
        self._save_to_disk(payload, uuid)
        logger.debug(f"Committed {entity_type} {uuid} to {uuid}/{filename}")
    except Exception as e:
        logger.error(f"Failed to commit {uuid}: {e}")
|
||||
del self['default']
|
||||
@@ -1,354 +0,0 @@
|
||||
"""
|
||||
BrowserProfile — named, reusable browser/fetcher configuration.
|
||||
|
||||
Storage key
|
||||
-----------
|
||||
Profiles are stored in ``settings.application.browser_profiles`` as a plain dict
|
||||
keyed by *machine name* — a lowercase, underscore-separated slug derived from the
|
||||
human-readable ``name`` field:
|
||||
|
||||
'My Blocking Chrome' → 'my_blocking_chrome'
|
||||
'Custom CDP — Mobile (375px)' → 'custom_cdp_mobile_375px'
|
||||
|
||||
Using the machine name as the key means that deleting a profile and recreating
|
||||
it with the same name restores the original key, so all watches that referenced
|
||||
it continue to work without any manual re-linking.
|
||||
|
||||
Resolution chain
|
||||
----------------
|
||||
``resolve_browser_profile(watch, datastore)`` walks:
|
||||
|
||||
watch.browser_profile → first tag with overrides_watch=True →
|
||||
settings.application.browser_profile → built-in fallback
|
||||
|
||||
It never raises. Stale / missing machine-name references are logged and the
|
||||
resolver falls through to the next level.
|
||||
|
||||
Built-in profiles
|
||||
-----------------
|
||||
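``BUILTIN_REQUESTS`` and the browser built-ins (``BUILTIN_PLAYWRIGHT``, ``BUILTIN_SELENIUM``, ``BUILTIN_PUPPETEER``; ``BUILTIN_BROWSER`` is an alias of the first) are always available and cannot be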
|
||||
deleted from the UI (``is_builtin=True``). Their machine names are stored in
|
||||
``RESERVED_MACHINE_NAMES`` to block user profiles from shadowing them.
|
||||
|
||||
Migration
|
||||
---------
|
||||
``store/updates.py::update_31`` converts the legacy ``fetch_backend`` field on
|
||||
watches, tags and global settings into ``browser_profile`` machine-name
|
||||
references. After that migration no legacy paths are needed here.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from typing import Optional
|
||||
|
||||
from loguru import logger
|
||||
from pydantic import BaseModel, field_validator
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Constants
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
NAME_MAX_LEN = 100
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Model
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class BrowserProfile(BaseModel):
    """
    A named, reusable configuration for how a watch fetches its target URL.

    The *machine name* (see ``get_machine_name()``) is the stable storage key.
    Updating ``name`` changes the machine name; any watch that referenced the
    old machine name will then fall back through the resolution chain until it
    is explicitly re-pointed.  To replace a profile without breaking watches,
    delete it and recreate it with the *same* name.
    """

    name: str
    """Human-readable label shown in the UI.  Max 100 characters."""

    fetch_backend: str = 'requests'
    """
    Which fetch engine to use.  This is the *clean* fetcher name without the
    ``html_`` module prefix (e.g. ``'requests'``, ``'webdriver'``,
    ``'playwright'``, ``'puppeteer'``, ``'cloakbrowser'``).

    The module-level ``html_`` prefix (``html_requests``, ``html_webdriver``,
    …) is an implementation detail of ``content_fetchers/``.  Use
    ``get_fetcher_class_name()`` to obtain the full module attribute name when
    you need to look up the class.

    Must be non-empty and contain only ``[a-z0-9_]`` characters.
    """

    is_builtin: bool = False
    """Built-in profiles are always present and cannot be deleted from the UI."""

    # ------------------------------------------------------------------
    # Browser-specific settings (silently ignored by html_requests)
    # ------------------------------------------------------------------

    browser_connection_url: Optional[str] = None
    """
    Custom CDP / WebSocket endpoint, e.g. ``ws://my-chrome:3000``.
    Overrides the system-wide ``PLAYWRIGHT_DRIVER_URL`` for this profile.
    Only meaningful for ``html_webdriver`` profiles.
    """

    viewport_width: int = 1280
    """
    Browser viewport width in pixels.
    Common presets: 375 (iPhone), 768 (tablet), 1280 (desktop).
    """

    viewport_height: int = 1000
    """
    Browser viewport height in pixels.
    Common presets: 812 (iPhone), 1024 (tablet), 1000 (desktop).
    """

    block_images: bool = False
    """
    Block all image requests.  Typically cuts page-load time by 40-70 % on
    image-heavy sites with no impact on text-based change detection.
    """

    block_fonts: bool = False
    """Block web-font requests.  Modest speed gain; rarely affects detection."""

    user_agent: Optional[str] = None
    """
    Override the browser User-Agent string.
    ``None`` keeps the fetcher's built-in default, which already strips
    obvious headless markers such as ``HeadlessChrome``.
    """

    ignore_https_errors: bool = False
    """
    Proceed even when the server's TLS certificate is invalid or self-signed.
    Useful for staging / development environments.
    """

    locale: Optional[str] = None
    """
    Browser locale (e.g. ``en-US``, ``de-DE``).
    Sets the ``Accept-Language`` header and ``navigator.language``.
    Some sites serve different prices or copy based on locale.
    """

    model_config = {"frozen": False}

    # ------------------------------------------------------------------
    # Validators
    # ------------------------------------------------------------------

    @field_validator('fetch_backend')
    @classmethod
    def _validate_fetch_backend(cls, v: str) -> str:
        """Strip whitespace and enforce the clean ``[a-z0-9_]+`` fetcher-name format."""
        v = v.strip()
        if not v:
            raise ValueError('fetch_backend cannot be empty')
        if not re.fullmatch(r'[a-z0-9_]+', v):
            raise ValueError(
                f"fetch_backend must contain only lowercase letters, digits and underscores, got {v!r}"
            )
        if v.startswith('html_'):
            raise ValueError(
                f"fetch_backend should be the clean fetcher name without the 'html_' prefix "
                f"(e.g. 'requests', 'webdriver', 'playwright'). Got {v!r}. "
                f"Use get_fetcher_class_name() to obtain the full module attribute name."
            )
        return v

    @field_validator('name')
    @classmethod
    def _validate_name(cls, v: str) -> str:
        """Strip whitespace and enforce a non-empty name of at most ``NAME_MAX_LEN`` characters."""
        v = v.strip()
        if not v:
            raise ValueError('Name cannot be empty')
        if len(v) > NAME_MAX_LEN:
            raise ValueError(f'Name must be {NAME_MAX_LEN} characters or less')
        return v

    # ------------------------------------------------------------------
    # Machine-name helpers
    # ------------------------------------------------------------------

    @staticmethod
    def machine_name_from_str(name: str) -> str:
        """
        Convert a human name to a machine-safe storage key.

        Transformation rules (applied in order):

        1. Strip surrounding whitespace; lower-case.
        2. Replace runs of whitespace or hyphens with a single ``_``.
        3. Drop every character that is not ``[a-z0-9_]``.
        4. Collapse consecutive underscores.
        5. Strip leading / trailing underscores.
        6. Truncate to ``NAME_MAX_LEN`` characters, then drop a trailing
           underscore if the cut happened to land on one.

        The result can be an empty string when *name* contains no
        ``[a-z0-9]`` characters at all.

        Examples::

            'My Blocking Browser Chrome'   →  'my_blocking_browser_chrome'
            'Custom CDP — Mobile (375px)'  →  'custom_cdp_mobile_375px'
            '  Weird  ---  Name  '         →  'weird_name'
        """
        s = name.strip().lower()
        s = re.sub(r'[\s\-]+', '_', s)    # whitespace / hyphens → underscore
        s = re.sub(r'[^a-z0-9_]', '', s)  # drop everything else
        s = re.sub(r'_+', '_', s)          # collapse repeated underscores
        s = s.strip('_')                   # drop leading / trailing underscores
        # Truncation can expose an interior underscore as the last character,
        # which would violate rule 5 — strip it again after the cut.
        return s[:NAME_MAX_LEN].rstrip('_')

    def get_machine_name(self) -> str:
        """Return the machine-safe storage key derived from this profile's ``name``."""
        return self.machine_name_from_str(self.name)

    def get_fetcher_class_name(self) -> str:
        """Return the clean fetcher name for this profile (same as ``fetch_backend``).

        Use with ``content_fetchers.get_fetcher()``::

            from changedetectionio import content_fetchers
            fetcher_cls = content_fetchers.get_fetcher(profile.get_fetcher_class_name())
        """
        return self.fetch_backend
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Built-in profiles (always present, cannot be deleted)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Plain HTTP fetch, no browser involved
BUILTIN_REQUESTS = BrowserProfile(name='Direct HTTP (requests)', fetch_backend='requests', is_builtin=True)

# One built-in per browser fetch backend
BUILTIN_PLAYWRIGHT = BrowserProfile(name='Browser (Chrome/Playwright)', fetch_backend='playwright_cdp', is_builtin=True)
BUILTIN_SELENIUM = BrowserProfile(name='Browser (Chrome/Selenium)', fetch_backend='selenium', is_builtin=True)
BUILTIN_PUPPETEER = BrowserProfile(name='Browser (Chrome/Puppeteer)', fetch_backend='puppeteer', is_builtin=True)

# Backwards-compatible alias: BUILTIN_BROWSER is the very same object as BUILTIN_PLAYWRIGHT.
BUILTIN_BROWSER = BUILTIN_PLAYWRIGHT

# Machine name → built-in profile, in declaration order.
_BUILTINS: dict[str, BrowserProfile] = dict(
    (profile.get_machine_name(), profile)
    for profile in (BUILTIN_REQUESTS, BUILTIN_PLAYWRIGHT, BUILTIN_SELENIUM, BUILTIN_PUPPETEER)
)

# Machine names of the built-ins; user-created profiles must not reuse them.
RESERVED_MACHINE_NAMES: frozenset[str] = frozenset(_BUILTINS)
|
||||
|
||||
|
||||
def get_default_browser_builtin() -> BrowserProfile:
    """Pick the built-in browser profile that matches the process environment.

    Decision table:

    * ``PLAYWRIGHT_DRIVER_URL`` unset or empty                       → ``BUILTIN_SELENIUM``
    * ``PLAYWRIGHT_DRIVER_URL`` set, ``FAST_PUPPETEER_CHROME_FETCHER`` falsy
      (default ``'False'``)                                          → ``BUILTIN_PLAYWRIGHT``
    * ``PLAYWRIGHT_DRIVER_URL`` set, ``FAST_PUPPETEER_CHROME_FETCHER`` truthy
                                                                     → ``BUILTIN_PUPPETEER``

    ``FAST_PUPPETEER_CHROME_FETCHER`` is only read when a driver URL is set.
    """
    import os
    from changedetectionio.strtobool import strtobool

    if not os.getenv('PLAYWRIGHT_DRIVER_URL', False):
        return BUILTIN_SELENIUM

    wants_puppeteer = strtobool(os.getenv('FAST_PUPPETEER_CHROME_FETCHER', 'False'))
    if wants_puppeteer:
        return BUILTIN_PUPPETEER
    return BUILTIN_PLAYWRIGHT
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Lookup helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def get_builtin_profiles() -> dict[str, BrowserProfile]:
    """Return a new dict mapping machine name → built-in profile.

    The dict is a shallow copy: mutating it does not touch the module-level
    table, but the ``BrowserProfile`` values are the shared instances.
    """
    return {machine_name: profile for machine_name, profile in _BUILTINS.items()}
|
||||
|
||||
|
||||
def get_profile(machine_name: str, store_profiles: dict) -> Optional[BrowserProfile]:
    """
    Find the ``BrowserProfile`` stored under *machine_name*.

    Built-ins win: they are consulted before *store_profiles*, so a user
    profile with the same key can never shadow one.

    Args:
        machine_name: Storage key of the profile.
        store_profiles: User profiles keyed by machine name; values may be
            ``BrowserProfile`` instances or raw dicts of constructor kwargs.

    Returns:
        The profile, or ``None`` when the key is unknown or the stored entry
        cannot be turned into a ``BrowserProfile`` (a warning is logged then).
    """
    builtin = _BUILTINS.get(machine_name)
    if builtin is not None:
        return builtin

    stored = store_profiles.get(machine_name)
    if stored is None:
        return None
    if isinstance(stored, BrowserProfile):
        return stored

    # Raw data from storage: build the model, tolerating corrupt entries
    try:
        profile = BrowserProfile(**stored)
    except Exception as exc:
        logger.warning(f"BrowserProfile '{machine_name}': failed to deserialize — {exc}")
        return None
    return profile
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Resolution
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def resolve_browser_profile(watch, datastore) -> BrowserProfile:
    """
    Resolve the effective ``BrowserProfile`` for *watch*.

    Resolution chain
    ~~~~~~~~~~~~~~~~
    1. ``watch['browser_profile']`` — explicit machine name set on the watch.
    2. Tags with ``overrides_watch=True`` that have ``browser_profile`` set,
       in the order returned by ``datastore.get_all_tags_for_watch()``.
    3. ``settings.application['browser_profile']`` — system-wide default.
    4. Built-in fallback: ``BUILTIN_REQUESTS``.

    At every level the values ``None``, ``''``, ``'system'`` and ``'default'``
    (and anything that is not a string) mean "not configured here".

    A machine name that cannot be resolved via ``get_profile()`` produces a
    ``logger.warning`` and the resolver *continues with the next level*
    instead of jumping straight to the built-in fallback.

    Args:
        watch: Watch dict / model object (needs ``.get()``).
        datastore: App datastore exposing ``data['settings']['application']``
            and ``get_all_tags_for_watch(uuid=...)``.

    Returns:
        The first profile that resolves, else ``BUILTIN_REQUESTS``.
    """
    sentinels = {'system', 'default', ''}
    app_settings = datastore.data['settings']['application']
    # `or {}` also covers a stored None
    store_profiles: dict = app_settings.get('browser_profiles') or {}

    def _candidates():
        """Yield the configured machine names in priority order (lazily)."""
        yield watch.get('browser_profile')

        tags = datastore.get_all_tags_for_watch(uuid=watch.get('uuid'))
        if tags:
            for tag in tags.values():
                if tag.get('overrides_watch'):
                    yield tag.get('browser_profile')

        yield app_settings.get('browser_profile')

    for machine_name in _candidates():
        # Non-strings (None, unhashable junk) can never be a valid key
        if not isinstance(machine_name, str) or machine_name in sentinels:
            continue

        profile = get_profile(machine_name, store_profiles)
        if profile:
            return profile

        logger.warning(
            f"Watch {watch.get('uuid')!r}: browser_profile {machine_name!r} not found, "
            f"falling back through the chain"
        )

    return BUILTIN_REQUESTS
|
||||
@@ -1,84 +0,0 @@
|
||||
"""
|
||||
Entity persistence mixin for Watch and Tag models.
|
||||
|
||||
Provides file-based persistence using atomic writes.
|
||||
"""
|
||||
|
||||
import functools
|
||||
import inspect
|
||||
|
||||
|
||||
@functools.lru_cache(maxsize=None)
|
||||
def _determine_entity_type(cls):
|
||||
"""
|
||||
Determine entity type from class hierarchy (cached at class level).
|
||||
|
||||
Args:
|
||||
cls: The class to inspect
|
||||
|
||||
Returns:
|
||||
str: Entity type ('watch', 'tag', etc.)
|
||||
|
||||
Raises:
|
||||
ValueError: If entity type cannot be determined
|
||||
"""
|
||||
for base_class in inspect.getmro(cls):
|
||||
module_name = base_class.__module__
|
||||
if module_name.startswith('changedetectionio.model.'):
|
||||
# Get last part after dot: "changedetectionio.model.Watch" -> "watch"
|
||||
return module_name.split('.')[-1].lower()
|
||||
|
||||
raise ValueError(
|
||||
f"Cannot determine entity type for {cls.__module__}.{cls.__name__}. "
|
||||
f"Entity must inherit from a class in changedetectionio.model (Watch or Tag)."
|
||||
)
|
||||
|
||||
|
||||
class EntityPersistenceMixin:
    """
    Mixin that supplies ``_save_to_disk()`` for entity classes.

    The entity type is derived from the class hierarchy via
    ``_determine_entity_type()``; it selects both the file name
    (``<entity_type>.json``) and the size limit passed to the writer.
    The host class must provide a ``data_dir`` attribute.

    Usage:
        class model(EntityPersistenceMixin, watch_base):  # in Watch.py
            pass

        class model(EntityPersistenceMixin, watch_base):  # in Tag.py
            pass
    """

    def _save_to_disk(self, data_dict, uuid):
        """
        Write ``data_dict`` through ``save_entity_atomic``.

        Args:
            data_dict: Dictionary to save
            uuid: UUID of the entity, forwarded to the writer

        Raises:
            ValueError: If the entity type cannot be determined from the class hierarchy
        """
        # Imported lazily to avoid a circular dependency
        from changedetectionio.store.file_saving_datastore import save_entity_atomic

        # Resolved per class (the helper is cached), not per instance
        entity_type = _determine_entity_type(self.__class__)

        # Watches get a 10 MB limit, every other entity type 1 MB
        if entity_type == 'watch':
            max_size_mb = 10
        else:
            max_size_mb = 1

        save_entity_atomic(
            self.data_dir,
            uuid,
            data_dict,
            filename=f'{entity_type}.json',
            entity_type=entity_type,
            max_size_mb=max_size_mb,
        )
|
||||
@@ -1,63 +0,0 @@
|
||||
"""
|
||||
Unified Watch → Tag → Global settings cascade resolver.
|
||||
|
||||
All settings resolution follows the same priority order:
|
||||
1. Watch-level setting (if set and not a sentinel "use parent" value)
|
||||
2. First tag with overrides_watch=True that has the field set
|
||||
3. Global application settings
|
||||
4. Caller-supplied default
|
||||
|
||||
This replaces the previously scattered manual resolution loops found in
|
||||
notification_service.py, processors/base.py, and the restock processor.
|
||||
"""
|
||||
|
||||
|
||||
def resolve_setting(watch, datastore, field_name, *,
                    sentinel_values=None,
                    default=None,
                    require_tag_override=True):
    """
    Resolve a single setting value by walking the Watch → Tag → Global chain.

    Args:
        watch:              Watch dict / model object.
        datastore:          App datastore (must have get_all_tags_for_watch() and
                            data['settings']['application']).
        field_name:         The setting key to look up at each level.
        sentinel_values:    Set of values that mean "not configured here, keep looking".
                            For example {'system'} for fetch_backend.
        default:            Value returned when nothing is found in the chain.
        require_tag_override: If True (default), only tags where overrides_watch=True
                            contribute to the cascade.  Set to False when every tag
                            that carries the field should be considered.

    Returns:
        The first value in the chain that is not ``None``, not ``''`` and not a
        sentinel, or *default*.  Unhashable values such as lists or dicts are
        never sentinels and are returned as-is (an empty list / dict counts as
        a configured value).
    """
    _sentinels = set(sentinel_values) if sentinel_values else set()

    def _is_unset(v):
        if v is None or v == '':
            return True
        try:
            return v in _sentinels
        except TypeError:
            # Set membership hashes `v`; lists/dicts are unhashable and would
            # raise here even for an empty sentinel set. They cannot be a
            # sentinel, so they count as "set".
            return False

    # 1. Watch level
    v = watch.get(field_name)
    if not _is_unset(v):
        return v

    # 2. Tag level
    tags = datastore.get_all_tags_for_watch(uuid=watch.get('uuid'))
    if tags:
        for tag in tags.values():
            if require_tag_override and not tag.get('overrides_watch'):
                continue
            v = tag.get(field_name)
            if not _is_unset(v):
                return v

    # 3. Global application settings
    v = datastore.data['settings']['application'].get(field_name)
    if not _is_unset(v):
        return v

    return default
|
||||
@@ -1,92 +0,0 @@
|
||||
"""
|
||||
Schema utilities for Watch and Tag models.
|
||||
|
||||
Provides functions to extract readonly fields and properties from OpenAPI spec.
|
||||
Shared by both the model layer and API layer to avoid circular dependencies.
|
||||
"""
|
||||
|
||||
import functools
|
||||
|
||||
|
||||
@functools.cache
def get_openapi_schema_dict():
    """
    Load ``api-spec.yaml`` and return it as the plain parsed YAML structure.

    The file is looked up at ``../../docs/api-spec.yaml`` relative to this
    module; when that path does not exist, ``../docs/api-spec.yaml`` is
    opened instead (without a further existence check). The parsed result is
    cached by ``functools.cache``, so the file is read at most once.
    """
    import os
    import yaml

    here = os.path.dirname(__file__)
    spec_path = os.path.join(here, '../../docs/api-spec.yaml')
    if not os.path.exists(spec_path):
        spec_path = os.path.join(here, '../docs/api-spec.yaml')

    with open(spec_path, 'r', encoding='utf-8') as spec_file:
        return yaml.safe_load(spec_file)
|
||||
|
||||
|
||||
@functools.cache
|
||||
def _resolve_readonly_fields(schema_name):
|
||||
"""
|
||||
Generic helper to resolve readOnly fields, including allOf inheritance.
|
||||
|
||||
Args:
|
||||
schema_name: Name of the schema (e.g., 'Watch', 'Tag')
|
||||
|
||||
Returns:
|
||||
frozenset: All readOnly field names including inherited ones
|
||||
"""
|
||||
spec_dict = get_openapi_schema_dict()
|
||||
schema = spec_dict['components']['schemas'].get(schema_name, {})
|
||||
|
||||
readonly_fields = set()
|
||||
|
||||
# Handle allOf (schema inheritance)
|
||||
if 'allOf' in schema:
|
||||
for item in schema['allOf']:
|
||||
# Resolve $ref to parent schema
|
||||
if '$ref' in item:
|
||||
ref_path = item['$ref'].split('/')[-1]
|
||||
ref_schema = spec_dict['components']['schemas'].get(ref_path, {})
|
||||
if 'properties' in ref_schema:
|
||||
for field_name, field_def in ref_schema['properties'].items():
|
||||
if field_def.get('readOnly') is True:
|
||||
readonly_fields.add(field_name)
|
||||
# Check schema-specific properties
|
||||
if 'properties' in item:
|
||||
for field_name, field_def in item['properties'].items():
|
||||
if field_def.get('readOnly') is True:
|
||||
readonly_fields.add(field_name)
|
||||
else:
|
||||
# Direct properties (no inheritance)
|
||||
if 'properties' in schema:
|
||||
for field_name, field_def in schema['properties'].items():
|
||||
if field_def.get('readOnly') is True:
|
||||
readonly_fields.add(field_name)
|
||||
|
||||
return frozenset(readonly_fields)
|
||||
|
||||
|
||||
@functools.cache
def get_readonly_watch_fields():
    """
    Return the read-only field names of the ``Watch`` schema in the OpenAPI spec.

    Thin cached wrapper around ``_resolve_readonly_fields('Watch')``, so the
    result includes read-only fields inherited through ``allOf``.
    """
    watch_readonly = _resolve_readonly_fields('Watch')
    return watch_readonly
|
||||
|
||||
|
||||
@functools.cache
def get_readonly_tag_fields():
    """
    Return the read-only field names of the ``Tag`` schema in the OpenAPI spec.

    Thin cached wrapper around ``_resolve_readonly_fields('Tag')``, so the
    result includes read-only fields inherited through ``allOf``.
    """
    tag_readonly = _resolve_readonly_fields('Tag')
    return tag_readonly
|
||||
@@ -1,3 +0,0 @@
|
||||
from .registry import registry, NotificationProfileType, AppriseProfileType
|
||||
|
||||
__all__ = ['registry', 'NotificationProfileType', 'AppriseProfileType']
|
||||
@@ -1,73 +0,0 @@
|
||||
"""
|
||||
Per-profile notification log.
|
||||
|
||||
Each profile gets its own log file at:
|
||||
{datastore_path}/notification-logs/{profile_uuid}.log
|
||||
|
||||
Entries are stored as JSON-lines (one JSON object per line).
|
||||
The file is capped at MAX_ENTRIES lines (oldest pruned first).
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
from datetime import datetime, timezone
|
||||
|
||||
MAX_ENTRIES = 100
|
||||
_LOG_DIR = 'notification-logs'
|
||||
|
||||
|
||||
def _log_file(datastore_path: str, profile_uuid: str) -> str:
|
||||
return os.path.join(datastore_path, _LOG_DIR, f'{profile_uuid}.log')
|
||||
|
||||
|
||||
def write_profile_log(datastore_path: str, profile_uuid: str, *,
                      watch_url: str = '',
                      watch_uuid: str = '',
                      status: str,           # 'ok' | 'error' | 'test'
                      message: str = ''):
    """
    Append one JSON-lines entry to the profile's log and prune it to MAX_ENTRIES.

    The log directory is created on demand. The whole file is read, the new
    entry appended, the oldest lines beyond ``MAX_ENTRIES`` dropped, and the
    file rewritten.

    Args:
        datastore_path: Root directory of the datastore.
        profile_uuid:   UUID of the notification profile (used as file name).
        watch_url:      URL of the watch; stored truncated to 200 characters.
        watch_uuid:     UUID of the watch.
        status:         'ok' | 'error' | 'test'.
        message:        Free text; stored truncated to 500 characters.

    ``watch_url`` and ``message`` may be ``None`` (stored as ``''``) or any
    other object (stored as ``str(obj)``), so passing an exception instance
    as the message does not crash the logging path.

    Raises:
        OSError: If the directory or file cannot be created / written.
    """
    log_dir = os.path.join(datastore_path, _LOG_DIR)
    os.makedirs(log_dir, exist_ok=True)

    # Slicing below requires real strings
    watch_url = '' if watch_url is None else str(watch_url)
    message = '' if message is None else str(message)

    entry = json.dumps({
        'ts': datetime.now(tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S UTC'),
        'watch_url': watch_url[:200],
        'watch_uuid': watch_uuid,
        'status': status,
        'message': message[:500],
    }, ensure_ascii=False)

    path = _log_file(datastore_path, profile_uuid)
    try:
        with open(path, 'r', encoding='utf-8') as fh:
            # Iterate the file instead of using str.splitlines(): splitlines()
            # also breaks on U+2028 / U+2029 / U+0085, which json.dumps(...,
            # ensure_ascii=False) leaves unescaped, so an entry containing one
            # of them would be cut into invalid fragments on the next write.
            lines = [line.rstrip('\n') for line in fh if line.strip()]
    except FileNotFoundError:
        lines = []

    lines.append(entry)
    lines = lines[-MAX_ENTRIES:]  # keep only the newest MAX_ENTRIES entries

    with open(path, 'w', encoding='utf-8') as fh:
        fh.write('\n'.join(lines) + '\n')
|
||||
|
||||
|
||||
def read_profile_log(datastore_path: str, profile_uuid: str) -> list:
    """
    Read a profile's JSON-lines log, newest entry first.

    Blank lines are ignored and lines that are not valid JSON are skipped
    silently.

    Returns:
        list: The decoded entries in reverse file order; ``[]`` when the log
        file does not exist.
    """
    log_path = _log_file(datastore_path, profile_uuid)
    try:
        with open(log_path, 'r', encoding='utf-8') as fh:
            raw_lines = [raw.strip() for raw in fh if raw.strip()]
    except FileNotFoundError:
        return []

    parsed = []
    # The file is oldest-first; walk it backwards for newest-first output
    for raw in raw_lines[::-1]:
        try:
            record = json.loads(raw)
        except (json.JSONDecodeError, ValueError):
            continue
        parsed.append(record)
    return parsed
|
||||
|
||||
|
||||
def has_log(datastore_path: str, profile_uuid: str) -> bool:
    """Return True when a log file exists on disk for the given profile."""
    log_path = _log_file(datastore_path, profile_uuid)
    return os.path.exists(log_path)
|
||||
@@ -1,111 +0,0 @@
|
||||
"""
|
||||
Notification Profile Type plugin registry.
|
||||
|
||||
NotificationProfileType is the abstract base — the only contract is send().
|
||||
Plugins are free to use any delivery mechanism (Apprise, direct HTTP, SDK, etc.).
|
||||
|
||||
Built-in: AppriseProfileType (raw Apprise URL list).
|
||||
|
||||
Third-party plugins register additional types:
|
||||
|
||||
from changedetectionio.notification_profiles.registry import registry, NotificationProfileType
|
||||
|
||||
@registry.register
|
||||
class MyProfileType(NotificationProfileType):
|
||||
type_id = "mytype"
|
||||
display_name = "My Service"
|
||||
icon = "bell"
|
||||
template = "my_plugin/notification_profiles/types/mytype.html"
|
||||
|
||||
def send(self, config: dict, n_object: dict, datastore) -> bool:
|
||||
requests.post(config['webhook_url'], json={"text": n_object['notification_body']})
|
||||
return True
|
||||
"""
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
|
||||
|
||||
class NotificationProfileType(ABC):
    """
    Abstract base for notification profile types.

    Subclasses must implement ``send()``. The class attributes below are
    placeholders (``NotImplemented``) that concrete types are expected to
    override; only ``icon`` has a usable default.
    """

    type_id: str = NotImplemented
    display_name: str = NotImplemented
    icon: str = "bell"               # feather icon name
    template: str = NotImplemented   # Jinja2 partial rendered in the profile edit form

    @abstractmethod
    def send(self, config: dict, n_object: dict, datastore) -> bool:
        """
        Deliver the notification.

        Args:
            config:    The profile's config dict (type-specific fields).
            n_object:  Fully-rendered NotificationContextData (title, body, format, etc.).
            datastore: App datastore for any extra lookups.

        Returns True on success, False on failure (do not raise — log instead).
        """

    def validate(self, config: dict) -> None:
        """Raise ValueError with a user-readable message on invalid config.

        The base implementation accepts every config.
        """
        return None

    def get_url_hint(self, config: dict) -> str:
        """Short display string shown in the selector chip tooltip / dropdown row.

        The base implementation has no hint and returns an empty string.
        """
        hint = ''
        return hint
|
||||
|
||||
|
||||
class AppriseProfileType(NotificationProfileType):
    """Notification profile type that hands a list of Apprise URLs to ``process_notification``."""

    type_id = "apprise"
    display_name = "Apprise"
    icon = "bell"
    template = "notification_profiles/types/apprise.html"

    def get_apprise_urls(self, config: dict) -> list:
        """Return ``config['notification_urls']``, or ``[]`` when missing or empty."""
        configured = config.get('notification_urls')
        return configured if configured else []

    def send(self, config: dict, n_object, datastore) -> bool:
        """
        Send the notification to the profile's Apprise URLs.

        ``n_object`` is wrapped in ``NotificationContextData`` unless it
        already is one; an existing instance is updated in place. The profile's
        title / body / format replace the incoming values when they are set.

        Returns:
            False when the profile has no URLs (nothing is sent), otherwise
            True once ``process_notification`` has returned. Exceptions raised
            by ``process_notification`` are not caught here.
        """
        from changedetectionio.notification.handler import process_notification
        from changedetectionio.notification_service import NotificationContextData

        targets = self.get_apprise_urls(config)
        if not targets:
            return False

        if not isinstance(n_object, NotificationContextData):
            n_object = NotificationContextData(n_object)

        n_object['notification_urls'] = targets
        # A profile-level value wins; a missing / empty one keeps what was passed in
        for key in ('notification_title', 'notification_body', 'notification_format'):
            n_object[key] = config.get(key) or n_object.get(key)

        process_notification(n_object, datastore)
        return True

    def get_url_hint(self, config: dict) -> str:
        """Return the first configured URL, cut to 60 characters plus '…' when longer."""
        urls = config.get('notification_urls') or []
        if not urls:
            return ''
        first = urls[0]
        if len(first) > 60:
            return first[:60] + '…'
        return first
|
||||
|
||||
|
||||
class _Registry:
|
||||
def __init__(self):
|
||||
self._types: dict = {}
|
||||
|
||||
def register(self, cls):
|
||||
"""Register a NotificationProfileType subclass. Usable as a decorator."""
|
||||
instance = cls()
|
||||
self._types[instance.type_id] = instance
|
||||
return cls
|
||||
|
||||
def get(self, type_id: str) -> NotificationProfileType:
|
||||
return self._types.get(type_id, self._types.get('apprise'))
|
||||
|
||||
def all(self) -> list:
|
||||
return list(self._types.values())
|
||||
|
||||
def choices(self) -> list:
|
||||
return [(t.type_id, t.display_name) for t in self._types.values()]
|
||||
|
||||
|
||||
# Module-level registry instance, created when this module is imported.
registry = _Registry()
# Register the built-in Apprise type right away; _Registry.get() uses the
# 'apprise' entry as its fallback for unknown type ids.
registry.register(AppriseProfileType)
|
||||
@@ -1,49 +0,0 @@
|
||||
"""
|
||||
Resolve the full set of NotificationProfile objects that should fire for a given watch.
|
||||
|
||||
Merges profile UUIDs from: Watch → Tags → System (union, deduplicated).
|
||||
Mute cascade is checked separately via resolve_setting() before calling this.
|
||||
"""
|
||||
|
||||
from loguru import logger
|
||||
|
||||
|
||||
def resolve_notification_profiles(watch, datastore) -> list:
    """
    Collect the notification profiles that apply to *watch*.

    Profile UUIDs are gathered from the watch, then from every tag of the
    watch, then from the application settings. Each UUID is looked up in
    ``settings.application.notification_profile_data``; unknown UUIDs are
    logged and skipped, and a UUID that occurs at several levels is only
    included once (at its first occurrence).

    Returns:
        list: ``(profile_dict, type_handler)`` tuples in resolution order,
        where ``type_handler`` comes from the registry for the profile's
        ``type`` (default ``'apprise'``).
    """
    from changedetectionio.notification_profiles.registry import registry

    known_profiles = datastore.data['settings']['application'].get('notification_profile_data', {})

    fired_uuids = set()
    resolved = []

    def _collect(uuid_list):
        for profile_uuid in (uuid_list or []):
            if profile_uuid in fired_uuids:
                continue
            profile = known_profiles.get(profile_uuid)
            if profile:
                fired_uuids.add(profile_uuid)
                handler = registry.get(profile.get('type', 'apprise'))
                resolved.append((profile, handler))
            else:
                logger.warning(f"Notification profile UUID {profile_uuid!r} not found, skipping")

    # 1. Watch-level
    _collect(watch.get('notification_profiles', []))

    # 2. Tag/group level
    tags = datastore.get_all_tags_for_watch(uuid=watch.get('uuid'))
    for tag in (tags.values() if tags else ()):
        _collect(tag.get('notification_profiles', []))

    # 3. System level
    _collect(datastore.data['settings']['application'].get('notification_profiles', []))

    return resolved
|
||||
@@ -54,128 +54,34 @@ def _check_cascading_vars(datastore, var_name, watch):
|
||||
return None
|
||||
|
||||
|
||||
class FormattableTimestamp(str):
    """
    A str subclass representing a formatted datetime. As a plain string it renders
    with the default format, but can also be called with a custom format argument
    in Jinja2 templates:

        {{ change_datetime }}                        → '2024-01-15 10:30:00 UTC'
        {{ change_datetime(format='%Y') }}           → '2024'
        {{ change_datetime(format='%A') }}           → 'Monday'
        {{ change_datetime(format='%Y-%m-%d') }}     → '2024-01-15'

    Being a str subclass means it is natively JSON serializable.

    Instances can be copied, deep-copied and pickled: reconstruction goes
    through ``__getnewargs__`` so ``__new__`` receives a numeric timestamp again
    instead of the already formatted text.
    """
    _DEFAULT_FORMAT = '%Y-%m-%d %H:%M:%S %Z'

    def __new__(cls, timestamp):
        """Build the string from a Unix *timestamp* (anything ``int()`` accepts)."""
        # Interpret the epoch value as UTC, then convert to the server's local timezone.
        dt = datetime.datetime.fromtimestamp(int(timestamp), tz=datetime.timezone.utc)
        local_tz = datetime.datetime.now().astimezone().tzinfo
        dt_local = dt.astimezone(local_tz)
        try:
            formatted = dt_local.strftime(cls._DEFAULT_FORMAT)
        except Exception:
            # Fall back to ISO 8601 if strftime rejects the format on this platform.
            formatted = dt_local.isoformat()
        instance = super().__new__(cls, formatted)
        instance._dt = dt_local
        return instance

    def __getnewargs__(self):
        """Arguments for ``__new__`` when the object is copied or unpickled.

        The default inherited from ``str`` would pass the formatted text, which
        ``int()`` in ``__new__`` cannot parse.
        """
        return (self._dt.timestamp(),)

    def __call__(self, format=_DEFAULT_FORMAT):
        """Return the stored local datetime rendered with the strftime pattern *format*."""
        try:
            return self._dt.strftime(format)
        except Exception:
            return self._dt.isoformat()
|
||||
|
||||
|
||||
class FormattableDiff(str):
    """
    A str subclass representing a rendered diff. As a plain string it renders
    with the default options for that variant, but can be called with custom
    arguments in Jinja2 templates:

        {{ diff }}                              → default diff output
        {{ diff(lines=5) }}                     → truncate to 5 lines
        {{ diff(added_only=true) }}             → only show added lines
        {{ diff(removed_only=true) }}           → only show removed lines
        {{ diff(context=3) }}                   → 3 lines of context around changes
        {{ diff(word_diff=false) }}             → line-level diff instead of word-level
        {{ diff(lines=10, added_only=true) }}   → combine args
        {{ diff_added(lines=5) }}               → works on any diff_* variant too

    Being a str subclass means it is natively JSON serializable.

    Instances can be copied, deep-copied and pickled: reconstruction goes
    through ``__getnewargs_ex__`` so ``__new__`` receives both snapshots and the
    base keyword arguments again instead of only the rendered text.
    """
    def __new__(cls, prev_snapshot, current_snapshot, **base_kwargs):
        """Render the default diff between the two snapshots using *base_kwargs*.

        When both snapshots are empty/falsy nothing is rendered and the string
        value is ''.
        """
        if prev_snapshot or current_snapshot:
            # Imported lazily so the empty case does not need the diff module at all.
            from changedetectionio import diff as diff_module
            rendered = diff_module.render_diff(prev_snapshot, current_snapshot, **base_kwargs)
        else:
            rendered = ''
        instance = super().__new__(cls, rendered)
        instance._prev = prev_snapshot
        instance._current = current_snapshot
        instance._base_kwargs = base_kwargs
        return instance

    def __getnewargs_ex__(self):
        """Positional and keyword arguments for ``__new__`` on copy / unpickle.

        The default inherited from ``str`` would pass only the rendered text,
        leaving ``current_snapshot`` missing.
        """
        return (self._prev, self._current), dict(self._base_kwargs)

    def __call__(self, lines=None, added_only=False, removed_only=False, context=0,
                 word_diff=None, case_insensitive=False, ignore_junk=False):
        """Re-render the diff with per-call overrides layered on the base kwargs.

        Args:
            lines: If not None, keep only the first ``int(lines)`` lines of the output.
            added_only: Truthy sets ``include_removed=False``.
            removed_only: Truthy sets ``include_added=False``.
            context: Truthy sets ``context_lines=int(context)``.
            word_diff: If not None, sets ``word_diff=bool(word_diff)``.
            case_insensitive: Truthy sets ``case_insensitive=True``.
            ignore_junk: Truthy sets ``ignore_junk=True``.

        Returns:
            str: The freshly rendered (and possibly truncated) diff text.
        """
        from changedetectionio import diff as diff_module
        # Copy so per-call overrides never leak into the stored defaults.
        kwargs = dict(self._base_kwargs)

        if added_only:
            kwargs['include_removed'] = False
        if removed_only:
            kwargs['include_added'] = False
        if context:
            kwargs['context_lines'] = int(context)
        if word_diff is not None:
            kwargs['word_diff'] = bool(word_diff)
        if case_insensitive:
            kwargs['case_insensitive'] = True
        if ignore_junk:
            kwargs['ignore_junk'] = True

        result = diff_module.render_diff(self._prev or '', self._current or '', **kwargs)

        if lines is not None:
            result = '\n'.join(result.splitlines()[:int(lines)])

        return result
|
||||
|
||||
|
||||
|
||||
# What is passed around as notification context, also used as the complete list of valid {{ tokens }}
|
||||
class NotificationContextData(dict):
|
||||
def __init__(self, initial_data=None, **kwargs):
|
||||
# ValidateJinja2Template() validates against the keynames of this dict to check for valid tokens in the body (user submission)
|
||||
super().__init__({
|
||||
'base_url': None,
|
||||
'change_datetime': FormattableTimestamp(time.time()),
|
||||
'current_snapshot': None,
|
||||
'diff': FormattableDiff('', ''),
|
||||
'diff_clean': FormattableDiff('', '', include_change_type_prefix=False),
|
||||
'diff_added': FormattableDiff('', '', include_removed=False),
|
||||
'diff_added_clean': FormattableDiff('', '', include_removed=False, include_change_type_prefix=False),
|
||||
'diff_full': FormattableDiff('', '', include_equal=True),
|
||||
'diff_full_clean': FormattableDiff('', '', include_equal=True, include_change_type_prefix=False),
|
||||
'diff_patch': FormattableDiff('', '', patch_format=True),
|
||||
'diff_removed': FormattableDiff('', '', include_added=False),
|
||||
'diff_removed_clean': FormattableDiff('', '', include_added=False, include_change_type_prefix=False),
|
||||
'diff': None,
|
||||
'diff_clean': None,
|
||||
'diff_added': None,
|
||||
'diff_added_clean': None,
|
||||
'diff_full': None,
|
||||
'diff_full_clean': None,
|
||||
'diff_patch': None,
|
||||
'diff_removed': None,
|
||||
'diff_removed_clean': None,
|
||||
'diff_url': None,
|
||||
'markup_text_links_to_html_links': False, # If automatic conversion of plaintext to HTML should happen
|
||||
'notification_timestamp': time.time(),
|
||||
'prev_snapshot': None,
|
||||
'preview_url': None,
|
||||
'screenshot': None,
|
||||
'triggered_text': None,
|
||||
'timestamp_from': None,
|
||||
'timestamp_to': None,
|
||||
'triggered_text': None,
|
||||
'uuid': 'XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX', # Converted to 'watch_uuid' in create_notification_parameters
|
||||
'watch_mime_type': None,
|
||||
'watch_tag': None,
|
||||
'watch_title': None,
|
||||
'watch_url': 'https://WATCH-PLACE-HOLDER/',
|
||||
'watch_uuid': 'XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX', # Converted to 'watch_uuid' in create_notification_parameters
|
||||
})
|
||||
|
||||
# Apply any initial data passed in
|
||||
@@ -197,7 +103,7 @@ class NotificationContextData(dict):
|
||||
So we can test the output in the notification body
|
||||
"""
|
||||
for key in self.keys():
|
||||
if key in ['uuid', 'time', 'watch_uuid', 'change_datetime'] or key.startswith('diff'):
|
||||
if key in ['uuid', 'time', 'watch_uuid']:
|
||||
continue
|
||||
rand_str = 'RANDOM-PLACEHOLDER-'+''.join(random.choices(string.ascii_letters + string.digits, k=12))
|
||||
self[key] = rand_str
|
||||
@@ -209,6 +115,24 @@ class NotificationContextData(dict):
|
||||
|
||||
super().__setitem__(key, value)
|
||||
|
||||
def timestamp_to_localtime(timestamp):
    """Format a Unix timestamp as 'YYYY-MM-DD HH:MM:SS TZ' in the server's local timezone.

    Args:
        timestamp: Seconds since the epoch (anything ``int()`` accepts).

    Returns:
        str: The formatted local date/time, or its ISO 8601 form if ``strftime`` fails.
    """
    # Interpret the epoch value as UTC explicitly. A naive fromtimestamp() already
    # yields local wall-clock time, so labelling that value as UTC and converting
    # to local time again would apply the UTC offset twice.
    utc_dt = datetime.datetime.fromtimestamp(int(timestamp), tz=datetime.timezone.utc)

    # Get local timezone-aware datetime
    local_tz = datetime.datetime.now().astimezone().tzinfo
    local_dt = utc_dt.astimezone(local_tz)

    try:
        formatted_date = local_dt.strftime('%Y-%m-%d %H:%M:%S %Z')
    except Exception:
        # Fallback if strftime/locale handling fails on this platform
        formatted_date = local_dt.isoformat()

    return formatted_date
|
||||
|
||||
def add_rendered_diff_to_notification_vars(notification_scan_text:str, prev_snapshot:str, current_snapshot:str, word_diff:bool):
|
||||
"""
|
||||
Efficiently renders only the diff placeholders that are actually used in the notification text.
|
||||
@@ -226,12 +150,13 @@ def add_rendered_diff_to_notification_vars(notification_scan_text:str, prev_snap
|
||||
Returns:
|
||||
dict: Only the diff placeholders that were found in notification_scan_text, with rendered content
|
||||
"""
|
||||
from changedetectionio import diff
|
||||
import re
|
||||
from functools import lru_cache
|
||||
|
||||
now = time.time()
|
||||
|
||||
# Define base kwargs for each diff variant — these become the stored defaults
|
||||
# on the FormattableDiff object, so {{ diff(lines=5) }} overrides on top of them
|
||||
# Define specifications for each diff variant
|
||||
diff_specs = {
|
||||
'diff': {'word_diff': word_diff},
|
||||
'diff_clean': {'word_diff': word_diff, 'include_change_type_prefix': False},
|
||||
@@ -244,15 +169,22 @@ def add_rendered_diff_to_notification_vars(notification_scan_text:str, prev_snap
|
||||
'diff_removed_clean': {'word_diff': word_diff, 'include_added': False, 'include_change_type_prefix': False},
|
||||
}
|
||||
|
||||
# Memoize render_diff to avoid duplicate renders with same kwargs
|
||||
@lru_cache(maxsize=4)
|
||||
def cached_render(kwargs_tuple):
|
||||
return diff.render_diff(prev_snapshot, current_snapshot, **dict(kwargs_tuple))
|
||||
|
||||
ret = {}
|
||||
rendered_count = 0
|
||||
# Only create FormattableDiff objects for diff keys actually used in the notification text
|
||||
# Only check and render diff keys that exist in NotificationContextData
|
||||
for key in NotificationContextData().keys():
|
||||
if key.startswith('diff') and key in diff_specs:
|
||||
# Check if this placeholder is actually used in the notification text
|
||||
pattern = rf"(?<![A-Za-z0-9_]){re.escape(key)}(?![A-Za-z0-9_])"
|
||||
if re.search(pattern, notification_scan_text, re.IGNORECASE):
|
||||
ret[key] = FormattableDiff(prev_snapshot, current_snapshot, **diff_specs[key])
|
||||
kwargs = diff_specs[key]
|
||||
# Convert dict to sorted tuple for cache key (handles duplicate kwarg combinations)
|
||||
ret[key] = cached_render(tuple(sorted(kwargs.items())))
|
||||
rendered_count += 1
|
||||
|
||||
if rendered_count:
|
||||
@@ -266,7 +198,7 @@ def set_basic_notification_vars(current_snapshot, prev_snapshot, watch, triggere
|
||||
'current_snapshot': current_snapshot,
|
||||
'prev_snapshot': prev_snapshot,
|
||||
'screenshot': watch.get_screenshot() if watch and watch.get('notification_screenshot') else None,
|
||||
'change_datetime': FormattableTimestamp(timestamp_changed) if timestamp_changed else None,
|
||||
'change_datetime': timestamp_to_localtime(timestamp_changed) if timestamp_changed else None,
|
||||
'triggered_text': triggered_text,
|
||||
'uuid': watch.get('uuid') if watch else None,
|
||||
'watch_url': watch.get('url') if watch else None,
|
||||
|
||||
@@ -105,75 +105,6 @@ class ChangeDetectionSpec:
|
||||
"""
|
||||
pass
|
||||
|
||||
@hookspec
def register_processor(self):
    """Register an external processor plugin.

    External packages can implement this hook to register custom processors
    that will be discovered alongside built-in processors.

    Returns:
        dict or None: Dictionary with processor information:
            {
                'processor_name': str,      # Machine name (e.g., 'osint_recon')
                'processor_module': module, # Module containing processor.py
                'processor_class': class,   # The perform_site_check class
                'metadata': {               # Optional metadata
                    'name': str,            # Display name
                    'description': str,     # Description
                    'processor_weight': int,# Sort weight (lower = higher priority)
                    'list_badge_text': str, # Badge text for UI
                }
            }
            Return None if this plugin doesn't provide a processor

    Note:
        A result is only picked up by processor discovery when it is a dict
        with truthy 'processor_name' and 'processor_module' entries; other
        return values are ignored there.
    """
    # Specification only: implementations are supplied by plugins.
    pass
|
||||
|
||||
@hookspec
def update_handler_alter(update_handler, watch, datastore):
    """Modify or wrap the update_handler before it processes a watch.

    This hook is called after the update_handler (perform_site_check instance) is created
    but before it calls call_browser() and run_changedetection(). Plugins can use this to:
    - Wrap the handler to add logging/metrics
    - Modify handler configuration
    - Add custom preprocessing logic

    Args:
        update_handler: The perform_site_check instance that will process the watch
        watch: The watch dict being processed
        datastore: The application datastore

    Returns:
        object or None: Return a modified/wrapped handler, or None to keep the original.
                       If multiple plugins return handlers, they are chained in registration order.

    NOTE(review): apply_update_handler_alter() invokes this hook once with the
    original handler and keeps the last non-None result, so implementations do
    not appear to receive each other's output — confirm the intended chaining.
    """
    # Specification only: implementations are supplied by plugins.
    pass
|
||||
|
||||
@hookspec
def update_finalize(update_handler, watch, datastore, processing_exception):
    """Called after watch processing completes (success or failure).

    This hook is called in the finally block after all processing is complete,
    allowing plugins to perform cleanup, update metrics, or log final status.

    The plugin can access update_handler.last_logging_insert_id if it was stored
    during update_handler_alter, and use processing_exception to determine if
    the processing succeeded or failed.

    Args:
        update_handler: The perform_site_check instance (may be None if creation failed)
        watch: The watch dict that was processed (may be None if not loaded)
        datastore: The application datastore
        processing_exception: The exception from the main processing block, or None if successful.
                             This does NOT include cleanup exceptions - only exceptions from
                             the actual watch processing (fetch, diff, etc).

    Returns:
        None: This hook doesn't return a value

    Note:
        apply_update_finalize() catches and logs any exception raised by an
        implementation, so a failing plugin does not propagate to the caller.
    """
    # Specification only: implementations are supplied by plugins.
    pass
|
||||
|
||||
|
||||
# Set up Plugin Manager
|
||||
plugin_manager = pluggy.PluginManager(PLUGIN_NAMESPACE)
|
||||
@@ -237,23 +168,14 @@ def register_builtin_fetchers():
|
||||
This is called from content_fetchers/__init__.py after all fetchers are imported
|
||||
to avoid circular import issues.
|
||||
"""
|
||||
from changedetectionio.content_fetchers import requests, puppeteer, webdriver_selenium
|
||||
from changedetectionio.content_fetchers.playwright import CDP, chrome, firefox, webkit
|
||||
from changedetectionio.content_fetchers import requests, playwright, puppeteer, webdriver_selenium
|
||||
|
||||
# Register each built-in fetcher plugin
|
||||
if hasattr(requests, 'requests_plugin'):
|
||||
plugin_manager.register(requests.requests_plugin, 'builtin_requests')
|
||||
|
||||
if hasattr(CDP, 'cdp_plugin'):
|
||||
plugin_manager.register(CDP.cdp_plugin, 'builtin_playwright_cdp')
|
||||
|
||||
if hasattr(chrome, 'chrome_plugin'):
|
||||
plugin_manager.register(chrome.chrome_plugin, 'builtin_playwright_chrome')
|
||||
|
||||
if hasattr(firefox, 'firefox_plugin'):
|
||||
plugin_manager.register(firefox.firefox_plugin, 'builtin_playwright_firefox')
|
||||
|
||||
if hasattr(webkit, 'webkit_plugin'):
|
||||
plugin_manager.register(webkit.webkit_plugin, 'builtin_playwright_webkit')
|
||||
if hasattr(playwright, 'playwright_plugin'):
|
||||
plugin_manager.register(playwright.playwright_plugin, 'builtin_playwright')
|
||||
|
||||
if hasattr(puppeteer, 'puppeteer_plugin'):
|
||||
plugin_manager.register(puppeteer.puppeteer_plugin, 'builtin_puppeteer')
|
||||
@@ -369,28 +291,57 @@ def get_active_plugins():
|
||||
|
||||
|
||||
def get_fetcher_capabilities(watch, datastore):
|
||||
"""Get capability flags for a watch's resolved fetcher.
|
||||
"""Get capability flags for a watch's fetcher.
|
||||
|
||||
Uses the BrowserProfile resolution chain (watch → tag → global → built-in)
|
||||
to determine the actual fetcher class, then reads its capability flags.
|
||||
Args:
|
||||
watch: The watch object/dict
|
||||
datastore: The datastore to resolve 'system' fetcher
|
||||
|
||||
Returns:
|
||||
dict: {'supports_browser_steps': bool, 'supports_screenshots': bool,
|
||||
'supports_xpath_element_data': bool}
|
||||
dict: Dictionary with capability flags:
|
||||
{
|
||||
'supports_browser_steps': bool,
|
||||
'supports_screenshots': bool,
|
||||
'supports_xpath_element_data': bool
|
||||
}
|
||||
"""
|
||||
from changedetectionio.model.browser_profile import resolve_browser_profile
|
||||
# Get the fetcher name from watch
|
||||
fetcher_name = watch.get('fetch_backend', 'system')
|
||||
|
||||
# Resolve 'system' to actual fetcher
|
||||
if fetcher_name == 'system':
|
||||
fetcher_name = datastore.data['settings']['application'].get('fetch_backend', 'html_requests')
|
||||
|
||||
# Get the fetcher class
|
||||
from changedetectionio import content_fetchers
|
||||
|
||||
profile = resolve_browser_profile(watch, datastore)
|
||||
fetcher_class = content_fetchers.get_fetcher(profile.fetch_backend)
|
||||
# Try to get from built-in fetchers first
|
||||
if hasattr(content_fetchers, fetcher_name):
|
||||
fetcher_class = getattr(content_fetchers, fetcher_name)
|
||||
return {
|
||||
'supports_browser_steps': getattr(fetcher_class, 'supports_browser_steps', False),
|
||||
'supports_screenshots': getattr(fetcher_class, 'supports_screenshots', False),
|
||||
'supports_xpath_element_data': getattr(fetcher_class, 'supports_xpath_element_data', False)
|
||||
}
|
||||
|
||||
if fetcher_class is None:
|
||||
return {'supports_browser_steps': False, 'supports_screenshots': False, 'supports_xpath_element_data': False}
|
||||
# Try to get from plugin-provided fetchers
|
||||
# Query all plugins for registered fetchers
|
||||
plugin_fetchers = plugin_manager.hook.register_content_fetcher()
|
||||
for fetcher_registration in plugin_fetchers:
|
||||
if fetcher_registration:
|
||||
name, fetcher_class = fetcher_registration
|
||||
if name == fetcher_name:
|
||||
return {
|
||||
'supports_browser_steps': getattr(fetcher_class, 'supports_browser_steps', False),
|
||||
'supports_screenshots': getattr(fetcher_class, 'supports_screenshots', False),
|
||||
'supports_xpath_element_data': getattr(fetcher_class, 'supports_xpath_element_data', False)
|
||||
}
|
||||
|
||||
# Default: no capabilities
|
||||
return {
|
||||
'supports_browser_steps': getattr(fetcher_class, 'supports_browser_steps', False),
|
||||
'supports_screenshots': getattr(fetcher_class, 'supports_screenshots', False),
|
||||
'supports_xpath_element_data': getattr(fetcher_class, 'supports_xpath_element_data', False),
|
||||
'supports_browser_steps': False,
|
||||
'supports_screenshots': False,
|
||||
'supports_xpath_element_data': False
|
||||
}
|
||||
|
||||
|
||||
@@ -524,66 +475,4 @@ def get_plugin_template_paths():
|
||||
template_paths.append(templates_dir)
|
||||
logger.debug(f"Added plugin template path: {templates_dir}")
|
||||
|
||||
return template_paths
|
||||
|
||||
|
||||
def apply_update_handler_alter(update_handler, watch, datastore):
    """Run the ``update_handler_alter`` plugin hook and pick the handler to use.

    The hook is invoked once, with the original *update_handler*, and returns a
    list with one entry per implementing plugin. Every non-None entry is logged
    at debug level; the last non-None entry in that list is returned. When no
    plugin returns a handler, the original *update_handler* is returned unchanged.

    NOTE(review): plugins all receive the original handler here, so their results
    are not fed into one another — the final list entry simply wins. The order of
    the list is decided by pluggy; confirm it matches the intended priority.

    Args:
        update_handler: The perform_site_check instance to potentially replace
        watch: The watch dict being processed (``watch.get('uuid')`` is logged)
        datastore: The application datastore

    Returns:
        object: The selected (possibly plugin-supplied) update handler
    """
    replacements = plugin_manager.hook.update_handler_alter(
        update_handler=update_handler,
        watch=watch,
        datastore=datastore
    )

    selected = update_handler
    for candidate in (replacements or ()):
        if candidate is None:
            # This plugin chose not to alter the handler.
            continue
        logger.debug(f"Plugin modified update_handler for watch {watch.get('uuid')}")
        selected = candidate

    return selected
|
||||
|
||||
|
||||
def apply_update_finalize(update_handler, watch, datastore, processing_exception):
    """Run the ``update_finalize`` plugin hook without letting it raise.

    Intended for the ``finally`` stage of watch processing so plugins can clean
    up, record metrics or log the final status. Any exception raised while
    calling the hook is logged (message first, then the traceback) and swallowed,
    so a faulty plugin cannot take down the worker.

    Args:
        update_handler: The perform_site_check instance (may be None)
        watch: The watch dict that was processed (may be None)
        datastore: The application datastore
        processing_exception: The exception from processing, or None if successful

    Returns:
        None
    """
    hook_kwargs = {
        'update_handler': update_handler,
        'watch': watch,
        'datastore': datastore,
        'processing_exception': processing_exception,
    }
    try:
        plugin_manager.hook.update_finalize(**hook_kwargs)
    except Exception as e:
        # Plugin failures are reported but never propagated to the caller.
        logger.error(f"Error in update_finalize hook: {e}")
        logger.exception(f"update_finalize hook exception details:")
|
||||
return template_paths
|
||||
@@ -9,15 +9,6 @@ Some suggestions for the future
|
||||
|
||||
- `graphical`
|
||||
|
||||
## API schema extension (`api.yaml`)
|
||||
|
||||
A processor can extend the Watch/Tag API schema by placing an `api.yaml` alongside its `__init__.py`.
|
||||
Define a `components.schemas.processor_config_<name>` entry and it will be merged into `WatchBase` at startup,
|
||||
making `processor_config_<name>` a valid field on all watch create/update API calls.
|
||||
The fully merged spec is served live at `/api/v1/full-spec`.
|
||||
|
||||
See `restock_diff/api.yaml` for a working example.
|
||||
|
||||
## Todo
|
||||
|
||||
- Make each processor return an extra list of sub-processors (so a single processor could be configured in different ways)
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
from functools import lru_cache
|
||||
from loguru import logger
|
||||
from flask_babel import gettext, get_locale
|
||||
from flask_babel import gettext
|
||||
import importlib
|
||||
import inspect
|
||||
import os
|
||||
@@ -17,11 +17,9 @@ def find_sub_packages(package_name):
|
||||
return [name for _, name, is_pkg in pkgutil.iter_modules(package.__path__) if is_pkg]
|
||||
|
||||
|
||||
@lru_cache(maxsize=1)
|
||||
def find_processors():
|
||||
"""
|
||||
Find all subclasses of DifferenceDetectionProcessor in the specified package.
|
||||
Results are cached to avoid repeated discovery.
|
||||
|
||||
:param package_name: The name of the package to scan for processor modules.
|
||||
:return: A list of (module, class) tuples.
|
||||
@@ -48,23 +46,6 @@ def find_processors():
|
||||
except (ModuleNotFoundError, ImportError) as e:
|
||||
logger.warning(f"Failed to import module {module_name}: {e} (find_processors())")
|
||||
|
||||
# Discover plugin processors via pluggy
|
||||
try:
|
||||
from changedetectionio.pluggy_interface import plugin_manager
|
||||
plugin_results = plugin_manager.hook.register_processor()
|
||||
|
||||
for result in plugin_results:
|
||||
if result and isinstance(result, dict):
|
||||
processor_module = result.get('processor_module')
|
||||
processor_name = result.get('processor_name')
|
||||
|
||||
if processor_module and processor_name:
|
||||
processors.append((processor_module, processor_name))
|
||||
plugin_path = getattr(processor_module, '__file__', 'unknown location')
|
||||
logger.info(f"Registered plugin processor: {processor_name} from {plugin_path}")
|
||||
except Exception as e:
|
||||
logger.warning(f"Error loading plugin processors: {e}")
|
||||
|
||||
return processors
|
||||
|
||||
|
||||
@@ -116,138 +97,54 @@ def find_processor_module(processor_name):
|
||||
return None
|
||||
|
||||
|
||||
def get_processor_module(processor_name):
|
||||
def available_processors():
|
||||
"""
|
||||
Get the actual processor module (with perform_site_check class) by name.
|
||||
Works for both built-in and plugin processors.
|
||||
|
||||
Args:
|
||||
processor_name: Processor machine name (e.g., 'text_json_diff', 'osint_recon')
|
||||
|
||||
Returns:
|
||||
module: The processor module containing perform_site_check, or None if not found
|
||||
Get a list of processors by name and description for the UI elements.
|
||||
Can be filtered via ALLOWED_PROCESSORS environment variable (comma-separated list).
|
||||
:return: A list :)
|
||||
"""
|
||||
processor_classes = find_processors()
|
||||
processor_tuple = next((tpl for tpl in processor_classes if tpl[1] == processor_name), None)
|
||||
|
||||
if processor_tuple:
|
||||
# Return the actual processor module (first element of tuple)
|
||||
return processor_tuple[0]
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def get_processor_submodule(processor_name, submodule_name):
    """
    Import and return an optional submodule that sits next to a processor.

    The processor is looked up by name in ``find_processors()``; the submodule is
    then imported as ``<parent package name>.<submodule_name>``, where the parent
    package comes from ``get_parent_module()``.

    Args:
        processor_name: Processor machine name (e.g., 'text_json_diff', 'osint_recon')
        submodule_name: Name of the submodule (e.g., 'difference', 'extract', 'preview')

    Returns:
        module: The imported submodule, or None when the processor is unknown,
        has no parent module, or the submodule cannot be imported
    """
    match = None
    for entry in find_processors():
        # Entries are (module, processor_name) tuples.
        if entry[1] == processor_name:
            match = entry
            break
    if not match:
        return None

    package = get_parent_module(match[0])
    if not package:
        return None

    try:
        # e.g. changedetectionio.processors.text_json_diff.difference (built-in)
        #      changedetectionio_osint.difference (plugin)
        return importlib.import_module(f"{package.__name__}.{submodule_name}")
    except (ModuleNotFoundError, ImportError):
        return None
|
||||
|
||||
|
||||
@lru_cache(maxsize=1)
def get_plugin_processor_metadata():
    """Collect the ``metadata`` dict of every plugin-provided processor.

    Calls the ``register_processor`` plugin hook and keeps each result that is a
    non-empty dict with a truthy ``processor_name``. A result without a
    ``metadata`` entry is recorded with an empty dict. Any error is logged as a
    warning and whatever was collected up to that point is returned. The result
    is memoised by ``lru_cache``, so the hook is queried once per process.

    Returns:
        dict: Mapping of processor name to its metadata dict
    """
    collected = {}
    try:
        from changedetectionio.pluggy_interface import plugin_manager

        for entry in plugin_manager.hook.register_processor():
            if not entry or not isinstance(entry, dict):
                continue
            name = entry.get('processor_name')
            details = entry.get('metadata', {})
            if name:
                collected[name] = details
    except Exception as e:
        logger.warning(f"Error getting plugin processor metadata: {e}")
    return collected
|
||||
|
||||
@lru_cache(maxsize=32)
|
||||
def _available_processors_cached(locale_str):
|
||||
"""
|
||||
Internal cached function that includes locale in cache key.
|
||||
This ensures translations are cached per-language instead of globally.
|
||||
|
||||
:param locale_str: The locale string (e.g., 'en', 'it', 'zh')
|
||||
:return: A list of tuples (processor_name, translated_description, weight)
|
||||
"""
|
||||
processor_classes = find_processors()
|
||||
|
||||
# Check if DISABLED_PROCESSORS env var is set
|
||||
disabled_processors_env = os.getenv('DISABLED_PROCESSORS', 'image_ssim_diff').strip()
|
||||
disabled_processors = []
|
||||
if disabled_processors_env:
|
||||
# Check if ALLOWED_PROCESSORS env var is set
|
||||
# For now we disable it, need to make a deploy with lots of new code and this will be an overload
|
||||
allowed_processors_env = os.getenv('ALLOWED_PROCESSORS', 'text_json_diff, restock_diff').strip()
|
||||
allowed_processors = None
|
||||
if allowed_processors_env:
|
||||
# Parse comma-separated list and strip whitespace
|
||||
disabled_processors = [p.strip() for p in disabled_processors_env.split(',') if p.strip()]
|
||||
logger.info(f"DISABLED_PROCESSORS set, disabling: {disabled_processors}")
|
||||
allowed_processors = [p.strip() for p in allowed_processors_env.split(',') if p.strip()]
|
||||
logger.info(f"ALLOWED_PROCESSORS set, filtering to: {allowed_processors}")
|
||||
|
||||
available = []
|
||||
plugin_metadata = get_plugin_processor_metadata()
|
||||
|
||||
for module, sub_package_name in processor_classes:
|
||||
# Skip disabled processors
|
||||
if sub_package_name in disabled_processors:
|
||||
logger.debug(f"Skipping processor '{sub_package_name}' (in DISABLED_PROCESSORS)")
|
||||
# Filter by allowed processors if set
|
||||
if allowed_processors and sub_package_name not in allowed_processors:
|
||||
logger.debug(f"Skipping processor '{sub_package_name}' (not in ALLOWED_PROCESSORS)")
|
||||
continue
|
||||
|
||||
# Check if this is a plugin processor
|
||||
if sub_package_name in plugin_metadata:
|
||||
meta = plugin_metadata[sub_package_name]
|
||||
description = gettext(meta.get('name', sub_package_name))
|
||||
# Plugin processors start from weight 10 to separate them from built-in processors
|
||||
weight = 100 + meta.get('processor_weight', 0)
|
||||
# Try to get the 'name' attribute from the processor module first
|
||||
if hasattr(module, 'name'):
|
||||
description = gettext(module.name)
|
||||
else:
|
||||
# Try to get the 'name' attribute from the processor module first
|
||||
if hasattr(module, 'name'):
|
||||
description = gettext(module.name)
|
||||
# Fall back to processor_description from parent module's __init__.py
|
||||
parent_module = get_parent_module(module)
|
||||
if parent_module and hasattr(parent_module, 'processor_description'):
|
||||
description = gettext(parent_module.processor_description)
|
||||
else:
|
||||
# Fall back to processor_description from parent module's __init__.py
|
||||
parent_module = get_parent_module(module)
|
||||
if parent_module and hasattr(parent_module, 'processor_description'):
|
||||
description = gettext(parent_module.processor_description)
|
||||
else:
|
||||
# Final fallback to a readable name
|
||||
description = sub_package_name.replace('_', ' ').title()
|
||||
# Final fallback to a readable name
|
||||
description = sub_package_name.replace('_', ' ').title()
|
||||
|
||||
# Get weight for sorting (lower weight = higher in list)
|
||||
weight = 0 # Default weight for processors without explicit weight
|
||||
# Get weight for sorting (lower weight = higher in list)
|
||||
weight = 0 # Default weight for processors without explicit weight
|
||||
|
||||
# Check processor module itself first
|
||||
if hasattr(module, 'processor_weight'):
|
||||
weight = module.processor_weight
|
||||
else:
|
||||
# Fall back to parent module (package __init__.py)
|
||||
parent_module = get_parent_module(module)
|
||||
if parent_module and hasattr(parent_module, 'processor_weight'):
|
||||
weight = parent_module.processor_weight
|
||||
# Check processor module itself first
|
||||
if hasattr(module, 'processor_weight'):
|
||||
weight = module.processor_weight
|
||||
else:
|
||||
# Fall back to parent module (package __init__.py)
|
||||
parent_module = get_parent_module(module)
|
||||
if parent_module and hasattr(parent_module, 'processor_weight'):
|
||||
weight = parent_module.processor_weight
|
||||
|
||||
available.append((sub_package_name, description, weight))
|
||||
|
||||
@@ -257,36 +154,6 @@ def _available_processors_cached(locale_str):
|
||||
# Return as tuples without weight (for backwards compatibility)
|
||||
return [(name, desc) for name, desc, weight in available]
|
||||
|
||||
def available_processors():
    """
    Return the processors (name and description) offered to the UI elements.

    The actual lookup is delegated to ``_available_processors_cached()``, which
    is keyed by the current locale rendered as a string. Keying on the locale
    means translated descriptions are cached per language rather than once
    globally. The original notes state the list can be filtered through the
    DISABLED_PROCESSORS environment variable (comma-separated); that filtering
    is not performed in this function.

    :return: A list of tuples (processor_name, translated_description)
    """
    active_locale = get_locale()
    # The locale object is turned into a plain string so it can serve as a
    # cache key; when no locale is active we fall back to 'en'.
    cache_key = 'en' if not active_locale else str(active_locale)
    return _available_processors_cached(cache_key)
|
||||
|
||||
|
||||
def get_default_processor():
    """
    Choose the processor to use when the caller did not specify one.

    The first entry returned by ``available_processors()`` is used, so forms
    can auto-select a processor that is actually available. The list is
    expected to arrive already ordered by weight (lowest weight first) - that
    ordering is not done here.

    :return: The processor name string (e.g., 'text_json_diff')
    """
    processors = available_processors()
    if not processors:
        # Nothing available at all: fall back to the stock text/JSON processor.
        return 'text_json_diff'

    # Entries are tuples whose first element is the processor name.
    return processors[0][0]
|
||||
|
||||
|
||||
def get_processor_badge_texts():
|
||||
"""
|
||||
@@ -412,76 +279,3 @@ def get_processor_badge_css():
|
||||
|
||||
return '\n\n'.join(css_rules)
|
||||
|
||||
|
||||
def save_processor_config(datastore, watch_uuid, config_data):
    """
    Persist processor-specific configuration for a watch as a JSON file.

    Shared helper so that the UI edit form and the API endpoints store
    processor configuration the same way.

    Args:
        datastore: The application datastore instance
        watch_uuid: UUID of the watch
        config_data: Dictionary of configuration data to save (with processor_config_* prefix removed)

    Returns:
        bool: True when there was nothing to save or the save went through,
        False when the watch is missing or any exception was raised.
    """
    # An empty/absent config is treated as a successful no-op.
    if not config_data:
        return True

    try:
        from changedetectionio.processors.base import difference_detection_processor

        watch = datastore.data['watching'].get(watch_uuid)
        if watch:
            # The processor name doubles as the filename, so every processor
            # keeps its own config file for the watch.
            processor_name = watch.get('processor', 'text_json_diff')
            config_filename = f'{processor_name}.json'

            # A processor instance is only needed for its config-writing method.
            writer = difference_detection_processor(datastore, watch_uuid)
            writer.update_extra_watch_config(config_filename, config_data)

            logger.debug(f"Saved processor config to {config_filename}: {config_data}")
            return True

        logger.error(f"Cannot save processor config: watch {watch_uuid} not found")
        return False

    except Exception as e:
        # Best-effort: report the failure to the caller instead of raising.
        logger.error(f"Failed to save processor config: {e}")
        return False
|
||||
|
||||
|
||||
def extract_processor_config_from_form_data(form_data):
    """
    Pull the processor_config_* fields out of form data into their own dict.

    Shared helper so that the UI edit form and the API endpoints extract
    processor configuration the same way.

    IMPORTANT: form_data is modified in-place - every processor_config_* field
    is removed from it so it cannot reach the datastore.

    Only the leading ``processor_config_`` prefix is stripped from each field
    name; any later occurrence of that text inside the name is kept intact.

    Args:
        form_data: Dictionary of form data (will be modified in-place)

    Returns:
        dict: Dictionary of processor config data (with processor_config_* prefix removed)
    """
    prefix = 'processor_config_'
    processor_config_data = {}

    # Iterate over a snapshot of the keys because entries are removed as we go.
    for field_name in list(form_data.keys()):
        if not field_name.startswith(prefix):
            continue

        # Slice instead of str.replace(): replace() would also delete the
        # prefix text wherever it re-appears later in the field name.
        config_key = field_name[len(prefix):]

        # All values are kept (including empty strings) so a setting can be
        # cleared explicitly; pop() also removes the field from form_data.
        processor_config_data[config_key] = form_data.pop(field_name)

    return processor_config_data
|
||||
|
||||
|
||||
@@ -1,15 +1,10 @@
|
||||
import asyncio
|
||||
import re
|
||||
import hashlib
|
||||
|
||||
from changedetectionio.browser_steps.browser_steps import browser_steps_get_valid_steps
|
||||
from changedetectionio.content_fetchers.base import Fetcher
|
||||
from changedetectionio.strtobool import strtobool
|
||||
from changedetectionio.validate_url import is_private_hostname
|
||||
from copy import deepcopy
|
||||
from abc import abstractmethod
|
||||
import os
|
||||
from urllib.parse import urlparse
|
||||
from loguru import logger
|
||||
|
||||
SCREENSHOT_FORMAT_JPEG = 'JPEG'
|
||||
@@ -23,164 +18,92 @@ class difference_detection_processor():
|
||||
watch = None
|
||||
xpath_data = None
|
||||
preferred_proxy = None
|
||||
preferred_proxy_override = None # Set externally to force a specific proxy (e.g. proxy checker)
|
||||
screenshot_format = SCREENSHOT_FORMAT_JPEG
|
||||
last_raw_content_checksum = None
|
||||
|
||||
def __init__(self, datastore, watch_uuid):
    """
    Bind the processor to a datastore and one watch.

    Stores the datastore and UUID, takes a deep copy of the watch to work on,
    installs a generic Fetcher and loads the previously saved raw-content
    checksum.

    Raises:
        KeyError: when no watch with this UUID is present in the datastore.
    """
    self.datastore = datastore
    self.watch_uuid = watch_uuid

    # Work on a stable snapshot of the watch. Per the original notes the deep
    # copy is used because:
    #   1. it avoids "dict changed during iteration" errors if the watch is
    #      modified while processing is under way,
    #   2. it keeps the Watch object and its properties (.link, .is_pdf, ...),
    #      which a plain dict() would lose,
    #   3. Watch.__deepcopy__() is said to share the datastore reference while
    #      copying the dict data (defined elsewhere - not visible here).
    live_watch = self.datastore.data['watching'].get(watch_uuid)
    self.watch = deepcopy(live_watch)
    if self.watch is None:
        raise KeyError(f"Watch UUID {watch_uuid} not found in datastore (deleted before processing?)")

    # Generic placeholder fetcher; meant to be replaced by a concrete one
    # (requests, playwright etc).
    self.fetcher = Fetcher()

    # Pick up the checksum stored by an earlier run, if any.
    self.read_last_raw_content_checksum()
|
||||
|
||||
def update_last_raw_content_checksum(self, checksum):
    """
    Write the raw content MD5 checksum to 'last-checksum.txt' in the watch's data dir.

    The stored value feeds the skip logic (no reprocessing when the raw HTML
    is unchanged). Nothing is written when the checksum is empty, the watch
    is no longer in the datastore, or the watch has no data directory.
    A failed write is logged as a warning and leaves the in-memory value as is.
    """
    if not checksum:
        return

    watch = self.datastore.data['watching'].get(self.watch_uuid)
    target_dir = watch.data_dir if watch else None
    if not target_dir:
        return

    watch.ensure_data_dir_exists()
    checksum_path = os.path.join(target_dir, 'last-checksum.txt')

    try:
        with open(checksum_path, 'w', encoding='utf-8') as handle:
            handle.write(checksum)
        # Mirror the persisted value on the instance only after the write.
        self.last_raw_content_checksum = checksum
    except IOError as e:
        logger.warning(f"Failed to write checksum file for {self.watch_uuid}: {e}")
|
||||
|
||||
def read_last_raw_content_checksum(self):
    """
    Load the last raw content MD5 checksum from 'last-checksum.txt' into
    ``self.last_raw_content_checksum``.

    The attribute ends up as None when the watch is missing, has no data
    directory, the file does not exist (first run), or reading it fails.
    Surrounding whitespace in the file is stripped.
    """
    checksum = None

    watch = self.datastore.data['watching'].get(self.watch_uuid)
    data_dir = watch.data_dir if watch else None

    if data_dir:
        checksum_path = os.path.join(data_dir, 'last-checksum.txt')
        if os.path.isfile(checksum_path):
            try:
                with open(checksum_path, 'r', encoding='utf-8') as handle:
                    checksum = handle.read().strip()
            except IOError as e:
                logger.warning(f"Failed to read checksum file for {self.watch_uuid}: {e}")
                # A failed read must not leave a partial value behind.
                checksum = None

    self.last_raw_content_checksum = checksum
|
||||
|
||||
|
||||
async def validate_iana_url(self):
    """
    Pre-flight SSRF check on the watch's link.

    The hostname check (``is_private_hostname``) is run in the default
    executor so the lookup does not block the event loop. The check is skipped
    entirely when ALLOW_IANA_RESTRICTED_ADDRESSES is set to a true value, and
    when the link has no hostname.

    Raises:
        Exception: when the hostname is reported as private/reserved.
    """
    if strtobool(os.getenv('ALLOW_IANA_RESTRICTED_ADDRESSES', 'false')):
        return

    hostname = urlparse(self.watch.link).hostname
    if not hostname:
        return

    event_loop = asyncio.get_running_loop()
    is_private = await event_loop.run_in_executor(None, is_private_hostname, hostname)
    if is_private:
        raise Exception(
            f"Fetch blocked: '{self.watch.link}' resolves to a private/reserved IP address. "
            f"Set ALLOW_IANA_RESTRICTED_ADDRESSES=true to allow."
        )
|
||||
|
||||
async def call_browser(self):
|
||||
async def call_browser(self, preferred_proxy_id=None):
|
||||
|
||||
from requests.structures import CaseInsensitiveDict
|
||||
from changedetectionio.model.browser_profile import resolve_browser_profile, BUILTIN_REQUESTS
|
||||
|
||||
url = self.watch.link
|
||||
|
||||
# Protect against file:, file:/, file:// access
|
||||
# Protect against file:, file:/, file:// access, check the real "link" without any meta "source:" etc prepended.
|
||||
if re.search(r'^file:', url.strip(), re.IGNORECASE):
|
||||
if not strtobool(os.getenv('ALLOW_FILE_URI', 'false')):
|
||||
raise Exception("file:// type access is denied for security reasons.")
|
||||
raise Exception(
|
||||
"file:// type access is denied for security reasons."
|
||||
)
|
||||
|
||||
await self.validate_iana_url()
|
||||
# Requests, playwright, other browser via wss:// etc, fetch_extra_something
|
||||
prefer_fetch_backend = self.watch.get('fetch_backend', 'system')
|
||||
|
||||
# Resolve the full browser profile for this watch (watch → tag → global → built-in)
|
||||
profile = resolve_browser_profile(self.watch, self.datastore)
|
||||
# Proxy ID "key"
|
||||
preferred_proxy_id = preferred_proxy_id if preferred_proxy_id else self.datastore.get_preferred_proxy_for_watch(
|
||||
uuid=self.watch.get('uuid'))
|
||||
|
||||
# PDFs always use the requests fetcher — browsers render them in an embedded viewer
|
||||
# Pluggable content self.fetcher
|
||||
if not prefer_fetch_backend or prefer_fetch_backend == 'system':
|
||||
prefer_fetch_backend = self.datastore.data['settings']['application'].get('fetch_backend')
|
||||
|
||||
# In the case that the preferred fetcher was a browser config with custom connection URL..
|
||||
# @todo - on save watch, if its extra_browser_ then it should be obvious it will use playwright (like if its requests now..)
|
||||
custom_browser_connection_url = None
|
||||
if prefer_fetch_backend.startswith('extra_browser_'):
|
||||
(t, key) = prefer_fetch_backend.split('extra_browser_')
|
||||
connection = list(
|
||||
filter(lambda s: (s['browser_name'] == key), self.datastore.data['settings']['requests'].get('extra_browsers', [])))
|
||||
if connection:
|
||||
prefer_fetch_backend = 'html_webdriver'
|
||||
custom_browser_connection_url = connection[0].get('browser_connection_url')
|
||||
|
||||
# PDF should be html_requests because playwright will serve it up (so far) in a embedded page
|
||||
# @todo https://github.com/dgtlmoon/changedetection.io/issues/2019
|
||||
# @todo needs test to or a fix
|
||||
if self.watch.is_pdf:
|
||||
profile = BUILTIN_REQUESTS
|
||||
prefer_fetch_backend = "html_requests"
|
||||
|
||||
# Resolve proxy — custom browser endpoints skip proxy (connection URL is already routing)
|
||||
proxy_url = None
|
||||
preferred_proxy_id = self.preferred_proxy_override or self.datastore.get_preferred_proxy_for_watch(uuid=self.watch.get('uuid'))
|
||||
if preferred_proxy_id:
|
||||
if not profile.browser_connection_url:
|
||||
proxy_url = self.datastore.proxy_list.get(preferred_proxy_id, {}).get('url')
|
||||
logger.debug(f"Proxy '{preferred_proxy_id}' → '{proxy_url}' for {url}")
|
||||
else:
|
||||
logger.debug("Skipping proxy — custom browser_connection_url is set")
|
||||
|
||||
logger.debug(f"BrowserProfile '{profile.get_machine_name()}' (fetcher={profile.fetch_backend}) for watch {self.watch['uuid']}")
|
||||
|
||||
# Select the fetcher class
|
||||
# Grab the right kind of 'fetcher', (playwright, requests, etc)
|
||||
from changedetectionio import content_fetchers
|
||||
fetcher_class_name = profile.get_fetcher_class_name()
|
||||
if hasattr(content_fetchers, prefer_fetch_backend):
|
||||
# @todo TEMPORARY HACK - SWITCH BACK TO PLAYWRIGHT FOR BROWSERSTEPS
|
||||
if prefer_fetch_backend == 'html_webdriver' and self.watch.has_browser_steps:
|
||||
# This is never supported in selenium anyway
|
||||
logger.warning(
|
||||
"Using playwright fetcher override for possible puppeteer request in browsersteps, because puppetteer:browser steps is incomplete.")
|
||||
from changedetectionio.content_fetchers.playwright import fetcher as playwright_fetcher
|
||||
fetcher_obj = playwright_fetcher
|
||||
else:
|
||||
fetcher_obj = getattr(content_fetchers, prefer_fetch_backend)
|
||||
else:
|
||||
# What it referenced doesnt exist, Just use a default
|
||||
fetcher_obj = getattr(content_fetchers, "html_requests")
|
||||
|
||||
fetcher_obj = content_fetchers.get_fetcher(fetcher_class_name)
|
||||
if fetcher_obj is None:
|
||||
logger.warning(f"Fetcher '{fetcher_class_name}' not found, falling back to requests")
|
||||
fetcher_obj = content_fetchers.get_fetcher('requests')
|
||||
elif self.watch.has_browser_steps and not getattr(fetcher_obj, 'supports_browser_steps', False):
|
||||
# Browser steps require Playwright — override if the resolved fetcher doesn't support them
|
||||
logger.warning(f"Fetcher '{fetcher_class_name}' does not support browser steps, overriding to Playwright")
|
||||
fetcher_obj = content_fetchers.get_fetcher('playwright')
|
||||
proxy_url = None
|
||||
if preferred_proxy_id:
|
||||
# Custom browser endpoints should NOT have a proxy added
|
||||
if not prefer_fetch_backend.startswith('extra_browser_'):
|
||||
proxy_url = self.datastore.proxy_list.get(preferred_proxy_id).get('url')
|
||||
logger.debug(f"Selected proxy key '{preferred_proxy_id}' as proxy URL '{proxy_url}' for {url}")
|
||||
else:
|
||||
logger.debug("Skipping adding proxy data when custom Browser endpoint is specified. ")
|
||||
|
||||
self.fetcher = fetcher_obj(
|
||||
proxy_override=proxy_url,
|
||||
custom_browser_connection_url=profile.browser_connection_url,
|
||||
screenshot_format=self.screenshot_format,
|
||||
# BrowserProfile fields — browser fetchers use these; html_requests ignores them
|
||||
viewport_width=profile.viewport_width,
|
||||
viewport_height=profile.viewport_height,
|
||||
block_images=profile.block_images,
|
||||
block_fonts=profile.block_fonts,
|
||||
profile_user_agent=profile.user_agent,
|
||||
ignore_https_errors=profile.ignore_https_errors,
|
||||
locale=profile.locale,
|
||||
)
|
||||
logger.debug(f"Using proxy '{proxy_url}' for {self.watch['uuid']}")
|
||||
|
||||
# Now call the fetcher (playwright/requests/etc) with arguments that only a fetcher would need.
|
||||
# When browser_connection_url is None, it method should default to working out whats the best defaults (os env vars etc)
|
||||
self.fetcher = fetcher_obj(proxy_override=proxy_url,
|
||||
custom_browser_connection_url=custom_browser_connection_url,
|
||||
screenshot_format=self.screenshot_format
|
||||
)
|
||||
|
||||
if self.watch.has_browser_steps:
|
||||
self.fetcher.browser_steps = browser_steps_get_valid_steps(self.watch.get('browser_steps', []))
|
||||
self.fetcher.browser_steps = self.watch.get('browser_steps', [])
|
||||
self.fetcher.browser_steps_screenshot_path = os.path.join(self.datastore.datastore_path, self.watch.get('uuid'))
|
||||
|
||||
# Tweak the base config with the per-watch ones
|
||||
@@ -188,8 +111,8 @@ class difference_detection_processor():
|
||||
request_headers = CaseInsensitiveDict()
|
||||
|
||||
ua = self.datastore.data['settings']['requests'].get('default_ua')
|
||||
if ua and ua.get(fetcher_class_name):
|
||||
request_headers.update({'User-Agent': ua.get(fetcher_class_name)})
|
||||
if ua and ua.get(prefer_fetch_backend):
|
||||
request_headers.update({'User-Agent': ua.get(prefer_fetch_backend)})
|
||||
|
||||
request_headers.update(self.watch.get('headers', {}))
|
||||
request_headers.update(self.datastore.get_all_base_headers())
|
||||
@@ -246,17 +169,6 @@ class difference_detection_processor():
|
||||
|
||||
# @todo .quit here could go on close object, so we can run JS if change-detected
|
||||
await self.fetcher.quit(watch=self.watch)
|
||||
self.fetcher.disk_cleanup_after_fetch()
|
||||
|
||||
# Sanitize lone surrogates - these can appear when servers return malformed/mixed-encoding
|
||||
# content that gets decoded into surrogate characters (e.g. \udcad). Without this,
|
||||
# encode('utf-8') raises UnicodeEncodeError downstream in checksums, diffs, file writes, etc.
|
||||
# Covers all fetchers (requests, playwright, puppeteer, selenium) in one place.
|
||||
# Also note: By this point we SHOULD know the original encoding so it can safely convert to utf-8 for the rest of the app.
|
||||
# See: https://github.com/dgtlmoon/changedetection.io/issues/3952
|
||||
|
||||
if self.fetcher.content and isinstance(self.fetcher.content, str):
|
||||
self.fetcher.content = self.fetcher.content.encode('utf-8', errors='replace').decode('utf-8')
|
||||
|
||||
# After init, call run_changedetection() which will do the actual change-detection
|
||||
|
||||
@@ -274,12 +186,12 @@ class difference_detection_processor():
|
||||
import os
|
||||
|
||||
watch = self.datastore.data['watching'].get(self.watch_uuid)
|
||||
data_dir = watch.data_dir
|
||||
watch_data_dir = watch.watch_data_dir
|
||||
|
||||
if not data_dir:
|
||||
if not watch_data_dir:
|
||||
return {}
|
||||
|
||||
filepath = os.path.join(data_dir, filename)
|
||||
filepath = os.path.join(watch_data_dir, filename)
|
||||
|
||||
if not os.path.isfile(filepath):
|
||||
return {}
|
||||
@@ -304,16 +216,16 @@ class difference_detection_processor():
|
||||
import os
|
||||
|
||||
watch = self.datastore.data['watching'].get(self.watch_uuid)
|
||||
data_dir = watch.data_dir
|
||||
watch_data_dir = watch.watch_data_dir
|
||||
|
||||
if not data_dir:
|
||||
logger.warning(f"Cannot save extra watch config {filename}: no data_dir")
|
||||
if not watch_data_dir:
|
||||
logger.warning(f"Cannot save extra watch config {filename}: no watch_data_dir")
|
||||
return
|
||||
|
||||
# Ensure directory exists
|
||||
watch.ensure_data_dir_exists()
|
||||
|
||||
filepath = os.path.join(data_dir, filename)
|
||||
filepath = os.path.join(watch_data_dir, filename)
|
||||
|
||||
try:
|
||||
# If merge is enabled, read existing data first
|
||||
@@ -338,16 +250,8 @@ class difference_detection_processor():
|
||||
except IOError as e:
|
||||
logger.error(f"Failed to write extra watch config {filename}: {e}")
|
||||
|
||||
def get_raw_document_checksum(self):
    """
    Return the MD5 hex digest of the fetched raw content.

    Text content is encoded as UTF-8 before hashing; bytes-like content is
    hashed as-is instead of failing on a missing ``.encode()``.

    Returns:
        str | None: 32-character hex digest, or None when the fetcher holds
        no content (None, empty string, empty bytes).
    """
    content = self.fetcher.content
    if not content:
        return None

    if isinstance(content, str):
        # errors='replace' keeps lone surrogates (from malformed or mixed
        # encodings) from raising UnicodeEncodeError; for well-formed text the
        # encoded bytes, and therefore the digest, are unchanged.
        content = content.encode('utf-8', errors='replace')

    return hashlib.md5(content).hexdigest()
|
||||
|
||||
@abstractmethod
|
||||
def run_changedetection(self, watch, force_reprocess=False):
|
||||
def run_changedetection(self, watch):
|
||||
update_obj = {'last_notification_error': False, 'last_error': False}
|
||||
some_data = 'xxxxx'
|
||||
update_obj["previous_md5"] = hashlib.md5(some_data.encode('utf-8')).hexdigest()
|
||||
|
||||
@@ -42,7 +42,10 @@ def render_form(watch, datastore, request, url_for, render_template, flash, redi
|
||||
# Get error information for the template
|
||||
screenshot_url = watch.get_screenshot()
|
||||
|
||||
fetcher_supports_screenshots = watch.fetcher_supports_screenshots
|
||||
system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver'
|
||||
is_html_webdriver = False
|
||||
if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver' or watch.get('fetch_backend', '').startswith('extra_browser_'):
|
||||
is_html_webdriver = True
|
||||
|
||||
password_enabled_and_share_is_off = False
|
||||
if datastore.data['settings']['application'].get('password') or os.getenv("SALTED_PASS", False):
|
||||
@@ -59,7 +62,7 @@ def render_form(watch, datastore, request, url_for, render_template, flash, redi
|
||||
last_error_screenshot=watch.get_error_snapshot(),
|
||||
last_error_text=watch.get_error_text(),
|
||||
screenshot=screenshot_url,
|
||||
fetcher_supports_screenshots=fetcher_supports_screenshots,
|
||||
is_html_webdriver=is_html_webdriver,
|
||||
password_enabled_and_share_is_off=password_enabled_and_share_is_off,
|
||||
extra_title=f" - {watch.label} - Extract Data",
|
||||
extra_stylesheets=[url_for('static_content', group='styles', filename='diff.css')],
|
||||
|
||||
@@ -12,13 +12,6 @@ processor_description = "Visual/Screenshot change detection (Fast)"
|
||||
processor_name = "image_ssim_diff"
|
||||
processor_weight = 2 # Lower weight = appears at top, heavier weight = appears lower (bottom)
|
||||
|
||||
# Processor capabilities
|
||||
supports_visual_selector = True
|
||||
supports_browser_steps = True
|
||||
supports_text_filters_and_triggers = False
|
||||
supports_text_filters_and_triggers_elements = False
|
||||
supports_request_type = True
|
||||
|
||||
PROCESSOR_CONFIG_NAME = f"{Path(__file__).parent.name}.json"
|
||||
|
||||
# Subprocess timeout settings
|
||||
|
||||
@@ -130,7 +130,7 @@ def get_asset(asset_name, watch, datastore, request):
|
||||
except Exception as e:
|
||||
exception_container[0] = e
|
||||
|
||||
thread = threading.Thread(target=thread_target, daemon=True, name="ImageDiff-Asset")
|
||||
thread = threading.Thread(target=thread_target)
|
||||
thread.start()
|
||||
thread.join(timeout=60)
|
||||
|
||||
@@ -284,7 +284,7 @@ def _draw_bounding_box_if_configured(img_bytes, watch, datastore):
|
||||
except Exception as e:
|
||||
exception_container[0] = e
|
||||
|
||||
thread = threading.Thread(target=thread_target, daemon=True, name="ImageDiff-BoundingBox")
|
||||
thread = threading.Thread(target=thread_target)
|
||||
thread.start()
|
||||
thread.join(timeout=15)
|
||||
|
||||
@@ -393,7 +393,7 @@ def render(watch, datastore, request, url_for, render_template, flash, redirect)
|
||||
except Exception as e:
|
||||
exception_container[0] = e
|
||||
|
||||
thread = threading.Thread(target=thread_target, daemon=True, name="ImageDiff-ChangePercentage")
|
||||
thread = threading.Thread(target=thread_target)
|
||||
thread.start()
|
||||
thread.join(timeout=60)
|
||||
|
||||
@@ -414,7 +414,7 @@ def render(watch, datastore, request, url_for, render_template, flash, redirect)
|
||||
|
||||
# Load historical data if available (for charts/visualization)
|
||||
comparison_data = {}
|
||||
comparison_config_path = os.path.join(watch.data_dir, "visual_comparison_data.json")
|
||||
comparison_config_path = os.path.join(watch.watch_data_dir, "visual_comparison_data.json")
|
||||
if os.path.isfile(comparison_config_path):
|
||||
try:
|
||||
with open(comparison_config_path, 'r') as f:
|
||||
|
||||
@@ -90,7 +90,7 @@ def on_config_save(watch, processor_config, datastore):
|
||||
processor_config['auto_track_region'] = False
|
||||
|
||||
# Remove old template file if exists
|
||||
template_path = os.path.join(watch.data_dir, CROPPED_IMAGE_TEMPLATE_FILENAME)
|
||||
template_path = os.path.join(watch.watch_data_dir, CROPPED_IMAGE_TEMPLATE_FILENAME)
|
||||
if os.path.exists(template_path):
|
||||
os.remove(template_path)
|
||||
logger.debug(f"Removed old template file: {template_path}")
|
||||
|
||||
@@ -13,9 +13,14 @@ Research: https://github.com/libvips/pyvips/issues/234
|
||||
|
||||
import multiprocessing
|
||||
|
||||
# CRITICAL: Use 'spawn' context instead of 'fork' to avoid inheriting parent's
|
||||
# CRITICAL: Use 'spawn' instead of 'fork' to avoid inheriting parent's
|
||||
# LibVIPS threading state which can cause hangs in gaussblur operations
|
||||
# https://docs.python.org/3/library/multiprocessing.html#contexts-and-start-methods
|
||||
try:
|
||||
multiprocessing.set_start_method('spawn', force=False)
|
||||
except RuntimeError:
|
||||
# Already set, ignore
|
||||
pass
|
||||
|
||||
|
||||
def _worker_generate_diff(conn, img_bytes_from, img_bytes_to, threshold, blur_sigma, max_width, max_height):
|
||||
@@ -90,10 +95,9 @@ def generate_diff_isolated(img_bytes_from, img_bytes_to, threshold, blur_sigma,
|
||||
Returns:
|
||||
bytes: JPEG diff image or None on failure
|
||||
"""
|
||||
ctx = multiprocessing.get_context('spawn')
|
||||
parent_conn, child_conn = ctx.Pipe()
|
||||
parent_conn, child_conn = multiprocessing.Pipe()
|
||||
|
||||
p = ctx.Process(
|
||||
p = multiprocessing.Process(
|
||||
target=_worker_generate_diff,
|
||||
args=(child_conn, img_bytes_from, img_bytes_to, threshold, blur_sigma, max_width, max_height)
|
||||
)
|
||||
@@ -136,8 +140,7 @@ def calculate_change_percentage_isolated(img_bytes_from, img_bytes_to, threshold
|
||||
Returns:
|
||||
float: Change percentage
|
||||
"""
|
||||
ctx = multiprocessing.get_context('spawn')
|
||||
parent_conn, child_conn = ctx.Pipe()
|
||||
parent_conn, child_conn = multiprocessing.Pipe()
|
||||
|
||||
def _worker_calculate(conn):
|
||||
try:
|
||||
@@ -182,7 +185,7 @@ def calculate_change_percentage_isolated(img_bytes_from, img_bytes_to, threshold
|
||||
finally:
|
||||
conn.close()
|
||||
|
||||
p = ctx.Process(target=_worker_calculate, args=(child_conn,))
|
||||
p = multiprocessing.Process(target=_worker_calculate, args=(child_conn,))
|
||||
p.start()
|
||||
|
||||
result = 0.0
|
||||
@@ -230,8 +233,7 @@ def compare_images_isolated(img_bytes_from, img_bytes_to, threshold, blur_sigma,
|
||||
tuple: (changed_detected, change_percentage)
|
||||
"""
|
||||
print(f"[Parent] Starting compare_images_isolated subprocess", flush=True)
|
||||
ctx = multiprocessing.get_context('spawn')
|
||||
parent_conn, child_conn = ctx.Pipe()
|
||||
parent_conn, child_conn = multiprocessing.Pipe()
|
||||
|
||||
def _worker_compare(conn):
|
||||
try:
|
||||
@@ -299,7 +301,7 @@ def compare_images_isolated(img_bytes_from, img_bytes_to, threshold, blur_sigma,
|
||||
finally:
|
||||
conn.close()
|
||||
|
||||
p = ctx.Process(target=_worker_compare, args=(child_conn,))
|
||||
p = multiprocessing.Process(target=_worker_compare, args=(child_conn,))
|
||||
print(f"[Parent] Starting subprocess (pid will be assigned)", flush=True)
|
||||
p.start()
|
||||
print(f"[Parent] Subprocess started (pid={p.pid}), waiting for result (30s timeout)", flush=True)
|
||||
|
||||
@@ -30,7 +30,7 @@ class perform_site_check(difference_detection_processor):
|
||||
# Override to use PNG format for better image comparison (JPEG compression creates noise)
|
||||
screenshot_format = SCREENSHOT_FORMAT_PNG
|
||||
|
||||
def run_changedetection(self, watch, force_reprocess=False):
|
||||
def run_changedetection(self, watch):
|
||||
"""
|
||||
Perform screenshot comparison using OpenCV subprocess handler.
|
||||
|
||||
@@ -204,7 +204,7 @@ class perform_site_check(difference_detection_processor):
|
||||
except Exception as e:
|
||||
exception_container[0] = e
|
||||
|
||||
thread = threading.Thread(target=thread_target, daemon=True, name="ImageDiff-Processor")
|
||||
thread = threading.Thread(target=thread_target)
|
||||
thread.start()
|
||||
thread.join(timeout=60)
|
||||
|
||||
|
||||
@@ -100,13 +100,7 @@ class guess_stream_type():
|
||||
if any(s in http_content_header for s in RSS_XML_CONTENT_TYPES):
|
||||
self.is_rss = True
|
||||
elif any(s in http_content_header for s in JSON_CONTENT_TYPES):
|
||||
# JSONP detection: server claims application/json but content is actually JSONP (e.g. cb({...}))
|
||||
# A JSONP response starts with an identifier followed by '(' - not valid JSON
|
||||
if re.match(r'^\w[\w.]*\s*\(', test_content):
|
||||
logger.warning(f"Content-Type header claims JSON but content looks like JSONP (starts with identifier+parenthesis) - treating as plaintext")
|
||||
self.is_plaintext = True
|
||||
else:
|
||||
self.is_json = True
|
||||
self.is_json = True
|
||||
elif 'pdf' in magic_content_header:
|
||||
self.is_pdf = True
|
||||
# magic will call a rss document 'xml'
|
||||
|
||||
@@ -1,19 +1,9 @@
|
||||
|
||||
from babel.numbers import parse_decimal
|
||||
from changedetectionio.model.Watch import model as BaseWatch
|
||||
from decimal import Decimal, InvalidOperation
|
||||
from typing import Union
|
||||
import re
|
||||
|
||||
# Processor capabilities
|
||||
supports_visual_selector = True
|
||||
supports_browser_steps = True
|
||||
supports_text_filters_and_triggers = True
|
||||
supports_text_filters_and_triggers_elements = True
|
||||
supports_request_type = True
|
||||
_price_re = re.compile(r"Price:\s*(\d+(?:\.\d+)?)", re.IGNORECASE)
|
||||
|
||||
|
||||
class Restock(dict):
|
||||
|
||||
def parse_currency(self, raw_value: str) -> Union[float, None]:
|
||||
@@ -34,7 +24,6 @@ class Restock(dict):
|
||||
|
||||
if standardized_value:
|
||||
# Convert to float
|
||||
# @todo locale needs to be the locale of the webpage
|
||||
return float(parse_decimal(standardized_value, locale='en'))
|
||||
|
||||
return None
|
||||
@@ -66,22 +55,15 @@ class Restock(dict):
|
||||
|
||||
super().__setitem__(key, value)
|
||||
|
||||
def get_price_from_history_str(history_str):
    """
    Extract the price recorded in a history snapshot string.

    The module-level ``_price_re`` pattern is searched in ``history_str``; its
    first capture group is converted through Decimal and returned as a string.

    Returns:
        str | None: the price as a string, or None when the pattern is not
        found or the captured text is not a valid Decimal.
    """
    found = _price_re.search(history_str)
    if found:
        try:
            return str(Decimal(found.group(1)))
        except InvalidOperation:
            # Captured text could not be parsed as a decimal number.
            pass
    return None
|
||||
|
||||
|
||||
class Watch(BaseWatch):
|
||||
def __init__(self, *arg, **kw):
|
||||
super().__init__(*arg, **kw)
|
||||
self['restock'] = Restock(kw['default']['restock']) if kw.get('default') and kw['default'].get('restock') else Restock()
|
||||
|
||||
self['restock_settings'] = kw['default']['restock_settings'] if kw.get('default',{}).get('restock_settings') else {
|
||||
'follow_price_changes': True,
|
||||
'in_stock_processing' : 'in_stock_only'
|
||||
} #@todo update
|
||||
|
||||
def clear_watch(self):
|
||||
super().clear_watch()
|
||||
@@ -90,27 +72,13 @@ class Watch(BaseWatch):
|
||||
def extra_notification_token_values(self):
|
||||
values = super().extra_notification_token_values()
|
||||
values['restock'] = self.get('restock', {})
|
||||
|
||||
values['restock']['previous_price'] = None
|
||||
if self.history_n >= 2:
|
||||
history = self.history
|
||||
if history and len(history) >=2:
|
||||
"""Unfortunately for now timestamp is stored as string key"""
|
||||
sorted_keys = sorted(list(history), key=lambda x: int(x))
|
||||
sorted_keys.reverse()
|
||||
|
||||
price_str = self.get_history_snapshot(timestamp=sorted_keys[-1])
|
||||
if price_str:
|
||||
values['restock']['previous_price'] = get_price_from_history_str(price_str)
|
||||
return values
|
||||
|
||||
def extra_notification_token_placeholder_info(self):
|
||||
values = super().extra_notification_token_placeholder_info()
|
||||
|
||||
values.append(('restock.price', "Price detected"))
|
||||
values.append(('restock.in_stock', "In stock status"))
|
||||
values.append(('restock.original_price', "Original price at first check"))
|
||||
values.append(('restock.previous_price', "Previous price in history"))
|
||||
|
||||
return values
|
||||
|
||||
|
||||
@@ -1,149 +0,0 @@
|
||||
# Schema of the configuration object accepted under the
# `processor_config_restock_diff` key (see the request samples in `paths`).
components:
  schemas:
    processor_config_restock_diff:
      type: object
      description: Configuration for the restock_diff processor (restock and price tracking)
      properties:
        in_stock_processing:
          type: string
          # 'off' stays quoted so YAML 1.1 parsers do not read it as boolean false.
          enum:
            - in_stock_only
            - all_changes
            - 'off'
          default: in_stock_only
          description: |
            When to trigger on stock changes:
            - `in_stock_only`: Only trigger on Out Of Stock -> In Stock transitions
            - `all_changes`: Trigger on any availability change
            - `off`: Disable stock/availability tracking
        follow_price_changes:
          type: boolean
          default: true
          description: Monitor and track price changes
        # The three price limits below also accept null.
        price_change_min:
          type: [number, 'null']
          description: Trigger a notification when the price drops below this value
        price_change_max:
          type: [number, 'null']
          description: Trigger a notification when the price rises above this value
        price_change_threshold_percent:
          type: [number, 'null']
          minimum: 0
          maximum: 100
          description: Minimum price change percentage since the original price to trigger a notification
|
||||
|
||||
# Request samples (curl and Python) showing how to send
# `processor_config_restock_diff` to the watch and tag endpoints.
paths:
  # Create a watch that uses the restock_diff processor.
  /watch:
    post:
      x-code-samples:
        - lang: curl
          label: 'Restock & price tracking'
          source: |
            curl -X POST "http://localhost:5000/api/v1/watch" \
              -H "x-api-key: YOUR_API_KEY" \
              -H "Content-Type: application/json" \
              -d '{
                "url": "https://example.com/product",
                "processor": "restock_diff",
                "processor_config_restock_diff": {
                  "in_stock_processing": "in_stock_only",
                  "follow_price_changes": true,
                  "price_change_threshold_percent": 5
                }
              }'
        - lang: Python
          label: 'Restock & price tracking'
          source: |
            import requests

            headers = {
                'x-api-key': 'YOUR_API_KEY',
                'Content-Type': 'application/json'
            }
            data = {
                'url': 'https://example.com/product',
                'processor': 'restock_diff',
                'processor_config_restock_diff': {
                    'in_stock_processing': 'in_stock_only',
                    'follow_price_changes': True,
                    'price_change_threshold_percent': 5,
                }
            }
            response = requests.post('http://localhost:5000/api/v1/watch',
                                   headers=headers, json=data)
            print(response.json())

  # Update the restock configuration of an existing watch.
  /watch/{uuid}:
    put:
      x-code-samples:
        - lang: curl
          label: 'Update restock config'
          source: |
            curl -X PUT "http://localhost:5000/api/v1/watch/YOUR-UUID" \
              -H "x-api-key: YOUR_API_KEY" \
              -H "Content-Type: application/json" \
              -d '{
                "processor_config_restock_diff": {
                  "in_stock_processing": "all_changes",
                  "follow_price_changes": true,
                  "price_change_min": 10.00,
                  "price_change_max": 500.00
                }
              }'
        - lang: Python
          label: 'Update restock config'
          source: |
            import requests

            headers = {
                'x-api-key': 'YOUR_API_KEY',
                'Content-Type': 'application/json'
            }
            uuid = 'YOUR-UUID'
            data = {
                'processor_config_restock_diff': {
                    'in_stock_processing': 'all_changes',
                    'follow_price_changes': True,
                    'price_change_min': 10.00,
                    'price_change_max': 500.00,
                }
            }
            response = requests.put(f'http://localhost:5000/api/v1/watch/{uuid}',
                                  headers=headers, json=data)
            print(response.text)

  # Set the restock configuration on a group/tag.
  /tag/{uuid}:
    put:
      x-code-samples:
        - lang: curl
          label: 'Set restock config on group/tag'
          source: |
            curl -X PUT "http://localhost:5000/api/v1/tag/YOUR-TAG-UUID" \
              -H "x-api-key: YOUR_API_KEY" \
              -H "Content-Type: application/json" \
              -d '{
                "overrides_watch": true,
                "processor_config_restock_diff": {
                  "in_stock_processing": "in_stock_only",
                  "follow_price_changes": true,
                  "price_change_threshold_percent": 10
                }
              }'
        - lang: Python
          label: 'Set restock config on group/tag'
          source: |
            import requests

            headers = {
                'x-api-key': 'YOUR_API_KEY',
                'Content-Type': 'application/json'
            }
            tag_uuid = 'YOUR-TAG-UUID'
            data = {
                'overrides_watch': True,
                'processor_config_restock_diff': {
                    'in_stock_processing': 'in_stock_only',
                    'follow_price_changes': True,
                    'price_change_threshold_percent': 10,
                }
            }
            response = requests.put(f'http://localhost:5000/api/v1/tag/{tag_uuid}',
                                  headers=headers, json=data)
            print(response.text)
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user