mirror of
https://github.com/dgtlmoon/changedetection.io.git
synced 2026-06-21 16:18:21 +00:00
Compare commits
3 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 799818dd40 | |||
| b06797636c | |||
| fcd07e23f3 |
@@ -1,33 +0,0 @@
|
||||
server {
|
||||
listen 80;
|
||||
server_name localhost;
|
||||
|
||||
# Test basic reverse proxy to changedetection.io
|
||||
location / {
|
||||
proxy_pass http://changedet-app:5000;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
|
||||
# WebSocket support
|
||||
proxy_http_version 1.1;
|
||||
proxy_set_header Upgrade $http_upgrade;
|
||||
proxy_set_header Connection "upgrade";
|
||||
}
|
||||
|
||||
# Test subpath deployment with X-Forwarded-Prefix
|
||||
location /changedet-sub/ {
|
||||
proxy_pass http://changedet-app:5000/;
|
||||
proxy_set_header X-Forwarded-Prefix /changedet-sub;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
|
||||
# WebSocket support
|
||||
proxy_http_version 1.1;
|
||||
proxy_set_header Upgrade $http_upgrade;
|
||||
proxy_set_header Connection "upgrade";
|
||||
}
|
||||
}
|
||||
@@ -66,27 +66,27 @@ jobs:
|
||||
echo ${{ github.ref }} > changedetectionio/tag.txt
|
||||
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v4
|
||||
uses: docker/setup-qemu-action@v3
|
||||
with:
|
||||
image: tonistiigi/binfmt:latest
|
||||
platforms: all
|
||||
|
||||
- name: Login to GitHub Container Registry
|
||||
uses: docker/login-action@v4
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
registry: ghcr.io
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Login to Docker Hub Container Registry
|
||||
uses: docker/login-action@v4
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.DOCKER_HUB_USERNAME }}
|
||||
password: ${{ secrets.DOCKER_HUB_ACCESS_TOKEN }}
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
id: buildx
|
||||
uses: docker/setup-buildx-action@v4
|
||||
uses: docker/setup-buildx-action@v3
|
||||
with:
|
||||
install: true
|
||||
version: latest
|
||||
@@ -95,7 +95,7 @@ jobs:
|
||||
# master branch -> :dev container tag
|
||||
- name: Docker meta :dev
|
||||
if: ${{ github.ref == 'refs/heads/master' && github.event_name != 'release' }}
|
||||
uses: docker/metadata-action@v6
|
||||
uses: docker/metadata-action@v5
|
||||
id: meta_dev
|
||||
with:
|
||||
images: |
|
||||
@@ -103,19 +103,11 @@ jobs:
|
||||
ghcr.io/${{ github.repository }}
|
||||
tags: |
|
||||
type=raw,value=dev
|
||||
labels: |
|
||||
org.opencontainers.image.created=${{ github.event.release.published_at }}
|
||||
org.opencontainers.image.description=Website, webpage change detection, monitoring and notifications.
|
||||
org.opencontainers.image.documentation=https://changedetection.io
|
||||
org.opencontainers.image.revision=${{ github.sha }}
|
||||
org.opencontainers.image.source=https://github.com/dgtlmoon/changedetection.io
|
||||
org.opencontainers.image.title=changedetection.io
|
||||
org.opencontainers.image.url=https://changedetection.io
|
||||
|
||||
- name: Build and push :dev
|
||||
id: docker_build
|
||||
if: ${{ github.ref == 'refs/heads/master' && github.event_name != 'release' }}
|
||||
uses: docker/build-push-action@v7
|
||||
uses: docker/build-push-action@v6
|
||||
with:
|
||||
context: ./
|
||||
file: ./Dockerfile
|
||||
@@ -136,10 +128,10 @@ jobs:
|
||||
echo "Release tag: ${{ github.event.release.tag_name }}"
|
||||
echo "Github ref: ${{ github.ref }}"
|
||||
echo "Github ref name: ${{ github.ref_name }}"
|
||||
|
||||
|
||||
- name: Docker meta :tag
|
||||
if: github.event_name == 'release' && startsWith(github.event.release.tag_name, '0.')
|
||||
uses: docker/metadata-action@v6
|
||||
uses: docker/metadata-action@v5
|
||||
id: meta
|
||||
with:
|
||||
images: |
|
||||
@@ -150,20 +142,11 @@ jobs:
|
||||
type=semver,pattern={{major}}.{{minor}},value=${{ github.event.release.tag_name }}
|
||||
type=semver,pattern={{major}},value=${{ github.event.release.tag_name }}
|
||||
type=raw,value=latest
|
||||
labels: |
|
||||
org.opencontainers.image.created=${{ github.event.release.published_at }}
|
||||
org.opencontainers.image.description=Website, webpage change detection, monitoring and notifications.
|
||||
org.opencontainers.image.documentation=https://changedetection.io
|
||||
org.opencontainers.image.revision=${{ github.sha }}
|
||||
org.opencontainers.image.source=https://github.com/dgtlmoon/changedetection.io
|
||||
org.opencontainers.image.title=changedetection.io
|
||||
org.opencontainers.image.url=https://changedetection.io
|
||||
org.opencontainers.image.version=${{ github.event.release.tag_name }}
|
||||
|
||||
- name: Build and push :tag
|
||||
id: docker_build_tag_release
|
||||
if: github.event_name == 'release' && startsWith(github.event.release.tag_name, '0.')
|
||||
uses: docker/build-push-action@v7
|
||||
uses: docker/build-push-action@v6
|
||||
with:
|
||||
context: ./
|
||||
file: ./Dockerfile
|
||||
|
||||
@@ -21,7 +21,7 @@ jobs:
|
||||
- name: Build a binary wheel and a source tarball
|
||||
run: python3 -m build
|
||||
- name: Store the distribution packages
|
||||
uses: actions/upload-artifact@v7
|
||||
uses: actions/upload-artifact@v6
|
||||
with:
|
||||
name: python-package-distributions
|
||||
path: dist/
|
||||
@@ -34,7 +34,7 @@ jobs:
|
||||
- build
|
||||
steps:
|
||||
- name: Download all the dists
|
||||
uses: actions/download-artifact@v8
|
||||
uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: python-package-distributions
|
||||
path: dist/
|
||||
@@ -93,7 +93,7 @@ jobs:
|
||||
|
||||
steps:
|
||||
- name: Download all the dists
|
||||
uses: actions/download-artifact@v8
|
||||
uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: python-package-distributions
|
||||
path: dist/
|
||||
|
||||
@@ -60,14 +60,14 @@ jobs:
|
||||
|
||||
# Just test that the build works, some libraries won't compile on ARM/rPi etc
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v4
|
||||
uses: docker/setup-qemu-action@v3
|
||||
with:
|
||||
image: tonistiigi/binfmt:latest
|
||||
platforms: all
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
id: buildx
|
||||
uses: docker/setup-buildx-action@v4
|
||||
uses: docker/setup-buildx-action@v3
|
||||
with:
|
||||
install: true
|
||||
version: latest
|
||||
@@ -75,7 +75,7 @@ jobs:
|
||||
|
||||
- name: Test that the docker containers can build (${{ matrix.platform }} - ${{ matrix.dockerfile }})
|
||||
id: docker_build
|
||||
uses: docker/build-push-action@v7
|
||||
uses: docker/build-push-action@v6
|
||||
# https://github.com/docker/build-push-action#customizing
|
||||
with:
|
||||
context: ./
|
||||
|
||||
@@ -52,13 +52,4 @@ jobs:
|
||||
uses: ./.github/workflows/test-stack-reusable-workflow.yml
|
||||
with:
|
||||
python-version: '3.13'
|
||||
skip-pypuppeteer: true
|
||||
|
||||
|
||||
test-application-3-14:
|
||||
#if: github.event_name == 'push' && github.ref == 'refs/heads/master'
|
||||
needs: lint-code
|
||||
uses: ./.github/workflows/test-stack-reusable-workflow.yml
|
||||
with:
|
||||
python-version: '3.14'
|
||||
skip-pypuppeteer: false
|
||||
skip-pypuppeteer: true
|
||||
@@ -42,10 +42,10 @@ jobs:
|
||||
run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v4
|
||||
uses: docker/setup-buildx-action@v3
|
||||
|
||||
- name: Build changedetection.io container for testing under Python ${{ env.PYTHON_VERSION }}
|
||||
uses: docker/build-push-action@v7
|
||||
uses: docker/build-push-action@v6
|
||||
with:
|
||||
context: ./
|
||||
file: ./Dockerfile
|
||||
@@ -71,7 +71,7 @@ jobs:
|
||||
docker save test-changedetectionio -o /tmp/test-changedetectionio.tar
|
||||
|
||||
- name: Upload Docker image artifact
|
||||
uses: actions/upload-artifact@v7
|
||||
uses: actions/upload-artifact@v6
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp/test-changedetectionio.tar
|
||||
@@ -88,7 +88,7 @@ jobs:
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v8
|
||||
uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
@@ -99,7 +99,11 @@ jobs:
|
||||
|
||||
- name: Run Unit Tests
|
||||
run: |
|
||||
docker run test-changedetectionio bash -c 'cd changedetectionio;pytest tests/unit/'
|
||||
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_notification_diff'
|
||||
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_watch_model'
|
||||
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_jinja2_security'
|
||||
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_semver'
|
||||
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_html_to_text'
|
||||
|
||||
# Basic pytest tests with ancillary services
|
||||
basic-tests:
|
||||
@@ -112,7 +116,7 @@ jobs:
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v8
|
||||
uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
@@ -161,14 +165,14 @@ jobs:
|
||||
|
||||
- name: Store test artifacts
|
||||
if: always()
|
||||
uses: actions/upload-artifact@v7
|
||||
uses: actions/upload-artifact@v6
|
||||
with:
|
||||
name: test-cdio-basic-tests-output-py${{ env.PYTHON_VERSION }}
|
||||
path: output-logs
|
||||
|
||||
- name: Store CLI test output
|
||||
if: always()
|
||||
uses: actions/upload-artifact@v7
|
||||
uses: actions/upload-artifact@v6
|
||||
with:
|
||||
name: test-cdio-cli-opts-output-py${{ env.PYTHON_VERSION }}
|
||||
path: cli-opts-output.txt
|
||||
@@ -184,7 +188,7 @@ jobs:
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v8
|
||||
uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
@@ -226,7 +230,7 @@ jobs:
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v8
|
||||
uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
@@ -266,7 +270,7 @@ jobs:
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v8
|
||||
uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
@@ -302,7 +306,7 @@ jobs:
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v8
|
||||
uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
@@ -320,175 +324,6 @@ jobs:
|
||||
run: |
|
||||
docker run --rm --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest tests/smtp/test_notification_smtp.py'
|
||||
|
||||
nginx-reverse-proxy:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
timeout-minutes: 10
|
||||
env:
|
||||
PYTHON_VERSION: ${{ inputs.python-version }}
|
||||
steps:
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v8
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
|
||||
- name: Load Docker image
|
||||
run: |
|
||||
docker load -i /tmp/test-changedetectionio.tar
|
||||
|
||||
- name: Spin up services
|
||||
run: |
|
||||
docker network create changedet-network
|
||||
|
||||
# Start changedetection.io container with X-Forwarded headers support
|
||||
docker run --name changedet-app --hostname changedet-app --network changedet-network \
|
||||
-e USE_X_SETTINGS=true \
|
||||
-d test-changedetectionio
|
||||
sleep 3
|
||||
|
||||
- name: Start nginx reverse proxy
|
||||
run: |
|
||||
# Start nginx with our test configuration
|
||||
docker run --name nginx-proxy --network changedet-network -d -p 8080:80 --rm \
|
||||
-v ${{ github.workspace }}/.github/nginx-reverse-proxy-test.conf:/etc/nginx/conf.d/default.conf:ro \
|
||||
nginx:alpine
|
||||
sleep 2
|
||||
|
||||
- name: Test reverse proxy - root path
|
||||
run: |
|
||||
echo "=== Testing nginx reverse proxy at root path ==="
|
||||
curl --retry-connrefused --retry 6 -s http://localhost:8080/ > /tmp/nginx-test-root.html
|
||||
|
||||
# Check for changedetection.io UI elements
|
||||
if grep -q "checkbox-uuid" /tmp/nginx-test-root.html; then
|
||||
echo "✓ Found checkbox-uuid in response"
|
||||
else
|
||||
echo "ERROR: checkbox-uuid not found in response"
|
||||
cat /tmp/nginx-test-root.html
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Check for watchlist content
|
||||
if grep -q -i "watch" /tmp/nginx-test-root.html; then
|
||||
echo "✓ Found watch/watchlist content in response"
|
||||
else
|
||||
echo "ERROR: watchlist content not found"
|
||||
cat /tmp/nginx-test-root.html
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "✓ Root path reverse proxy working correctly"
|
||||
|
||||
- name: Test reverse proxy - subpath with X-Forwarded-Prefix
|
||||
run: |
|
||||
echo "=== Testing nginx reverse proxy at subpath /changedet-sub/ ==="
|
||||
curl --retry-connrefused --retry 6 -s http://localhost:8080/changedet-sub/ > /tmp/nginx-test-subpath.html
|
||||
|
||||
# Check for changedetection.io UI elements
|
||||
if grep -q "checkbox-uuid" /tmp/nginx-test-subpath.html; then
|
||||
echo "✓ Found checkbox-uuid in subpath response"
|
||||
else
|
||||
echo "ERROR: checkbox-uuid not found in subpath response"
|
||||
cat /tmp/nginx-test-subpath.html
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "✓ Subpath reverse proxy working correctly"
|
||||
|
||||
- name: Test API through reverse proxy subpath
|
||||
run: |
|
||||
echo "=== Testing API endpoints through nginx subpath /changedet-sub/ ==="
|
||||
|
||||
# Extract API key from the changedetection.io datastore
|
||||
API_KEY=$(docker exec changedet-app cat /datastore/changedetection.json | grep -o '"api_access_token": *"[^"]*"' | cut -d'"' -f4)
|
||||
|
||||
if [ -z "$API_KEY" ]; then
|
||||
echo "ERROR: Could not extract API key from datastore"
|
||||
docker exec changedet-app cat /datastore/changedetection.json
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "✓ Extracted API key: ${API_KEY:0:8}..."
|
||||
|
||||
# Create a watch via API through nginx proxy subpath
|
||||
echo "Creating watch via POST to /changedet-sub/api/v1/watch"
|
||||
RESPONSE=$(curl -s -w "\n%{http_code}" -X POST "http://localhost:8080/changedet-sub/api/v1/watch" \
|
||||
-H "x-api-key: ${API_KEY}" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"url": "https://example.com/test-nginx-proxy",
|
||||
"tag": "nginx-test"
|
||||
}')
|
||||
|
||||
HTTP_CODE=$(echo "$RESPONSE" | tail -n1)
|
||||
BODY=$(echo "$RESPONSE" | head -n-1)
|
||||
|
||||
if [ "$HTTP_CODE" != "201" ]; then
|
||||
echo "ERROR: Expected HTTP 201, got $HTTP_CODE"
|
||||
echo "Response: $BODY"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "✓ Watch created successfully (HTTP 201)"
|
||||
|
||||
# Extract the watch UUID from response
|
||||
WATCH_UUID=$(echo "$BODY" | grep -o '"uuid": *"[^"]*"' | cut -d'"' -f4)
|
||||
echo "✓ Watch UUID: $WATCH_UUID"
|
||||
|
||||
# Update the watch via PUT through nginx proxy subpath
|
||||
echo "Updating watch via PUT to /changedet-sub/api/v1/watch/${WATCH_UUID}"
|
||||
RESPONSE=$(curl -s -w "\n%{http_code}" -X PUT "http://localhost:8080/changedet-sub/api/v1/watch/${WATCH_UUID}" \
|
||||
-H "x-api-key: ${API_KEY}" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"paused": true
|
||||
}')
|
||||
|
||||
HTTP_CODE=$(echo "$RESPONSE" | tail -n1)
|
||||
BODY=$(echo "$RESPONSE" | head -n-1)
|
||||
|
||||
if [ "$HTTP_CODE" != "200" ]; then
|
||||
echo "ERROR: Expected HTTP 200, got $HTTP_CODE"
|
||||
echo "Response: $BODY"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if echo "$BODY" | grep -q 'OK'; then
|
||||
echo "✓ Watch updated successfully (HTTP 200, response: OK)"
|
||||
else
|
||||
echo "ERROR: Expected response 'OK', got: $BODY"
|
||||
echo "Response: $BODY"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Verify the watch is paused via GET
|
||||
echo "Verifying watch is paused via GET"
|
||||
RESPONSE=$(curl -s "http://localhost:8080/changedet-sub/api/v1/watch/${WATCH_UUID}" \
|
||||
-H "x-api-key: ${API_KEY}")
|
||||
|
||||
if echo "$RESPONSE" | grep -q '"paused": *true'; then
|
||||
echo "✓ Watch is paused as expected"
|
||||
else
|
||||
echo "ERROR: Watch paused state not confirmed"
|
||||
echo "Response: $RESPONSE"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "✓ API tests through nginx subpath completed successfully"
|
||||
|
||||
- name: Cleanup nginx test
|
||||
if: always()
|
||||
run: |
|
||||
docker logs nginx-proxy || true
|
||||
docker logs changedet-app || true
|
||||
docker stop nginx-proxy changedet-app || true
|
||||
docker rm nginx-proxy changedet-app || true
|
||||
|
||||
|
||||
|
||||
# Proxy tests
|
||||
proxy-tests:
|
||||
runs-on: ubuntu-latest
|
||||
@@ -500,7 +335,7 @@ jobs:
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v8
|
||||
uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
@@ -540,7 +375,7 @@ jobs:
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v8
|
||||
uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
@@ -570,7 +405,7 @@ jobs:
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v8
|
||||
uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
@@ -583,10 +418,6 @@ jobs:
|
||||
run: |
|
||||
docker run -e EXTRA_PACKAGES=changedetection.io-osint-processor test-changedetectionio bash -c 'cd changedetectionio;pytest -vvv -s tests/plugins/test_processor.py::test_check_plugin_processor'
|
||||
|
||||
- name: Plugin get_html_head_extras hook injects into base.html
|
||||
run: |
|
||||
docker run test-changedetectionio bash -c 'cd changedetectionio;pytest -vvv -s tests/plugins/test_html_head_extras.py'
|
||||
|
||||
# Container startup tests
|
||||
container-tests:
|
||||
runs-on: ubuntu-latest
|
||||
@@ -598,7 +429,7 @@ jobs:
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v8
|
||||
uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
@@ -643,7 +474,7 @@ jobs:
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v8
|
||||
uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
@@ -685,154 +516,3 @@ jobs:
|
||||
exit 1
|
||||
fi
|
||||
docker rm sig-test
|
||||
|
||||
# Upgrade path test
|
||||
upgrade-path-test:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
timeout-minutes: 25
|
||||
env:
|
||||
PYTHON_VERSION: ${{ inputs.python-version }}
|
||||
steps:
|
||||
- uses: actions/checkout@v6
|
||||
with:
|
||||
fetch-depth: 0 # Fetch all history and tags for upgrade testing
|
||||
|
||||
- name: Set up Python ${{ env.PYTHON_VERSION }}
|
||||
uses: actions/setup-python@v6
|
||||
with:
|
||||
python-version: ${{ env.PYTHON_VERSION }}
|
||||
|
||||
- name: Check upgrade works without error
|
||||
run: |
|
||||
echo "=== Testing upgrade path from 0.49.1 to ${{ github.ref_name }} (${{ github.sha }}) ==="
|
||||
sudo apt-get update && sudo apt-get install -y --no-install-recommends \
|
||||
g++ \
|
||||
gcc \
|
||||
libc-dev \
|
||||
libffi-dev \
|
||||
libjpeg-dev \
|
||||
libssl-dev \
|
||||
libxslt-dev \
|
||||
make \
|
||||
patch \
|
||||
pkg-config \
|
||||
zlib1g-dev
|
||||
|
||||
# Checkout old version and create datastore
|
||||
git checkout 0.49.1
|
||||
python3 -m venv .venv
|
||||
source .venv/bin/activate
|
||||
pip install -r requirements.txt
|
||||
pip install 'pyOpenSSL>=23.2.0'
|
||||
|
||||
echo "=== Running version 0.49.1 to create datastore ==="
|
||||
ALLOW_IANA_RESTRICTED_ADDRESSES=true python3 ./changedetection.py -C -d /tmp/data &
|
||||
APP_PID=$!
|
||||
|
||||
# Wait for app to be ready
|
||||
echo "Waiting for 0.49.1 to be ready..."
|
||||
sleep 6
|
||||
|
||||
# Extract API key from datastore (0.49.1 uses url-watches.json)
|
||||
API_KEY=$(jq -r '.settings.application.api_access_token // empty' /tmp/data/url-watches.json)
|
||||
echo "API Key: ${API_KEY:0:8}..."
|
||||
|
||||
# Create a watch with tag "github-group-test" via API
|
||||
echo "Creating test watch with tag via API..."
|
||||
curl -X POST "http://127.0.0.1:5000/api/v1/watch" \
|
||||
-H "x-api-key: ${API_KEY}" \
|
||||
-H "Content-Type: application/json" \
|
||||
--show-error --fail \
|
||||
--retry 6 --retry-delay 1 --retry-connrefused \
|
||||
-d '{
|
||||
"url": "https://example.com/upgrade-test",
|
||||
"tag": "github-group-test"
|
||||
}'
|
||||
|
||||
echo "✓ Created watch with tag 'github-group-test'"
|
||||
|
||||
# Create a specific test URL watch
|
||||
echo "Creating test URL watch via API..."
|
||||
curl -X POST "http://127.0.0.1:5000/api/v1/watch" \
|
||||
-H "x-api-key: ${API_KEY}" \
|
||||
-H "Content-Type: application/json" \
|
||||
--show-error --fail \
|
||||
-d '{
|
||||
"url": "http://localhost/test.txt"
|
||||
}'
|
||||
|
||||
echo "✓ Created watch for 'http://localhost/test.txt' in version 0.49.1"
|
||||
|
||||
# Stop the old version gracefully
|
||||
kill $APP_PID
|
||||
wait $APP_PID || true
|
||||
echo "✓ Version 0.49.1 stopped"
|
||||
|
||||
# Upgrade to current version (use commit SHA since we're in detached HEAD)
|
||||
echo "Upgrading to commit ${{ github.sha }}"
|
||||
git checkout ${{ github.sha }}
|
||||
pip install -r requirements.txt
|
||||
|
||||
echo "=== Running current version (commit ${{ github.sha }}) with old datastore (testing mode) ==="
|
||||
ALLOW_IANA_RESTRICTED_ADDRESSES=true TESTING_SHUTDOWN_AFTER_DATASTORE_LOAD=1 python3 ./changedetection.py -d /tmp/data > /tmp/upgrade-test.log 2>&1
|
||||
|
||||
echo "=== Upgrade test output ==="
|
||||
cat /tmp/upgrade-test.log
|
||||
echo "✓ Datastore upgraded successfully"
|
||||
|
||||
# Now start the current version normally to verify the tag survived
|
||||
echo "=== Starting current version to verify tag exists after upgrade ==="
|
||||
ALLOW_IANA_RESTRICTED_ADDRESSES=true timeout 20 python3 ./changedetection.py -d /tmp/data > /tmp/ui-test.log 2>&1 &
|
||||
APP_PID=$!
|
||||
|
||||
# Wait for app to be ready and fetch UI
|
||||
echo "Waiting for current version to be ready..."
|
||||
sleep 5
|
||||
curl --retry 6 --retry-delay 1 --retry-connrefused --silent http://127.0.0.1:5000 > /tmp/ui-output.html
|
||||
|
||||
# Verify tag exists in UI
|
||||
if grep -q "github-group-test" /tmp/ui-output.html; then
|
||||
echo "✓ Tag 'github-group-test' found in UI after upgrade"
|
||||
else
|
||||
echo "ERROR: Tag 'github-group-test' not found in UI after upgrade"
|
||||
echo "=== UI Output ==="
|
||||
cat /tmp/ui-output.html
|
||||
echo "=== App Log ==="
|
||||
cat /tmp/ui-test.log
|
||||
kill $APP_PID || true
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Verify test URL exists in UI
|
||||
if grep -q "http://localhost/test.txt" /tmp/ui-output.html; then
|
||||
echo "✓ Watch URL 'http://localhost/test.txt' found in UI after upgrade"
|
||||
else
|
||||
echo "ERROR: Watch URL 'http://localhost/test.txt' not found in UI after upgrade"
|
||||
echo "=== UI Output ==="
|
||||
cat /tmp/ui-output.html
|
||||
echo "=== App Log ==="
|
||||
cat /tmp/ui-test.log
|
||||
kill $APP_PID || true
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Cleanup
|
||||
kill $APP_PID || true
|
||||
wait $APP_PID || true
|
||||
|
||||
echo ""
|
||||
echo "✓✓✓ Upgrade test passed: 0.49.1 → ${{ github.ref_name }} ✓✓✓"
|
||||
echo " - Commit: ${{ github.sha }}"
|
||||
echo " - Datastore migrated successfully"
|
||||
echo " - Tag 'github-group-test' survived upgrade"
|
||||
echo " - Watch URL 'http://localhost/test.txt' survived upgrade"
|
||||
|
||||
echo "✓ Upgrade test passed: 0.49.1 → ${{ github.ref_name }}"
|
||||
|
||||
- name: Upload upgrade test logs
|
||||
if: always()
|
||||
uses: actions/upload-artifact@v7
|
||||
with:
|
||||
name: upgrade-test-logs-py${{ env.PYTHON_VERSION }}
|
||||
path: /tmp/upgrade-test.log
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
# Read more https://github.com/dgtlmoon/changedetection.io/wiki
|
||||
# Semver means never use .01, or 00. Should be .1.
|
||||
__version__ = '0.54.8'
|
||||
__version__ = '0.52.9'
|
||||
|
||||
from changedetectionio.strtobool import strtobool
|
||||
from json.decoder import JSONDecodeError
|
||||
@@ -61,22 +61,8 @@ import time
|
||||
# ==============================================================================
|
||||
|
||||
import multiprocessing
|
||||
import os
|
||||
import sys
|
||||
|
||||
# Limit glibc malloc arena count to prevent RSS growth from concurrent requests.
|
||||
# Default: glibc creates up to 8×CPU_cores arenas. Each concurrent thread/connection
|
||||
# can trigger a new arena, and freed memory stays mapped in those arenas as RSS forever.
|
||||
# With MALLOC_ARENA_MAX=2, at most 2 arenas are used; freed pages return to the OS faster.
|
||||
# Must be set before worker threads start; env var is read lazily by glibc on first arena creation.
|
||||
if 'MALLOC_ARENA_MAX' not in os.environ:
|
||||
os.environ['MALLOC_ARENA_MAX'] = '2'
|
||||
try:
|
||||
import ctypes as _ctypes
|
||||
_ctypes.CDLL('libc.so.6').mallopt(-8, 2) # M_ARENA_MAX = -8
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Set spawn as global default (safety net - all our code uses explicit contexts anyway)
|
||||
# Skip in tests to avoid breaking pytest-flask's LiveServer fixture (uses unpicklable local functions)
|
||||
if 'pytest' not in sys.modules:
|
||||
@@ -126,9 +112,9 @@ def sigshutdown_handler(_signo, _stack_frame):
|
||||
from changedetectionio.flask_app import update_q, notification_q
|
||||
update_q.close()
|
||||
notification_q.close()
|
||||
logger.debug("Queues closed successfully")
|
||||
logger.debug("Janus queues closed successfully")
|
||||
except Exception as e:
|
||||
logger.critical(f"CRITICAL: Failed to close queues: {e}")
|
||||
logger.critical(f"CRITICAL: Failed to close janus queues: {e}")
|
||||
|
||||
# Shutdown socketio server fast
|
||||
from changedetectionio.flask_app import socketio_server
|
||||
@@ -138,9 +124,13 @@ def sigshutdown_handler(_signo, _stack_frame):
|
||||
except Exception as e:
|
||||
logger.error(f"Error shutting down Socket.IO server: {str(e)}")
|
||||
|
||||
# With immediate persistence, all data is already saved
|
||||
logger.success('All data already persisted (immediate commits enabled).')
|
||||
|
||||
# Save data quickly - force immediate save using abstract method
|
||||
try:
|
||||
datastore.force_save_all()
|
||||
logger.success('Fast sync to storage complete.')
|
||||
except Exception as e:
|
||||
logger.error(f"Error syncing to storage: {str(e)}")
|
||||
|
||||
sys.exit()
|
||||
|
||||
def print_help():
|
||||
@@ -196,6 +186,7 @@ def main():
|
||||
from changedetectionio.flask_app import changedetection_app
|
||||
|
||||
datastore_path = None
|
||||
do_cleanup = False
|
||||
# Set a default logger level
|
||||
logger_level = 'DEBUG'
|
||||
include_default_watches = True
|
||||
@@ -278,7 +269,7 @@ def main():
|
||||
i += 1
|
||||
|
||||
try:
|
||||
opts, args = getopt.getopt(cleaned_argv[1:], "6Csd:h:p:l:P:", "port")
|
||||
opts, args = getopt.getopt(cleaned_argv[1:], "6Ccsd:h:p:l:P:", "port")
|
||||
except getopt.GetoptError as e:
|
||||
print_help()
|
||||
print(f'Error: {e}')
|
||||
@@ -306,6 +297,10 @@ def main():
|
||||
if opt == '-d':
|
||||
datastore_path = arg
|
||||
|
||||
# Cleanup (remove text files that arent in the index)
|
||||
if opt == '-c':
|
||||
do_cleanup = True
|
||||
|
||||
# Create the datadir if it doesnt exist
|
||||
if opt == '-C':
|
||||
create_datastore_dir = True
|
||||
@@ -385,15 +380,7 @@ def main():
|
||||
# Dont' start if the JSON DB looks corrupt
|
||||
logger.critical(f"ERROR: JSON DB or Proxy List JSON at '{app_config['datastore_path']}' appears to be corrupt, aborting.")
|
||||
logger.critical(str(e))
|
||||
sys.exit(1)
|
||||
|
||||
# Testing mode: Exit cleanly after datastore initialization (for CI/CD upgrade tests)
|
||||
if os.environ.get('TESTING_SHUTDOWN_AFTER_DATASTORE_LOAD'):
|
||||
logger.success(f"TESTING MODE: Datastore loaded successfully from {app_config['datastore_path']}")
|
||||
logger.success(f"TESTING MODE: Schema version: {datastore.data['settings']['application'].get('schema_version', 'unknown')}")
|
||||
logger.success(f"TESTING MODE: Loaded {len(datastore.data['watching'])} watches")
|
||||
logger.success("TESTING MODE: Exiting cleanly (TESTING_SHUTDOWN_AFTER_DATASTORE_LOAD is set)")
|
||||
sys.exit(0)
|
||||
return
|
||||
|
||||
# Apply all_paused setting if specified via CLI
|
||||
if all_paused is not None:
|
||||
@@ -619,15 +606,19 @@ def main():
|
||||
else:
|
||||
logger.info("SIGUSR1 handler only registered on Linux, skipped.")
|
||||
|
||||
# Go into cleanup mode
|
||||
if do_cleanup:
|
||||
datastore.remove_unused_snapshots()
|
||||
|
||||
app.config['datastore_path'] = datastore_path
|
||||
|
||||
|
||||
@app.context_processor
|
||||
def inject_template_globals():
|
||||
return dict(right_sticky="v"+__version__,
|
||||
return dict(right_sticky="v{}".format(datastore.data['version_tag']),
|
||||
new_version_available=app.config['NEW_VERSION_AVAILABLE'],
|
||||
has_password=datastore.data['settings']['application']['password'] != False,
|
||||
socket_io_enabled=datastore.data['settings']['application'].get('ui', {}).get('socket_io_enabled', True),
|
||||
socket_io_enabled=datastore.data['settings']['application']['ui'].get('socket_io_enabled', True),
|
||||
all_paused=datastore.data['settings']['application'].get('all_paused', False),
|
||||
all_muted=datastore.data['settings']['application'].get('all_muted', False)
|
||||
)
|
||||
|
||||
@@ -4,10 +4,6 @@ from flask import request
|
||||
from functools import wraps
|
||||
from . import auth, validate_openapi_request
|
||||
from ..validate_url import is_safe_valid_url
|
||||
import json
|
||||
|
||||
# Number of URLs above which import switches to background processing
|
||||
IMPORT_SWITCH_TO_BACKGROUND_THRESHOLD = 20
|
||||
|
||||
|
||||
def default_content_type(content_type='text/plain'):
|
||||
@@ -23,76 +19,6 @@ def default_content_type(content_type='text/plain'):
|
||||
return decorator
|
||||
|
||||
|
||||
def convert_query_param_to_type(value, schema_property):
|
||||
"""
|
||||
Convert a query parameter string to the appropriate type based on schema definition.
|
||||
|
||||
Args:
|
||||
value: String value from query parameter
|
||||
schema_property: Schema property definition with 'type' or 'anyOf' field
|
||||
|
||||
Returns:
|
||||
Converted value in the appropriate type
|
||||
|
||||
Supports both OpenAPI 3.1 formats:
|
||||
- type: [string, 'null'] (array format)
|
||||
- anyOf: [{type: string}, {type: null}] (anyOf format)
|
||||
"""
|
||||
prop_type = schema_property.get('type')
|
||||
|
||||
# Handle OpenAPI 3.1 type arrays: type: [string, 'null']
|
||||
if isinstance(prop_type, list):
|
||||
# Use the first non-null type from the array
|
||||
for t in prop_type:
|
||||
if t != 'null':
|
||||
prop_type = t
|
||||
break
|
||||
else:
|
||||
prop_type = None
|
||||
|
||||
# Handle anyOf schemas (older format)
|
||||
elif 'anyOf' in schema_property:
|
||||
# Use the first non-null type from anyOf
|
||||
for option in schema_property['anyOf']:
|
||||
if option.get('type') and option.get('type') != 'null':
|
||||
prop_type = option.get('type')
|
||||
break
|
||||
else:
|
||||
prop_type = None
|
||||
|
||||
# Handle array type (e.g., notification_urls)
|
||||
if prop_type == 'array':
|
||||
# Support both comma-separated and JSON array format
|
||||
if value.startswith('['):
|
||||
try:
|
||||
return json.loads(value)
|
||||
except json.JSONDecodeError:
|
||||
return [v.strip() for v in value.split(',')]
|
||||
return [v.strip() for v in value.split(',')]
|
||||
|
||||
# Handle object type (e.g., time_between_check, headers)
|
||||
elif prop_type == 'object':
|
||||
try:
|
||||
return json.loads(value)
|
||||
except json.JSONDecodeError:
|
||||
raise ValueError(f"Invalid JSON object for field: {value}")
|
||||
|
||||
# Handle boolean type
|
||||
elif prop_type == 'boolean':
|
||||
return strtobool(value)
|
||||
|
||||
# Handle integer type
|
||||
elif prop_type == 'integer':
|
||||
return int(value)
|
||||
|
||||
# Handle number type (float)
|
||||
elif prop_type == 'number':
|
||||
return float(value)
|
||||
|
||||
# Default: return as string
|
||||
return value
|
||||
|
||||
|
||||
class Import(Resource):
|
||||
def __init__(self, **kwargs):
|
||||
# datastore is a black box dependency
|
||||
@@ -102,128 +28,40 @@ class Import(Resource):
|
||||
@default_content_type('text/plain') #3547 #3542
|
||||
@validate_openapi_request('importWatches')
|
||||
def post(self):
|
||||
"""Import a list of watched URLs with optional watch configuration."""
|
||||
from . import get_watch_schema_properties
|
||||
# Special parameters that are NOT watch configuration
|
||||
special_params = {'tag', 'tag_uuids', 'dedupe', 'proxy'}
|
||||
"""Import a list of watched URLs."""
|
||||
|
||||
extras = {}
|
||||
|
||||
# Handle special 'proxy' parameter
|
||||
if request.args.get('proxy'):
|
||||
plist = self.datastore.proxy_list
|
||||
if not request.args.get('proxy') in plist:
|
||||
proxy_list_str = ', '.join(plist) if plist else 'none configured'
|
||||
return f"Invalid proxy choice, currently supported proxies are '{proxy_list_str}'", 400
|
||||
return "Invalid proxy choice, currently supported proxies are '{}'".format(', '.join(plist)), 400
|
||||
else:
|
||||
extras['proxy'] = request.args.get('proxy')
|
||||
|
||||
# Handle special 'dedupe' parameter
|
||||
dedupe = strtobool(request.args.get('dedupe', 'true'))
|
||||
|
||||
# Handle special 'tag' and 'tag_uuids' parameters
|
||||
tags = request.args.get('tag')
|
||||
tag_uuids = request.args.get('tag_uuids')
|
||||
|
||||
if tag_uuids:
|
||||
tag_uuids = tag_uuids.split(',')
|
||||
|
||||
# Extract ALL other query parameters as watch configuration
|
||||
# Get schema from OpenAPI spec (replaces old schema_create_watch)
|
||||
schema_properties = get_watch_schema_properties()
|
||||
for param_name, param_value in request.args.items():
|
||||
# Skip special parameters
|
||||
if param_name in special_params:
|
||||
continue
|
||||
|
||||
# Skip if not in schema (unknown parameter)
|
||||
if param_name not in schema_properties:
|
||||
return f"Unknown watch configuration parameter: {param_name}", 400
|
||||
|
||||
# Convert to appropriate type based on schema
|
||||
try:
|
||||
converted_value = convert_query_param_to_type(param_value, schema_properties[param_name])
|
||||
extras[param_name] = converted_value
|
||||
except (ValueError, json.JSONDecodeError) as e:
|
||||
return f"Invalid value for parameter '{param_name}': {str(e)}", 400
|
||||
|
||||
# Validate processor if provided
|
||||
if 'processor' in extras:
|
||||
from changedetectionio.processors import available_processors
|
||||
available = [p[0] for p in available_processors()]
|
||||
if extras['processor'] not in available:
|
||||
return f"Invalid processor '{extras['processor']}'. Available processors: {', '.join(available)}", 400
|
||||
|
||||
# Validate fetch_backend if provided
|
||||
if 'fetch_backend' in extras:
|
||||
from changedetectionio.content_fetchers import available_fetchers
|
||||
available = [f[0] for f in available_fetchers()]
|
||||
# Also allow 'system' and extra_browser_* patterns
|
||||
is_valid = (
|
||||
extras['fetch_backend'] == 'system' or
|
||||
extras['fetch_backend'] in available or
|
||||
extras['fetch_backend'].startswith('extra_browser_')
|
||||
)
|
||||
if not is_valid:
|
||||
return f"Invalid fetch_backend '{extras['fetch_backend']}'. Available: system, {', '.join(available)}", 400
|
||||
|
||||
# Validate notification_urls if provided
|
||||
if 'notification_urls' in extras:
|
||||
from wtforms import ValidationError
|
||||
from changedetectionio.api.Notifications import validate_notification_urls
|
||||
try:
|
||||
validate_notification_urls(extras['notification_urls'])
|
||||
except ValidationError as e:
|
||||
return f"Invalid notification_urls: {str(e)}", 400
|
||||
|
||||
urls = request.get_data().decode('utf8').splitlines()
|
||||
# Clean and validate URLs upfront
|
||||
urls_to_import = []
|
||||
added = []
|
||||
for url in urls:
|
||||
url = url.strip()
|
||||
if not len(url):
|
||||
continue
|
||||
|
||||
# Validate URL
|
||||
# If hosts that only contain alphanumerics are allowed ("localhost" for example)
|
||||
if not is_safe_valid_url(url):
|
||||
return f"Invalid or unsupported URL - {url}", 400
|
||||
|
||||
# Check for duplicates if dedupe is enabled
|
||||
if dedupe and self.datastore.url_exists(url):
|
||||
continue
|
||||
|
||||
urls_to_import.append(url)
|
||||
new_uuid = self.datastore.add_watch(url=url, extras=extras, tag=tags, tag_uuids=tag_uuids)
|
||||
added.append(new_uuid)
|
||||
|
||||
# For small imports, process synchronously for immediate feedback
|
||||
if len(urls_to_import) < IMPORT_SWITCH_TO_BACKGROUND_THRESHOLD:
|
||||
added = []
|
||||
for url in urls_to_import:
|
||||
new_uuid = self.datastore.add_watch(url=url, extras=extras, tag=tags, tag_uuids=tag_uuids)
|
||||
added.append(new_uuid)
|
||||
return added, 200
|
||||
|
||||
# For large imports (>= 20), process in background thread
|
||||
else:
|
||||
import threading
|
||||
from loguru import logger
|
||||
|
||||
def import_watches_background():
|
||||
"""Background thread to import watches - discarded after completion."""
|
||||
try:
|
||||
added_count = 0
|
||||
for url in urls_to_import:
|
||||
try:
|
||||
self.datastore.add_watch(url=url, extras=extras, tag=tags, tag_uuids=tag_uuids)
|
||||
added_count += 1
|
||||
except Exception as e:
|
||||
logger.error(f"Error importing URL {url}: {e}")
|
||||
|
||||
logger.info(f"Background import complete: {added_count} watches created")
|
||||
except Exception as e:
|
||||
logger.error(f"Error in background import: {e}")
|
||||
|
||||
# Start background thread and return immediately
|
||||
thread = threading.Thread(target=import_watches_background, daemon=True, name="ImportWatches-Background")
|
||||
thread.start()
|
||||
|
||||
return {'status': f'Importing {len(urls_to_import)} URLs in background', 'count': len(urls_to_import)}, 202
|
||||
return added
|
||||
@@ -1,6 +1,8 @@
|
||||
from flask_expects_json import expects_json
|
||||
from flask_restful import Resource, abort
|
||||
from flask import request
|
||||
from . import auth, validate_openapi_request
|
||||
from . import schema_create_notification_urls, schema_delete_notification_urls
|
||||
|
||||
class Notifications(Resource):
|
||||
def __init__(self, **kwargs):
|
||||
@@ -20,6 +22,7 @@ class Notifications(Resource):
|
||||
|
||||
@auth.check_token
|
||||
@validate_openapi_request('addNotifications')
|
||||
@expects_json(schema_create_notification_urls)
|
||||
def post(self):
|
||||
"""Create Notification URLs."""
|
||||
|
||||
@@ -47,6 +50,7 @@ class Notifications(Resource):
|
||||
|
||||
@auth.check_token
|
||||
@validate_openapi_request('replaceNotifications')
|
||||
@expects_json(schema_create_notification_urls)
|
||||
def put(self):
|
||||
"""Replace Notification URLs."""
|
||||
json_data = request.get_json()
|
||||
@@ -63,12 +67,13 @@ class Notifications(Resource):
|
||||
|
||||
clean_urls = [url.strip() for url in notification_urls if isinstance(url, str)]
|
||||
self.datastore.data['settings']['application']['notification_urls'] = clean_urls
|
||||
self.datastore.commit()
|
||||
self.datastore.needs_write = True
|
||||
|
||||
return {'notification_urls': clean_urls}, 200
|
||||
|
||||
@auth.check_token
|
||||
@validate_openapi_request('deleteNotifications')
|
||||
@expects_json(schema_delete_notification_urls)
|
||||
def delete(self):
|
||||
"""Delete Notification URLs."""
|
||||
|
||||
@@ -90,7 +95,7 @@ class Notifications(Resource):
|
||||
abort(400, message="No matching notification URLs found.")
|
||||
|
||||
self.datastore.data['settings']['application']['notification_urls'] = notification_urls
|
||||
self.datastore.commit()
|
||||
self.datastore.needs_write = True
|
||||
|
||||
return 'OK', 204
|
||||
|
||||
|
||||
@@ -1,21 +0,0 @@
|
||||
import functools
|
||||
from flask import make_response
|
||||
from flask_restful import Resource
|
||||
|
||||
|
||||
@functools.cache
|
||||
def _get_spec_yaml():
|
||||
"""Build and cache the merged spec as a YAML string (only serialized once per process)."""
|
||||
import yaml
|
||||
from changedetectionio.api import build_merged_spec_dict
|
||||
return yaml.dump(build_merged_spec_dict(), default_flow_style=False, allow_unicode=True)
|
||||
|
||||
|
||||
class Spec(Resource):
|
||||
def get(self):
|
||||
"""Return the merged OpenAPI spec including all registered processor extensions."""
|
||||
return make_response(
|
||||
_get_spec_yaml(),
|
||||
200,
|
||||
{'Content-Type': 'application/yaml'}
|
||||
)
|
||||
@@ -1,5 +1,6 @@
|
||||
from changedetectionio import queuedWatchMetaData
|
||||
from changedetectionio import worker_pool
|
||||
from flask_expects_json import expects_json
|
||||
from flask_restful import abort, Resource
|
||||
from loguru import logger
|
||||
|
||||
@@ -7,7 +8,8 @@ import threading
|
||||
from flask import request
|
||||
from . import auth
|
||||
|
||||
from . import validate_openapi_request
|
||||
# Import schemas from __init__.py
|
||||
from . import schema_tag, schema_create_tag, schema_update_tag, validate_openapi_request
|
||||
|
||||
|
||||
class Tag(Resource):
|
||||
@@ -17,12 +19,13 @@ class Tag(Resource):
|
||||
self.update_q = kwargs['update_q']
|
||||
|
||||
# Get information about a single tag
|
||||
# curl http://localhost:5000/api/v1/tag/<uuid_str:uuid>
|
||||
# curl http://localhost:5000/api/v1/tag/<string:uuid>
|
||||
@auth.check_token
|
||||
@validate_openapi_request('getTag')
|
||||
def get(self, uuid):
|
||||
"""Get data for a single tag/group, toggle notification muting, or recheck all."""
|
||||
tag = self.datastore.data['settings']['application']['tags'].get(uuid)
|
||||
from copy import deepcopy
|
||||
tag = deepcopy(self.datastore.data['settings']['application']['tags'].get(uuid))
|
||||
if not tag:
|
||||
abort(404, message=f'No tag exists with the UUID of {uuid}')
|
||||
|
||||
@@ -59,33 +62,13 @@ class Tag(Resource):
|
||||
return {'status': f'OK, queueing {len(watches_to_queue)} watches in background'}, 202
|
||||
|
||||
if request.args.get('muted', '') == 'muted':
|
||||
tag['notification_muted'] = True
|
||||
tag.commit()
|
||||
self.datastore.data['settings']['application']['tags'][uuid]['notification_muted'] = True
|
||||
return "OK", 200
|
||||
elif request.args.get('muted', '') == 'unmuted':
|
||||
tag['notification_muted'] = False
|
||||
tag.commit()
|
||||
self.datastore.data['settings']['application']['tags'][uuid]['notification_muted'] = False
|
||||
return "OK", 200
|
||||
|
||||
# Filter out Watch-specific runtime fields that don't apply to Tags (yet)
|
||||
# TODO: Future enhancement - aggregate these values from all Watches that have this tag:
|
||||
# - check_count: sum of all watches' check_count
|
||||
# - last_checked: most recent last_checked from all watches
|
||||
# - last_changed: most recent last_changed from all watches
|
||||
# - consecutive_filter_failures: count of watches with failures
|
||||
# - etc.
|
||||
# These come from watch_base inheritance but currently have no meaningful value for Tags
|
||||
watch_only_fields = {
|
||||
'browser_steps_last_error_step', 'check_count', 'consecutive_filter_failures',
|
||||
'content-type', 'fetch_time', 'last_changed', 'last_checked', 'last_error',
|
||||
'last_notification_error', 'last_viewed', 'notification_alert_count',
|
||||
'page_title', 'previous_md5', 'remote_server_reply'
|
||||
}
|
||||
|
||||
# Create clean tag dict without Watch-specific fields
|
||||
clean_tag = {k: v for k, v in tag.items() if k not in watch_only_fields}
|
||||
|
||||
return clean_tag
|
||||
return tag
|
||||
|
||||
@auth.check_token
|
||||
@validate_openapi_request('deleteTag')
|
||||
@@ -96,94 +79,51 @@ class Tag(Resource):
|
||||
|
||||
# Delete the tag, and any tag reference
|
||||
del self.datastore.data['settings']['application']['tags'][uuid]
|
||||
|
||||
|
||||
# Remove tag from all watches
|
||||
for watch_uuid, watch in self.datastore.data['watching'].items():
|
||||
if watch.get('tags') and uuid in watch['tags']:
|
||||
watch['tags'].remove(uuid)
|
||||
watch.commit()
|
||||
|
||||
return 'OK', 204
|
||||
|
||||
@auth.check_token
|
||||
@validate_openapi_request('updateTag')
|
||||
@expects_json(schema_update_tag)
|
||||
def put(self, uuid):
|
||||
"""Update tag information."""
|
||||
tag = self.datastore.data['settings']['application']['tags'].get(uuid)
|
||||
if not tag:
|
||||
abort(404, message='No tag exists with the UUID of {}'.format(uuid))
|
||||
|
||||
# Make a mutable copy of request.json for modification
|
||||
json_data = dict(request.json)
|
||||
|
||||
# Validate notification_urls if provided
|
||||
if 'notification_urls' in json_data:
|
||||
if 'notification_urls' in request.json:
|
||||
from wtforms import ValidationError
|
||||
from changedetectionio.api.Notifications import validate_notification_urls
|
||||
try:
|
||||
notification_urls = json_data.get('notification_urls', [])
|
||||
notification_urls = request.json.get('notification_urls', [])
|
||||
validate_notification_urls(notification_urls)
|
||||
except ValidationError as e:
|
||||
return str(e), 400
|
||||
|
||||
# Filter out readOnly fields (extracted from OpenAPI spec Tag schema)
|
||||
# These are system-managed fields that should never be user-settable
|
||||
from . import get_readonly_tag_fields
|
||||
readonly_fields = get_readonly_tag_fields()
|
||||
|
||||
# Tag model inherits from watch_base but has no @property attributes of its own
|
||||
# So we only need to filter readOnly fields
|
||||
for field in readonly_fields:
|
||||
json_data.pop(field, None)
|
||||
|
||||
# Validate remaining fields - reject truly unknown fields
|
||||
# Get valid fields from Tag schema
|
||||
from . import get_tag_schema_properties
|
||||
valid_fields = set(get_tag_schema_properties().keys())
|
||||
|
||||
# Check for unknown fields
|
||||
unknown_fields = set(json_data.keys()) - valid_fields
|
||||
if unknown_fields:
|
||||
return f"Unknown field(s): {', '.join(sorted(unknown_fields))}", 400
|
||||
|
||||
tag.update(json_data)
|
||||
tag.commit()
|
||||
|
||||
# Clear checksums for all watches using this tag to force reprocessing
|
||||
# Tag changes affect inherited configuration
|
||||
cleared_count = self.datastore.clear_checksums_for_tag(uuid)
|
||||
logger.info(f"Tag {uuid} updated via API, cleared {cleared_count} watch checksums")
|
||||
tag.update(request.json)
|
||||
self.datastore.needs_write_urgent = True
|
||||
|
||||
return "OK", 200
|
||||
|
||||
|
||||
@auth.check_token
|
||||
@validate_openapi_request('createTag')
|
||||
# Only cares for {'title': 'xxxx'}
|
||||
def post(self):
|
||||
"""Create a single tag/group."""
|
||||
|
||||
json_data = request.get_json()
|
||||
title = json_data.get("title",'').strip()
|
||||
|
||||
# Validate that only valid fields are provided
|
||||
# Get valid fields from Tag schema
|
||||
from . import get_tag_schema_properties
|
||||
valid_fields = set(get_tag_schema_properties().keys())
|
||||
|
||||
# Check for unknown fields
|
||||
unknown_fields = set(json_data.keys()) - valid_fields
|
||||
if unknown_fields:
|
||||
return f"Unknown field(s): {', '.join(sorted(unknown_fields))}", 400
|
||||
|
||||
new_uuid = self.datastore.add_tag(title=title)
|
||||
if new_uuid:
|
||||
# Apply any extra fields (e.g. processor_config_restock_diff) beyond just title
|
||||
extra = {k: v for k, v in json_data.items() if k != 'title'}
|
||||
if extra:
|
||||
tag = self.datastore.data['settings']['application']['tags'].get(new_uuid)
|
||||
if tag:
|
||||
tag.update(extra)
|
||||
tag.commit()
|
||||
return {'uuid': new_uuid}, 201
|
||||
else:
|
||||
return "Invalid or unsupported tag", 400
|
||||
|
||||
@@ -8,11 +8,13 @@ from . import auth
|
||||
from changedetectionio import queuedWatchMetaData, strtobool
|
||||
from changedetectionio import worker_pool
|
||||
from flask import request, make_response, send_from_directory
|
||||
from flask_expects_json import expects_json
|
||||
from flask_restful import abort, Resource
|
||||
from loguru import logger
|
||||
import copy
|
||||
|
||||
from . import validate_openapi_request, get_readonly_watch_fields
|
||||
# Import schemas from __init__.py
|
||||
from . import schema, schema_create_watch, schema_update_watch, validate_openapi_request
|
||||
from ..notification import valid_notification_formats
|
||||
from ..notification.handler import newline_re
|
||||
|
||||
@@ -57,7 +59,7 @@ class Watch(Resource):
|
||||
self.update_q = kwargs['update_q']
|
||||
|
||||
# Get information about a single watch, excluding the history list (can be large)
|
||||
# curl http://localhost:5000/api/v1/watch/<uuid_str:uuid>
|
||||
# curl http://localhost:5000/api/v1/watch/<string:uuid>
|
||||
# @todo - version2 - ?muted and ?paused should be able to be called together, return the watch struct not "OK"
|
||||
# ?recheck=true
|
||||
@auth.check_token
|
||||
@@ -82,19 +84,15 @@ class Watch(Resource):
|
||||
return "OK", 200
|
||||
if request.args.get('paused', '') == 'paused':
|
||||
watch_obj.pause()
|
||||
watch_obj.commit()
|
||||
return "OK", 200
|
||||
elif request.args.get('paused', '') == 'unpaused':
|
||||
watch_obj.unpause()
|
||||
watch_obj.commit()
|
||||
return "OK", 200
|
||||
if request.args.get('muted', '') == 'muted':
|
||||
watch_obj.mute()
|
||||
watch_obj.commit()
|
||||
return "OK", 200
|
||||
elif request.args.get('muted', '') == 'unmuted':
|
||||
watch_obj.unmute()
|
||||
watch_obj.commit()
|
||||
return "OK", 200
|
||||
|
||||
# Return without history, get that via another API call
|
||||
@@ -119,6 +117,7 @@ class Watch(Resource):
|
||||
|
||||
@auth.check_token
|
||||
@validate_openapi_request('updateWatch')
|
||||
@expects_json(schema_update_watch)
|
||||
def put(self, uuid):
|
||||
"""Update watch information."""
|
||||
watch = self.datastore.data['watching'].get(uuid)
|
||||
@@ -172,38 +171,8 @@ class Watch(Resource):
|
||||
# Extract and remove processor config fields from json_data
|
||||
processor_config_data = processors.extract_processor_config_from_form_data(json_data)
|
||||
|
||||
# Filter out readOnly fields (extracted from OpenAPI spec Watch schema)
|
||||
# These are system-managed fields that should never be user-settable
|
||||
readonly_fields = get_readonly_watch_fields()
|
||||
|
||||
# Also filter out @property attributes (computed/derived values from the model)
|
||||
# These are not stored and should be ignored in PUT requests
|
||||
from changedetectionio.model.Watch import model as WatchModel
|
||||
property_fields = WatchModel.get_property_names()
|
||||
|
||||
# Combine both sets of fields to ignore
|
||||
fields_to_ignore = readonly_fields | property_fields
|
||||
|
||||
# Remove all ignored fields from update data
|
||||
for field in fields_to_ignore:
|
||||
json_data.pop(field, None)
|
||||
|
||||
# Validate remaining fields - reject truly unknown fields
|
||||
# Get valid fields from WatchBase schema
|
||||
from . import get_watch_schema_properties
|
||||
valid_fields = set(get_watch_schema_properties().keys())
|
||||
|
||||
# Also allow last_viewed (explicitly defined in UpdateWatch schema)
|
||||
valid_fields.add('last_viewed')
|
||||
|
||||
# Check for unknown fields
|
||||
unknown_fields = set(json_data.keys()) - valid_fields
|
||||
if unknown_fields:
|
||||
return f"Unknown field(s): {', '.join(sorted(unknown_fields))}", 400
|
||||
|
||||
# Update watch with regular (non-processor-config) fields
|
||||
watch.update(json_data)
|
||||
watch.commit()
|
||||
|
||||
# Save processor config to JSON file
|
||||
processors.save_processor_config(self.datastore, uuid, processor_config_data)
|
||||
@@ -217,7 +186,7 @@ class WatchHistory(Resource):
|
||||
self.datastore = kwargs['datastore']
|
||||
|
||||
# Get a list of available history for a watch by UUID
|
||||
# curl http://localhost:5000/api/v1/watch/<uuid_str:uuid>/history
|
||||
# curl http://localhost:5000/api/v1/watch/<string:uuid>/history
|
||||
@auth.check_token
|
||||
@validate_openapi_request('getWatchHistory')
|
||||
def get(self, uuid):
|
||||
@@ -338,7 +307,7 @@ class WatchHistoryDiff(Resource):
|
||||
word_diff = True
|
||||
|
||||
# Get boolean diff preferences with defaults from DIFF_PREFERENCES_CONFIG
|
||||
changes_only = strtobool(request.args.get('changesOnly', 'false'))
|
||||
changes_only = strtobool(request.args.get('changesOnly', 'true'))
|
||||
ignore_whitespace = strtobool(request.args.get('ignoreWhitespace', 'false'))
|
||||
include_removed = strtobool(request.args.get('removed', 'true'))
|
||||
include_added = strtobool(request.args.get('added', 'true'))
|
||||
@@ -349,7 +318,7 @@ class WatchHistoryDiff(Resource):
|
||||
previous_version_file_contents=from_version_file_contents,
|
||||
newest_version_file_contents=to_version_file_contents,
|
||||
ignore_junk=ignore_whitespace,
|
||||
include_equal=not changes_only,
|
||||
include_equal=changes_only,
|
||||
include_removed=include_removed,
|
||||
include_added=include_added,
|
||||
include_replaced=include_replaced,
|
||||
@@ -400,10 +369,10 @@ class WatchFavicon(Resource):
|
||||
favicon_filename = watch.get_favicon_filename()
|
||||
if favicon_filename:
|
||||
# Use cached MIME type detection
|
||||
filepath = os.path.join(watch.data_dir, favicon_filename)
|
||||
filepath = os.path.join(watch.watch_data_dir, favicon_filename)
|
||||
mime = get_favicon_mime_type(filepath)
|
||||
|
||||
response = make_response(send_from_directory(watch.data_dir, favicon_filename))
|
||||
response = make_response(send_from_directory(watch.watch_data_dir, favicon_filename))
|
||||
response.headers['Content-type'] = mime
|
||||
response.headers['Cache-Control'] = 'max-age=300, must-revalidate' # Cache for 5 minutes, then revalidate
|
||||
return response
|
||||
@@ -419,6 +388,7 @@ class CreateWatch(Resource):
|
||||
|
||||
@auth.check_token
|
||||
@validate_openapi_request('createWatch')
|
||||
@expects_json(schema_create_watch)
|
||||
def post(self):
|
||||
"""Create a single watch."""
|
||||
|
||||
@@ -449,14 +419,8 @@ class CreateWatch(Resource):
|
||||
except ValidationError as e:
|
||||
return str(e), 400
|
||||
|
||||
# Handle processor-config-* fields separately (save to JSON, not watch)
|
||||
from changedetectionio import processors
|
||||
|
||||
extras = copy.deepcopy(json_data)
|
||||
|
||||
# Extract and remove processor config fields from extras
|
||||
processor_config_data = processors.extract_processor_config_from_form_data(extras)
|
||||
|
||||
# Because we renamed 'tag' to 'tags' but don't want to change the API (can do this in v2 of the API)
|
||||
tags = None
|
||||
if extras.get('tag'):
|
||||
@@ -466,10 +430,6 @@ class CreateWatch(Resource):
|
||||
del extras['url']
|
||||
|
||||
new_uuid = self.datastore.add_watch(url=url, extras=extras, tag=tags)
|
||||
|
||||
# Save processor config to separate JSON file
|
||||
if new_uuid and processor_config_data:
|
||||
processors.save_processor_config(self.datastore, new_uuid, processor_config_data)
|
||||
if new_uuid:
|
||||
# Dont queue because the scheduler will check that it hasnt been checked before anyway
|
||||
# worker_pool.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid}))
|
||||
@@ -506,7 +466,6 @@ class CreateWatch(Resource):
|
||||
'last_error': watch['last_error'],
|
||||
'link': watch.link,
|
||||
'page_title': watch['page_title'],
|
||||
'tags': [*tags], # Unpack dict keys to list (can't use list() since variable named 'list')
|
||||
'title': watch['title'],
|
||||
'url': watch['url'],
|
||||
'viewed': watch.viewed
|
||||
@@ -567,4 +526,4 @@ class CreateWatch(Resource):
|
||||
|
||||
return {'status': f'OK, queueing {len(watches_to_queue)} watches in background'}, 202
|
||||
|
||||
return list, 200
|
||||
return list, 200
|
||||
@@ -1,137 +1,50 @@
|
||||
import copy
|
||||
import functools
|
||||
from flask import request, abort
|
||||
from loguru import logger
|
||||
from . import api_schema
|
||||
from ..model import watch_base
|
||||
|
||||
@functools.cache
|
||||
def build_merged_spec_dict():
|
||||
"""
|
||||
Load the base OpenAPI spec and merge in any per-processor api.yaml extensions.
|
||||
# Build a JSON Schema atleast partially based on our Watch model
|
||||
watch_base_config = watch_base()
|
||||
schema = api_schema.build_watch_json_schema(watch_base_config)
|
||||
|
||||
Each processor can provide an api.yaml file alongside its __init__.py that defines
|
||||
additional schemas (e.g., processor_config_restock_diff). These are merged into
|
||||
WatchBase.properties so the spec accurately reflects what the API accepts.
|
||||
schema_create_watch = copy.deepcopy(schema)
|
||||
schema_create_watch['required'] = ['url']
|
||||
del schema_create_watch['properties']['last_viewed']
|
||||
|
||||
Plugin processors (via pluggy) are also supported - they just need an api.yaml
|
||||
next to their processor module.
|
||||
schema_update_watch = copy.deepcopy(schema)
|
||||
schema_update_watch['additionalProperties'] = False
|
||||
|
||||
Returns the merged dict (cached - do not mutate the returned value).
|
||||
"""
|
||||
import os
|
||||
import yaml
|
||||
|
||||
spec_path = os.path.join(os.path.dirname(__file__), '../../docs/api-spec.yaml')
|
||||
if not os.path.exists(spec_path):
|
||||
spec_path = os.path.join(os.path.dirname(__file__), '../docs/api-spec.yaml')
|
||||
|
||||
with open(spec_path, 'r', encoding='utf-8') as f:
|
||||
spec_dict = yaml.safe_load(f)
|
||||
|
||||
try:
|
||||
from changedetectionio.processors import find_processors, get_parent_module
|
||||
for module, proc_name in find_processors():
|
||||
parent = get_parent_module(module)
|
||||
if not parent or not hasattr(parent, '__file__'):
|
||||
continue
|
||||
api_yaml_path = os.path.join(os.path.dirname(parent.__file__), 'api.yaml')
|
||||
if not os.path.exists(api_yaml_path):
|
||||
continue
|
||||
with open(api_yaml_path, 'r', encoding='utf-8') as f:
|
||||
proc_spec = yaml.safe_load(f)
|
||||
# Merge schemas
|
||||
proc_schemas = proc_spec.get('components', {}).get('schemas', {})
|
||||
spec_dict['components']['schemas'].update(proc_schemas)
|
||||
# Inject processor_config_{name} into WatchBase if the schema is defined
|
||||
schema_key = f'processor_config_{proc_name}'
|
||||
if schema_key in proc_schemas:
|
||||
spec_dict['components']['schemas']['WatchBase']['properties'][schema_key] = {
|
||||
'$ref': f'#/components/schemas/{schema_key}'
|
||||
}
|
||||
# Append x-code-samples from processor paths into existing path operations
|
||||
for path, path_item in proc_spec.get('paths', {}).items():
|
||||
if path not in spec_dict.get('paths', {}):
|
||||
continue
|
||||
for method, operation in path_item.items():
|
||||
if method not in spec_dict['paths'][path]:
|
||||
continue
|
||||
if 'x-code-samples' in operation:
|
||||
existing = spec_dict['paths'][path][method].get('x-code-samples', [])
|
||||
spec_dict['paths'][path][method]['x-code-samples'] = existing + operation['x-code-samples']
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to merge processor API specs: {e}")
|
||||
|
||||
return spec_dict
|
||||
# Tag schema is also based on watch_base since Tag inherits from it
|
||||
schema_tag = copy.deepcopy(schema)
|
||||
schema_create_tag = copy.deepcopy(schema_tag)
|
||||
schema_create_tag['required'] = ['title']
|
||||
schema_update_tag = copy.deepcopy(schema_tag)
|
||||
schema_update_tag['additionalProperties'] = False
|
||||
|
||||
schema_notification_urls = copy.deepcopy(schema)
|
||||
schema_create_notification_urls = copy.deepcopy(schema_notification_urls)
|
||||
schema_create_notification_urls['required'] = ['notification_urls']
|
||||
schema_delete_notification_urls = copy.deepcopy(schema_notification_urls)
|
||||
schema_delete_notification_urls['required'] = ['notification_urls']
|
||||
|
||||
@functools.cache
|
||||
def get_openapi_spec():
|
||||
"""Lazy load OpenAPI spec and dependencies only when validation is needed."""
|
||||
import os
|
||||
import yaml # Lazy import - only loaded when API validation is actually used
|
||||
from openapi_core import OpenAPI # Lazy import - saves ~10.7 MB on startup
|
||||
return OpenAPI.from_dict(build_merged_spec_dict())
|
||||
|
||||
@functools.cache
|
||||
def get_openapi_schema_dict():
|
||||
"""
|
||||
Get the raw OpenAPI spec dictionary for schema access.
|
||||
spec_path = os.path.join(os.path.dirname(__file__), '../../docs/api-spec.yaml')
|
||||
if not os.path.exists(spec_path):
|
||||
# Possibly for pip3 packages
|
||||
spec_path = os.path.join(os.path.dirname(__file__), '../docs/api-spec.yaml')
|
||||
|
||||
Used by Import endpoint to validate and convert query parameters.
|
||||
Returns the merged YAML dict (not the OpenAPI object).
|
||||
"""
|
||||
return build_merged_spec_dict()
|
||||
|
||||
@functools.cache
|
||||
def _resolve_schema_properties(schema_name):
|
||||
"""
|
||||
Generic helper to resolve schema properties, including allOf inheritance.
|
||||
|
||||
Args:
|
||||
schema_name: Name of the schema (e.g., 'WatchBase', 'Watch', 'Tag')
|
||||
|
||||
Returns:
|
||||
dict: All properties including inherited ones from $ref schemas
|
||||
"""
|
||||
spec_dict = get_openapi_schema_dict()
|
||||
schema = spec_dict['components']['schemas'].get(schema_name, {})
|
||||
|
||||
properties = {}
|
||||
|
||||
# Handle allOf (schema inheritance)
|
||||
if 'allOf' in schema:
|
||||
for item in schema['allOf']:
|
||||
# Resolve $ref to parent schema
|
||||
if '$ref' in item:
|
||||
ref_path = item['$ref'].split('/')[-1]
|
||||
ref_schema = spec_dict['components']['schemas'].get(ref_path, {})
|
||||
properties.update(ref_schema.get('properties', {}))
|
||||
# Add schema-specific properties
|
||||
if 'properties' in item:
|
||||
properties.update(item['properties'])
|
||||
else:
|
||||
# Direct properties (no inheritance)
|
||||
properties = schema.get('properties', {})
|
||||
|
||||
return properties
|
||||
|
||||
|
||||
@functools.cache
|
||||
def get_watch_schema_properties():
|
||||
"""
|
||||
Extract watch schema properties from OpenAPI spec for Import endpoint.
|
||||
|
||||
Returns WatchBase properties (all writable Watch fields).
|
||||
"""
|
||||
return _resolve_schema_properties('WatchBase')
|
||||
|
||||
# Import readonly field utilities from shared module (avoids circular dependencies with model layer)
|
||||
from changedetectionio.model.schema_utils import get_readonly_watch_fields, get_readonly_tag_fields
|
||||
|
||||
@functools.cache
|
||||
def get_tag_schema_properties():
|
||||
"""
|
||||
Extract Tag schema properties from OpenAPI spec.
|
||||
|
||||
Returns WatchBase properties + Tag-specific properties (overrides_watch).
|
||||
"""
|
||||
return _resolve_schema_properties('Tag')
|
||||
with open(spec_path, 'r', encoding='utf-8') as f:
|
||||
spec_dict = yaml.safe_load(f)
|
||||
_openapi_spec = OpenAPI.from_dict(spec_dict)
|
||||
return _openapi_spec
|
||||
|
||||
def validate_openapi_request(operation_id):
|
||||
"""Decorator to validate incoming requests against OpenAPI spec."""
|
||||
@@ -144,7 +57,6 @@ def validate_openapi_request(operation_id):
|
||||
if request.method.upper() != 'GET':
|
||||
# Lazy import - only loaded when actually validating a request
|
||||
from openapi_core.contrib.flask import FlaskOpenAPIRequest
|
||||
from openapi_core.templating.paths.exceptions import ServerNotFound, PathNotFound, PathError
|
||||
|
||||
spec = get_openapi_spec()
|
||||
openapi_request = FlaskOpenAPIRequest(request)
|
||||
@@ -152,29 +64,8 @@ def validate_openapi_request(operation_id):
|
||||
if result.errors:
|
||||
error_details = []
|
||||
for error in result.errors:
|
||||
# Skip path/server validation errors for reverse proxy compatibility
|
||||
# Flask routing already validates that endpoints exist (returns 404 if not).
|
||||
# OpenAPI validation here is primarily for request body schema validation.
|
||||
# When behind nginx/reverse proxy, URLs may have path prefixes that don't
|
||||
# match the OpenAPI server definitions, causing false positives.
|
||||
if isinstance(error, PathError):
|
||||
logger.debug(f"API Call - Skipping path/server validation (delegated to Flask): {error}")
|
||||
continue
|
||||
|
||||
error_str = str(error)
|
||||
# Extract detailed schema errors from __cause__
|
||||
if hasattr(error, '__cause__') and hasattr(error.__cause__, 'schema_errors'):
|
||||
for schema_error in error.__cause__.schema_errors:
|
||||
field = '.'.join(str(p) for p in schema_error.path) if schema_error.path else 'body'
|
||||
msg = schema_error.message if hasattr(schema_error, 'message') else str(schema_error)
|
||||
error_details.append(f"{field}: {msg}")
|
||||
else:
|
||||
error_details.append(error_str)
|
||||
|
||||
# Only raise if we have actual validation errors (not path/server issues)
|
||||
if error_details:
|
||||
logger.error(f"API Call - Validation failed: {'; '.join(error_details)}")
|
||||
raise BadRequest(f"Validation failed: {'; '.join(error_details)}")
|
||||
error_details.append(str(error))
|
||||
raise BadRequest(f"OpenAPI validation failed: {error_details}")
|
||||
except BadRequest:
|
||||
# Re-raise BadRequest exceptions (validation failures)
|
||||
raise
|
||||
@@ -191,6 +82,5 @@ from .Watch import Watch, WatchHistory, WatchSingleHistory, WatchHistoryDiff, Cr
|
||||
from .Tags import Tags, Tag
|
||||
from .Import import Import
|
||||
from .SystemInfo import SystemInfo
|
||||
from .Spec import Spec
|
||||
from .Notifications import Notifications
|
||||
|
||||
|
||||
@@ -0,0 +1,162 @@
|
||||
# Responsible for building the storage dict into a set of rules ("JSON Schema") acceptable via the API
|
||||
# Probably other ways to solve this when the backend switches to some ORM
|
||||
from changedetectionio.notification import valid_notification_formats
|
||||
|
||||
|
||||
def build_time_between_check_json_schema():
|
||||
# Setup time between check schema
|
||||
schema_properties_time_between_check = {
|
||||
"type": "object",
|
||||
"additionalProperties": False,
|
||||
"properties": {}
|
||||
}
|
||||
for p in ['weeks', 'days', 'hours', 'minutes', 'seconds']:
|
||||
schema_properties_time_between_check['properties'][p] = {
|
||||
"anyOf": [
|
||||
{
|
||||
"type": "integer"
|
||||
},
|
||||
{
|
||||
"type": "null"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
return schema_properties_time_between_check
|
||||
|
||||
def build_watch_json_schema(d):
|
||||
# Base JSON schema
|
||||
schema = {
|
||||
'type': 'object',
|
||||
'properties': {},
|
||||
}
|
||||
|
||||
for k, v in d.items():
|
||||
# @todo 'integer' is not covered here because its almost always for internal usage
|
||||
|
||||
if isinstance(v, type(None)):
|
||||
schema['properties'][k] = {
|
||||
"anyOf": [
|
||||
{"type": "null"},
|
||||
]
|
||||
}
|
||||
elif isinstance(v, list):
|
||||
schema['properties'][k] = {
|
||||
"anyOf": [
|
||||
{"type": "array",
|
||||
# Always is an array of strings, like text or regex or something
|
||||
"items": {
|
||||
"type": "string",
|
||||
"maxLength": 5000
|
||||
}
|
||||
},
|
||||
]
|
||||
}
|
||||
elif isinstance(v, bool):
|
||||
schema['properties'][k] = {
|
||||
"anyOf": [
|
||||
{"type": "boolean"},
|
||||
]
|
||||
}
|
||||
elif isinstance(v, str):
|
||||
schema['properties'][k] = {
|
||||
"anyOf": [
|
||||
{"type": "string",
|
||||
"maxLength": 5000},
|
||||
]
|
||||
}
|
||||
|
||||
# Can also be a string (or None by default above)
|
||||
for v in ['body',
|
||||
'notification_body',
|
||||
'notification_format',
|
||||
'notification_title',
|
||||
'proxy',
|
||||
'tag',
|
||||
'title',
|
||||
'webdriver_js_execute_code'
|
||||
]:
|
||||
schema['properties'][v]['anyOf'].append({'type': 'string', "maxLength": 5000})
|
||||
|
||||
for v in ['last_viewed']:
|
||||
schema['properties'][v] = {
|
||||
"type": "integer",
|
||||
"description": "Unix timestamp in seconds of the last time the watch was viewed.",
|
||||
"minimum": 0
|
||||
}
|
||||
|
||||
# None or Boolean
|
||||
schema['properties']['track_ldjson_price_data']['anyOf'].append({'type': 'boolean'})
|
||||
|
||||
schema['properties']['method'] = {"type": "string",
|
||||
"enum": ["GET", "POST", "DELETE", "PUT"]
|
||||
}
|
||||
|
||||
schema['properties']['fetch_backend']['anyOf'].append({"type": "string",
|
||||
"enum": ["html_requests", "html_webdriver"]
|
||||
})
|
||||
|
||||
schema['properties']['processor'] = {"anyOf": [
|
||||
{"type": "string", "enum": ["restock_diff", "text_json_diff"]},
|
||||
{"type": "null"}
|
||||
]}
|
||||
|
||||
# All headers must be key/value type dict
|
||||
schema['properties']['headers'] = {
|
||||
"type": "object",
|
||||
"patternProperties": {
|
||||
# Should always be a string:string type value
|
||||
".*": {"type": "string"},
|
||||
}
|
||||
}
|
||||
|
||||
schema['properties']['notification_format'] = {'type': 'string',
|
||||
'enum': list(valid_notification_formats.keys())
|
||||
}
|
||||
|
||||
# Stuff that shouldn't be available but is just state-storage
|
||||
for v in ['previous_md5', 'last_error', 'has_ldjson_price_data', 'previous_md5_before_filters', 'uuid']:
|
||||
del schema['properties'][v]
|
||||
|
||||
schema['properties']['webdriver_delay']['anyOf'].append({'type': 'integer'})
|
||||
|
||||
schema['properties']['time_between_check'] = build_time_between_check_json_schema()
|
||||
|
||||
schema['properties']['time_between_check_use_default'] = {
|
||||
"type": "boolean",
|
||||
"default": True,
|
||||
"description": "Whether to use global settings for time between checks - defaults to true if not set"
|
||||
}
|
||||
|
||||
schema['properties']['browser_steps'] = {
|
||||
"anyOf": [
|
||||
{
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"operation": {
|
||||
"type": ["string", "null"],
|
||||
"maxLength": 5000 # Allows null and any string up to 5000 chars (including "")
|
||||
},
|
||||
"selector": {
|
||||
"type": ["string", "null"],
|
||||
"maxLength": 5000
|
||||
},
|
||||
"optional_value": {
|
||||
"type": ["string", "null"],
|
||||
"maxLength": 5000
|
||||
}
|
||||
},
|
||||
"required": ["operation", "selector", "optional_value"],
|
||||
"additionalProperties": False # No extra keys allowed
|
||||
}
|
||||
},
|
||||
{"type": "null"}, # Allows null for `browser_steps`
|
||||
{"type": "array", "maxItems": 0} # Allows empty array []
|
||||
]
|
||||
}
|
||||
|
||||
# headers ?
|
||||
return schema
|
||||
|
||||
@@ -13,7 +13,7 @@ from loguru import logger
|
||||
BACKUP_FILENAME_FORMAT = "changedetection-backup-{}.zip"
|
||||
|
||||
|
||||
def create_backup(datastore_path, watches: dict, tags: dict = None):
|
||||
def create_backup(datastore_path, watches: dict):
|
||||
logger.debug("Creating backup...")
|
||||
import zipfile
|
||||
from pathlib import Path
|
||||
@@ -40,18 +40,14 @@ def create_backup(datastore_path, watches: dict, tags: dict = None):
|
||||
zipObj.write(url_watches_json, arcname="url-watches.json")
|
||||
logger.debug("Added url-watches.json to backup")
|
||||
|
||||
# Add tag data directories (each tag has its own {uuid}/tag.json)
|
||||
for uuid, tag in (tags or {}).items():
|
||||
for f in Path(tag.data_dir).glob('*'):
|
||||
zipObj.write(f,
|
||||
arcname=os.path.join(f.parts[-2], f.parts[-1]),
|
||||
compress_type=zipfile.ZIP_DEFLATED,
|
||||
compresslevel=8)
|
||||
logger.debug(f"Added tag '{tag.get('title')}' ({uuid}) to backup")
|
||||
# Add the flask app secret (if it exists)
|
||||
secret_file = os.path.join(datastore_path, "secret.txt")
|
||||
if os.path.isfile(secret_file):
|
||||
zipObj.write(secret_file, arcname="secret.txt")
|
||||
|
||||
# Add any data in the watch data directory.
|
||||
for uuid, w in watches.items():
|
||||
for f in Path(w.data_dir).glob('*'):
|
||||
for f in Path(w.watch_data_dir).glob('*'):
|
||||
zipObj.write(f,
|
||||
# Use the full path to access the file, but make the file 'relative' in the Zip.
|
||||
arcname=os.path.join(f.parts[-2], f.parts[-1]),
|
||||
@@ -92,28 +88,25 @@ def create_backup(datastore_path, watches: dict, tags: dict = None):
|
||||
|
||||
|
||||
def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
from .restore import construct_restore_blueprint
|
||||
|
||||
backups_blueprint = Blueprint('backups', __name__, template_folder="templates")
|
||||
backups_blueprint.register_blueprint(construct_restore_blueprint(datastore))
|
||||
backup_threads = []
|
||||
|
||||
@backups_blueprint.route("/request-backup", methods=['GET'])
|
||||
@login_optionally_required
|
||||
@backups_blueprint.route("/request-backup", methods=['GET'])
|
||||
def request_backup():
|
||||
if any(thread.is_alive() for thread in backup_threads):
|
||||
flash(gettext("A backup is already running, check back in a few minutes"), "error")
|
||||
return redirect(url_for('backups.create'))
|
||||
return redirect(url_for('backups.index'))
|
||||
|
||||
if len(find_backups()) > int(os.getenv("MAX_NUMBER_BACKUPS", 100)):
|
||||
flash(gettext("Maximum number of backups reached, please remove some"), "error")
|
||||
return redirect(url_for('backups.create'))
|
||||
return redirect(url_for('backups.index'))
|
||||
|
||||
# With immediate persistence, all data is already saved
|
||||
# Be sure we're written fresh - force immediate save using abstract method
|
||||
datastore.force_save_all()
|
||||
zip_thread = threading.Thread(
|
||||
target=create_backup,
|
||||
args=(datastore.datastore_path, datastore.data.get("watching")),
|
||||
kwargs={'tags': datastore.data['settings']['application'].get('tags', {})},
|
||||
daemon=True,
|
||||
name="BackupCreator"
|
||||
)
|
||||
@@ -121,7 +114,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
backup_threads.append(zip_thread)
|
||||
flash(gettext("Backup building in background, check back in a few minutes."))
|
||||
|
||||
return redirect(url_for('backups.create'))
|
||||
return redirect(url_for('backups.index'))
|
||||
|
||||
def find_backups():
|
||||
backup_filepath = os.path.join(datastore.datastore_path, BACKUP_FILENAME_FORMAT.format("*"))
|
||||
@@ -141,43 +134,40 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
|
||||
return backup_info
|
||||
|
||||
@backups_blueprint.route("/download/<string:filename>", methods=['GET'])
|
||||
@login_optionally_required
|
||||
@backups_blueprint.route("/download/<string:filename>", methods=['GET'])
|
||||
def download_backup(filename):
|
||||
import re
|
||||
filename = filename.strip()
|
||||
backup_filename_regex = BACKUP_FILENAME_FORMAT.format(r"\d+")
|
||||
backup_filename_regex = BACKUP_FILENAME_FORMAT.format("\d+")
|
||||
|
||||
full_path = os.path.join(os.path.abspath(datastore.datastore_path), filename)
|
||||
if not full_path.startswith(os.path.abspath(datastore.datastore_path)):
|
||||
abort(404)
|
||||
|
||||
# Resolve 'latest' before any validation so checks run against the real filename.
|
||||
if filename == 'latest':
|
||||
backups = find_backups()
|
||||
if not backups:
|
||||
abort(404)
|
||||
filename = backups[0]['filename']
|
||||
|
||||
if not re.match(r"^" + backup_filename_regex + "$", filename):
|
||||
abort(400) # Bad Request if the filename doesn't match the pattern
|
||||
|
||||
full_path = os.path.join(os.path.abspath(datastore.datastore_path), filename)
|
||||
if not full_path.startswith(os.path.abspath(datastore.datastore_path) + os.sep):
|
||||
abort(404)
|
||||
|
||||
logger.debug(f"Backup download request for '{full_path}'")
|
||||
return send_from_directory(os.path.abspath(datastore.datastore_path), filename, as_attachment=True)
|
||||
|
||||
@backups_blueprint.route("/", methods=['GET'])
|
||||
@backups_blueprint.route("/create", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def create():
|
||||
@backups_blueprint.route("", methods=['GET'])
|
||||
def index():
|
||||
backups = find_backups()
|
||||
output = render_template("backup_create.html",
|
||||
output = render_template("overview.html",
|
||||
available_backups=backups,
|
||||
backup_running=any(thread.is_alive() for thread in backup_threads)
|
||||
)
|
||||
|
||||
return output
|
||||
|
||||
@backups_blueprint.route("/remove-backups", methods=['GET'])
|
||||
@login_optionally_required
|
||||
@backups_blueprint.route("/remove-backups", methods=['GET'])
|
||||
def remove_backups():
|
||||
|
||||
backup_filepath = os.path.join(datastore.datastore_path, BACKUP_FILENAME_FORMAT.format("*"))
|
||||
@@ -187,6 +177,6 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
|
||||
flash(gettext("Backups were deleted."))
|
||||
|
||||
return redirect(url_for('backups.create'))
|
||||
return redirect(url_for('backups.index'))
|
||||
|
||||
return backups_blueprint
|
||||
|
||||
@@ -1,248 +0,0 @@
|
||||
import io
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import tempfile
|
||||
import threading
|
||||
import zipfile
|
||||
|
||||
from flask import Blueprint, render_template, flash, url_for, redirect, request
|
||||
from flask_babel import gettext, lazy_gettext as _l
|
||||
from wtforms import Form, BooleanField, SubmitField
|
||||
from flask_wtf.file import FileField, FileAllowed
|
||||
from loguru import logger
|
||||
|
||||
from changedetectionio.flask_app import login_optionally_required
|
||||
|
||||
# Maximum size of the uploaded zip file. Override via env var MAX_RESTORE_UPLOAD_MB.
|
||||
_MAX_UPLOAD_BYTES = int(os.getenv("MAX_RESTORE_UPLOAD_MB", 256)) * 1024 * 1024
|
||||
# Maximum total uncompressed size of all entries (zip-bomb guard). Override via MAX_RESTORE_DECOMPRESSED_MB.
|
||||
_MAX_DECOMPRESSED_BYTES = int(os.getenv("MAX_RESTORE_DECOMPRESSED_MB", 1024)) * 1024 * 1024
|
||||
# Only top-level directories whose name is a valid UUID are treated as watch/tag entries.
|
||||
_UUID_RE = re.compile(
|
||||
r'^[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$',
|
||||
re.IGNORECASE,
|
||||
)
|
||||
|
||||
|
||||
class RestoreForm(Form):
|
||||
zip_file = FileField(_l('Backup zip file'), validators=[
|
||||
FileAllowed(['zip'], _l('Must be a .zip backup file!'))
|
||||
])
|
||||
include_groups = BooleanField(_l('Include groups'), default=True)
|
||||
include_groups_replace_existing = BooleanField(_l('Replace existing groups of the same UUID'), default=True)
|
||||
include_watches = BooleanField(_l('Include watches'), default=True)
|
||||
include_watches_replace_existing = BooleanField(_l('Replace existing watches of the same UUID'), default=True)
|
||||
submit = SubmitField(_l('Restore backup'))
|
||||
|
||||
|
||||
def import_from_zip(zip_stream, datastore, include_groups, include_groups_replace, include_watches, include_watches_replace):
|
||||
"""
|
||||
Extract and import watches and groups from a backup zip stream.
|
||||
|
||||
Mirrors the store's _load_watches / _load_tags loading pattern:
|
||||
- UUID dirs with tag.json → Tag.model + tag_obj.commit()
|
||||
- UUID dirs with watch.json → rehydrate_entity + watch_obj.commit()
|
||||
|
||||
Returns a dict with counts: restored_groups, skipped_groups, restored_watches, skipped_watches.
|
||||
Raises zipfile.BadZipFile if the stream is not a valid zip.
|
||||
"""
|
||||
from changedetectionio.model import Tag
|
||||
|
||||
restored_groups = 0
|
||||
skipped_groups = 0
|
||||
restored_watches = 0
|
||||
skipped_watches = 0
|
||||
|
||||
current_tags = datastore.data['settings']['application'].get('tags', {})
|
||||
current_watches = datastore.data['watching']
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
logger.debug(f"Restore: extracting zip to {tmpdir}")
|
||||
with zipfile.ZipFile(zip_stream, 'r') as zf:
|
||||
total_uncompressed = sum(m.file_size for m in zf.infolist())
|
||||
if total_uncompressed > _MAX_DECOMPRESSED_BYTES:
|
||||
raise ValueError(
|
||||
f"Backup archive decompressed size ({total_uncompressed // (1024 * 1024)} MB) "
|
||||
f"exceeds the {_MAX_DECOMPRESSED_BYTES // (1024 * 1024)} MB limit"
|
||||
)
|
||||
resolved_dest = os.path.realpath(tmpdir)
|
||||
for member in zf.infolist():
|
||||
member_dest = os.path.realpath(os.path.join(resolved_dest, member.filename))
|
||||
if not member_dest.startswith(resolved_dest + os.sep) and member_dest != resolved_dest:
|
||||
raise ValueError(f"Zip Slip path traversal detected in backup archive: {member.filename!r}")
|
||||
zf.extract(member, tmpdir)
|
||||
logger.debug("Restore: zip extracted, scanning UUID directories")
|
||||
|
||||
for entry in os.scandir(tmpdir):
|
||||
if not entry.is_dir():
|
||||
continue
|
||||
|
||||
uuid = entry.name
|
||||
if not _UUID_RE.match(uuid):
|
||||
logger.warning(f"Restore: skipping non-UUID directory {uuid!r}")
|
||||
continue
|
||||
tag_json_path = os.path.join(entry.path, 'tag.json')
|
||||
watch_json_path = os.path.join(entry.path, 'watch.json')
|
||||
|
||||
# --- Tags (groups) ---
|
||||
if include_groups and os.path.exists(tag_json_path):
|
||||
if uuid in current_tags and not include_groups_replace:
|
||||
logger.debug(f"Restore: skipping existing group {uuid} (replace not requested)")
|
||||
skipped_groups += 1
|
||||
continue
|
||||
|
||||
try:
|
||||
with open(tag_json_path, 'r', encoding='utf-8') as f:
|
||||
tag_data = json.load(f)
|
||||
except (json.JSONDecodeError, IOError) as e:
|
||||
logger.error(f"Restore: failed to read tag.json for {uuid}: {e}")
|
||||
continue
|
||||
|
||||
title = tag_data.get('title', uuid)
|
||||
logger.debug(f"Restore: importing group '{title}' ({uuid})")
|
||||
|
||||
# Mirror _load_tags: set uuid and force processor
|
||||
tag_data['uuid'] = uuid
|
||||
tag_data['processor'] = 'restock_diff'
|
||||
|
||||
# Copy the UUID directory so data_dir exists for commit()
|
||||
dst_dir = os.path.join(datastore.datastore_path, uuid)
|
||||
if os.path.exists(dst_dir):
|
||||
shutil.rmtree(dst_dir)
|
||||
shutil.copytree(entry.path, dst_dir)
|
||||
|
||||
tag_obj = Tag.model(
|
||||
datastore_path=datastore.datastore_path,
|
||||
__datastore=datastore.data,
|
||||
default=tag_data
|
||||
)
|
||||
current_tags[uuid] = tag_obj
|
||||
tag_obj.commit()
|
||||
restored_groups += 1
|
||||
logger.success(f"Restore: group '{title}' ({uuid}) restored")
|
||||
|
||||
# --- Watches ---
|
||||
elif include_watches and os.path.exists(watch_json_path):
|
||||
if uuid in current_watches and not include_watches_replace:
|
||||
logger.debug(f"Restore: skipping existing watch {uuid} (replace not requested)")
|
||||
skipped_watches += 1
|
||||
continue
|
||||
|
||||
try:
|
||||
with open(watch_json_path, 'r', encoding='utf-8') as f:
|
||||
watch_data = json.load(f)
|
||||
except (json.JSONDecodeError, IOError) as e:
|
||||
logger.error(f"Restore: failed to read watch.json for {uuid}: {e}")
|
||||
continue
|
||||
|
||||
url = watch_data.get('url', uuid)
|
||||
logger.debug(f"Restore: importing watch '{url}' ({uuid})")
|
||||
|
||||
# Copy UUID directory first so data_dir and history files exist
|
||||
dst_dir = os.path.join(datastore.datastore_path, uuid)
|
||||
if os.path.exists(dst_dir):
|
||||
shutil.rmtree(dst_dir)
|
||||
shutil.copytree(entry.path, dst_dir)
|
||||
|
||||
# Mirror _load_watches / rehydrate_entity
|
||||
watch_data['uuid'] = uuid
|
||||
watch_obj = datastore.rehydrate_entity(uuid, watch_data)
|
||||
current_watches[uuid] = watch_obj
|
||||
watch_obj.commit()
|
||||
restored_watches += 1
|
||||
logger.success(f"Restore: watch '{url}' ({uuid}) restored")
|
||||
|
||||
logger.debug(f"Restore: scan complete - groups {restored_groups} restored / {skipped_groups} skipped, "
|
||||
f"watches {restored_watches} restored / {skipped_watches} skipped")
|
||||
|
||||
# Persist changedetection.json (includes the updated tags dict)
|
||||
logger.debug("Restore: committing datastore settings")
|
||||
datastore.commit()
|
||||
|
||||
return {
|
||||
'restored_groups': restored_groups,
|
||||
'skipped_groups': skipped_groups,
|
||||
'restored_watches': restored_watches,
|
||||
'skipped_watches': skipped_watches,
|
||||
}
|
||||
|
||||
|
||||
|
||||
def construct_restore_blueprint(datastore):
|
||||
restore_blueprint = Blueprint('restore', __name__, template_folder="templates")
|
||||
restore_threads = []
|
||||
|
||||
@restore_blueprint.route("/restore", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def restore():
|
||||
form = RestoreForm()
|
||||
return render_template("backup_restore.html",
|
||||
form=form,
|
||||
restore_running=any(t.is_alive() for t in restore_threads),
|
||||
max_upload_mb=_MAX_UPLOAD_BYTES // (1024 * 1024),
|
||||
max_decompressed_mb=_MAX_DECOMPRESSED_BYTES // (1024 * 1024))
|
||||
|
||||
@restore_blueprint.route("/restore/start", methods=['POST'])
|
||||
@login_optionally_required
|
||||
def backups_restore_start():
|
||||
if any(t.is_alive() for t in restore_threads):
|
||||
flash(gettext("A restore is already running, check back in a few minutes"), "error")
|
||||
return redirect(url_for('backups.restore.restore'))
|
||||
|
||||
zip_file = request.files.get('zip_file')
|
||||
if not zip_file or not zip_file.filename:
|
||||
flash(gettext("No file uploaded"), "error")
|
||||
return redirect(url_for('backups.restore.restore'))
|
||||
|
||||
if not zip_file.filename.lower().endswith('.zip'):
|
||||
flash(gettext("File must be a .zip backup file"), "error")
|
||||
return redirect(url_for('backups.restore.restore'))
|
||||
|
||||
# Reject oversized uploads before reading the stream into memory.
|
||||
content_length = request.content_length
|
||||
if content_length and content_length > _MAX_UPLOAD_BYTES:
|
||||
flash(gettext("Backup file is too large (max %(mb)s MB)", mb=_MAX_UPLOAD_BYTES // (1024 * 1024)), "error")
|
||||
return redirect(url_for('backups.restore.restore'))
|
||||
|
||||
# Read into memory now — the request stream is gone once we return.
|
||||
# Read one byte beyond the limit so we can detect truncated-but-still-oversized streams.
|
||||
try:
|
||||
raw = zip_file.read(_MAX_UPLOAD_BYTES + 1)
|
||||
if len(raw) > _MAX_UPLOAD_BYTES:
|
||||
flash(gettext("Backup file is too large (max %(mb)s MB)", mb=_MAX_UPLOAD_BYTES // (1024 * 1024)), "error")
|
||||
return redirect(url_for('backups.restore.restore'))
|
||||
zip_bytes = io.BytesIO(raw)
|
||||
with zipfile.ZipFile(zip_bytes): # quick validity check before spawning
|
||||
pass
|
||||
zip_bytes.seek(0)
|
||||
except zipfile.BadZipFile:
|
||||
flash(gettext("Invalid or corrupted zip file"), "error")
|
||||
return redirect(url_for('backups.restore.restore'))
|
||||
|
||||
include_groups = request.form.get('include_groups') == 'y'
|
||||
include_groups_replace = request.form.get('include_groups_replace_existing') == 'y'
|
||||
include_watches = request.form.get('include_watches') == 'y'
|
||||
include_watches_replace = request.form.get('include_watches_replace_existing') == 'y'
|
||||
|
||||
restore_thread = threading.Thread(
|
||||
target=import_from_zip,
|
||||
kwargs={
|
||||
'zip_stream': zip_bytes,
|
||||
'datastore': datastore,
|
||||
'include_groups': include_groups,
|
||||
'include_groups_replace': include_groups_replace,
|
||||
'include_watches': include_watches,
|
||||
'include_watches_replace': include_watches_replace,
|
||||
},
|
||||
daemon=True,
|
||||
name="BackupRestore"
|
||||
)
|
||||
restore_thread.start()
|
||||
restore_threads[:] = [t for t in restore_threads if t.is_alive()]
|
||||
restore_threads.append(restore_thread)
|
||||
flash(gettext("Restore started in background, check back in a few minutes."))
|
||||
return redirect(url_for('backups.restore.restore'))
|
||||
|
||||
return restore_blueprint
|
||||
@@ -1,49 +0,0 @@
|
||||
{% extends 'base.html' %}
|
||||
{% block content %}
|
||||
{% from '_helpers.html' import render_simple_field, render_field %}
|
||||
|
||||
<div class="edit-form">
|
||||
<div class="tabs collapsable">
|
||||
<ul>
|
||||
<li class="tab active" id=""><a href="{{ url_for('backups.create') }}">{{ _('Create') }}</a></li>
|
||||
<li class="tab"><a href="{{ url_for('backups.restore.restore') }}">{{ _('Restore') }}</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
<div class="box-wrap inner">
|
||||
<div id="general">
|
||||
{% if backup_running %}
|
||||
<p>
|
||||
<span class="spinner"></span> <strong>{{ _('A backup is running!') }}</strong>
|
||||
</p>
|
||||
{% endif %}
|
||||
|
||||
<p>
|
||||
{{ _('Here you can download and request a new backup, when a backup is completed you will see it listed below.') }}
|
||||
</p>
|
||||
<br>
|
||||
{% if available_backups %}
|
||||
<ul>
|
||||
{% for backup in available_backups %}
|
||||
<li>
|
||||
<a href="{{ url_for('backups.download_backup', filename=backup["filename"]) }}">{{ backup["filename"] }}</a> {{ backup["filesize"] }} {{ _('Mb') }}
|
||||
</li>
|
||||
{% endfor %}
|
||||
</ul>
|
||||
{% else %}
|
||||
<p>
|
||||
<strong>{{ _('No backups found.') }}</strong>
|
||||
</p>
|
||||
{% endif %}
|
||||
|
||||
<a class="pure-button pure-button-primary"
|
||||
href="{{ url_for('backups.request_backup') }}">{{ _('Create backup') }}</a>
|
||||
{% if available_backups %}
|
||||
<a class="pure-button button-small button-error "
|
||||
href="{{ url_for('backups.remove_backups') }}">{{ _('Remove backups') }}</a>
|
||||
{% endif %}
|
||||
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
{% endblock %}
|
||||
@@ -1,61 +0,0 @@
|
||||
{% extends 'base.html' %}
|
||||
{% block content %}
|
||||
{% from '_helpers.html' import render_field, render_checkbox_field %}
|
||||
|
||||
<div class="edit-form">
|
||||
<div class="tabs collapsable">
|
||||
<ul>
|
||||
<li class="tab"><a href="{{ url_for('backups.create') }}">{{ _('Create') }}</a></li>
|
||||
<li class="tab active"><a href="{{ url_for('backups.restore.restore') }}">{{ _('Restore') }}</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
<div class="box-wrap inner">
|
||||
<div id="general">
|
||||
{% if restore_running %}
|
||||
<p>
|
||||
<span class="spinner"></span> <strong>{{ _('A restore is running!') }}</strong>
|
||||
</p>
|
||||
{% endif %}
|
||||
|
||||
<p>{{ _('Restore a backup. Must be a .zip backup file created on/after v0.53.1 (new database layout).') }}</p>
|
||||
<p>{{ _('Note: This does not override the main application settings, only watches and groups.') }}</p>
|
||||
<p class="pure-form-message">
|
||||
{{ _('Max upload size: %(upload)s MB, Max decompressed size: %(decomp)s MB', upload=max_upload_mb, decomp=max_decompressed_mb) }}
|
||||
</p>
|
||||
|
||||
<form class="pure-form pure-form-stacked settings"
|
||||
action="{{ url_for('backups.restore.backups_restore_start') }}"
|
||||
method="POST"
|
||||
enctype="multipart/form-data">
|
||||
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}">
|
||||
|
||||
<div class="pure-control-group">
|
||||
{{ render_checkbox_field(form.include_groups) }}
|
||||
<span class="pure-form-message-inline">{{ _('Include all groups found in backup?') }}</span>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_checkbox_field(form.include_groups_replace_existing) }}
|
||||
<span class="pure-form-message-inline">{{ _('Replace any existing groups of the same UUID?') }}</span>
|
||||
</div>
|
||||
|
||||
<div class="pure-control-group">
|
||||
{{ render_checkbox_field(form.include_watches) }}
|
||||
<span class="pure-form-message-inline">{{ _('Include all watches found in backup?') }}</span>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_checkbox_field(form.include_watches_replace_existing) }}
|
||||
<span class="pure-form-message-inline">{{ _('Replace any existing watches of the same UUID?') }}</span>
|
||||
</div>
|
||||
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.zip_file) }}
|
||||
</div>
|
||||
|
||||
<div class="pure-controls">
|
||||
<button type="submit" class="pure-button pure-button-primary">{{ _('Restore backup') }}</button>
|
||||
</div>
|
||||
</form>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
{% endblock %}
|
||||
@@ -0,0 +1,36 @@
|
||||
{% extends 'base.html' %}
|
||||
{% block content %}
|
||||
{% from '_helpers.html' import render_simple_field, render_field %}
|
||||
<div class="edit-form">
|
||||
<div class="box-wrap inner">
|
||||
<h2>{{ _('Backups') }}</h2>
|
||||
{% if backup_running %}
|
||||
<p>
|
||||
<span class="spinner"></span> <strong>{{ _('A backup is running!') }}</strong>
|
||||
</p>
|
||||
{% endif %}
|
||||
<p>
|
||||
{{ _('Here you can download and request a new backup, when a backup is completed you will see it listed below.') }}
|
||||
</p>
|
||||
<br>
|
||||
{% if available_backups %}
|
||||
<ul>
|
||||
{% for backup in available_backups %}
|
||||
<li><a href="{{ url_for('backups.download_backup', filename=backup["filename"]) }}">{{ backup["filename"] }}</a> {{ backup["filesize"] }} {{ _('Mb') }}</li>
|
||||
{% endfor %}
|
||||
</ul>
|
||||
{% else %}
|
||||
<p>
|
||||
<strong>{{ _('No backups found.') }}</strong>
|
||||
</p>
|
||||
{% endif %}
|
||||
|
||||
<a class="pure-button pure-button-primary" href="{{ url_for('backups.request_backup') }}">{{ _('Create backup') }}</a>
|
||||
{% if available_backups %}
|
||||
<a class="pure-button button-small button-error " href="{{ url_for('backups.remove_backups') }}">{{ _('Remove backups') }}</a>
|
||||
{% endif %}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
{% endblock %}
|
||||
@@ -102,35 +102,6 @@ def run_async_in_browser_loop(coro):
|
||||
else:
|
||||
raise RuntimeError("Browser steps event loop is not available")
|
||||
|
||||
async def _close_session_resources(session_data, label=''):
|
||||
"""Close all browser resources for a session in the correct order.
|
||||
|
||||
browserstepper.cleanup() closes page+context but not the browser itself.
|
||||
For CloakBrowser, browser.close() is what stops the local Chromium process via pw.stop().
|
||||
For the default CDP path, playwright_context.stop() shuts down the playwright instance.
|
||||
"""
|
||||
browserstepper = session_data.get('browserstepper')
|
||||
if browserstepper:
|
||||
try:
|
||||
await browserstepper.cleanup()
|
||||
except Exception as e:
|
||||
logger.error(f"Error cleaning up browserstepper{label}: {e}")
|
||||
|
||||
browser = session_data.get('browser')
|
||||
if browser:
|
||||
try:
|
||||
await asyncio.wait_for(browser.close(), timeout=5.0)
|
||||
except Exception as e:
|
||||
logger.warning(f"Error closing browser{label}: {e}")
|
||||
|
||||
playwright_context = session_data.get('playwright_context')
|
||||
if playwright_context:
|
||||
try:
|
||||
await playwright_context.stop()
|
||||
except Exception as e:
|
||||
logger.warning(f"Error stopping playwright context{label}: {e}")
|
||||
|
||||
|
||||
def cleanup_expired_sessions():
|
||||
"""Remove expired browsersteps sessions and cleanup their resources"""
|
||||
global browsersteps_sessions, browsersteps_watch_to_session
|
||||
@@ -148,10 +119,13 @@ def cleanup_expired_sessions():
|
||||
logger.debug(f"Cleaning up expired browsersteps session {session_id}")
|
||||
session_data = browsersteps_sessions[session_id]
|
||||
|
||||
try:
|
||||
run_async_in_browser_loop(_close_session_resources(session_data, label=f" for session {session_id}"))
|
||||
except Exception as e:
|
||||
logger.error(f"Error cleaning up session {session_id}: {e}")
|
||||
# Cleanup playwright resources asynchronously
|
||||
browserstepper = session_data.get('browserstepper')
|
||||
if browserstepper:
|
||||
try:
|
||||
run_async_in_browser_loop(browserstepper.cleanup())
|
||||
except Exception as e:
|
||||
logger.error(f"Error cleaning up session {session_id}: {e}")
|
||||
|
||||
# Remove from sessions dict
|
||||
del browsersteps_sessions[session_id]
|
||||
@@ -178,10 +152,12 @@ def cleanup_session_for_watch(watch_uuid):
|
||||
|
||||
session_data = browsersteps_sessions.get(session_id)
|
||||
if session_data:
|
||||
try:
|
||||
run_async_in_browser_loop(_close_session_resources(session_data, label=f" for watch {watch_uuid}"))
|
||||
except Exception as e:
|
||||
logger.error(f"Error cleaning up session {session_id} for watch {watch_uuid}: {e}")
|
||||
browserstepper = session_data.get('browserstepper')
|
||||
if browserstepper:
|
||||
try:
|
||||
run_async_in_browser_loop(browserstepper.cleanup())
|
||||
except Exception as e:
|
||||
logger.error(f"Error cleaning up session {session_id} for watch {watch_uuid}: {e}")
|
||||
|
||||
# Remove from sessions dict
|
||||
del browsersteps_sessions[session_id]
|
||||
@@ -198,80 +174,71 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
browser_steps_blueprint = Blueprint('browser_steps', __name__, template_folder="templates")
|
||||
|
||||
async def start_browsersteps_session(watch_uuid):
|
||||
from changedetectionio.browser_steps import browser_steps
|
||||
from . import browser_steps
|
||||
import time
|
||||
from playwright.async_api import async_playwright
|
||||
|
||||
# We keep the playwright session open for many minutes
|
||||
keepalive_seconds = int(os.getenv('BROWSERSTEPS_MINUTES_KEEPALIVE', 10)) * 60
|
||||
keepalive_ms = ((keepalive_seconds + 3) * 1000)
|
||||
|
||||
browsersteps_start_session = {'start_time': time.time()}
|
||||
|
||||
# Build proxy dict first — needed by both the CDP path and fetcher-specific launchers
|
||||
proxy_id = datastore.get_preferred_proxy_for_watch(uuid=watch_uuid)
|
||||
proxy = None
|
||||
if proxy_id:
|
||||
proxy_url = datastore.proxy_list.get(proxy_id, {}).get('url')
|
||||
if proxy_url:
|
||||
from urllib.parse import urlparse
|
||||
parsed = urlparse(proxy_url)
|
||||
proxy = {'server': proxy_url}
|
||||
if parsed.username:
|
||||
proxy['username'] = parsed.username
|
||||
if parsed.password:
|
||||
proxy['password'] = parsed.password
|
||||
logger.debug(f"Browser Steps: UUID {watch_uuid} selected proxy {proxy_url}")
|
||||
# Create a new async playwright instance for browser steps
|
||||
playwright_instance = async_playwright()
|
||||
playwright_context = await playwright_instance.start()
|
||||
|
||||
# Resolve the fetcher class for this watch so we can ask it to launch its own browser
|
||||
# if it supports that (e.g. CloakBrowser, which runs locally rather than via CDP)
|
||||
watch = datastore.data['watching'][watch_uuid]
|
||||
from changedetectionio import content_fetchers
|
||||
fetcher_name = watch.get_fetch_backend or 'system'
|
||||
if fetcher_name == 'system':
|
||||
fetcher_name = datastore.data['settings']['application'].get('fetch_backend', 'html_requests')
|
||||
fetcher_class = getattr(content_fetchers, fetcher_name, None)
|
||||
|
||||
browser = None
|
||||
playwright_context = None
|
||||
|
||||
# If the fetcher has its own browser launch for the live steps UI, use it.
|
||||
# get_browsersteps_browser(proxy, keepalive_ms) returns (browser, playwright_context_or_None)
|
||||
# or None to fall back to the default CDP path.
|
||||
if fetcher_class and hasattr(fetcher_class, 'get_browsersteps_browser'):
|
||||
result = await fetcher_class.get_browsersteps_browser(proxy=proxy, keepalive_ms=keepalive_ms)
|
||||
if result is not None:
|
||||
browser, playwright_context = result
|
||||
logger.debug(f"Browser Steps: using fetcher-specific browser for '{fetcher_name}'")
|
||||
|
||||
# Default: connect to the remote Playwright/sockpuppetbrowser via CDP
|
||||
if browser is None:
|
||||
playwright_instance = async_playwright()
|
||||
playwright_context = await playwright_instance.start()
|
||||
base_url = os.getenv('PLAYWRIGHT_DRIVER_URL', '').strip('"')
|
||||
a = "?" if '?' not in base_url else '&'
|
||||
base_url += a + f"timeout={keepalive_ms}"
|
||||
browser = await playwright_context.chromium.connect_over_cdp(base_url, timeout=keepalive_ms)
|
||||
logger.debug(f"Browser Steps: using CDP connection to {base_url}")
|
||||
keepalive_ms = ((keepalive_seconds + 3) * 1000)
|
||||
base_url = os.getenv('PLAYWRIGHT_DRIVER_URL', '').strip('"')
|
||||
a = "?" if not '?' in base_url else '&'
|
||||
base_url += a + f"timeout={keepalive_ms}"
|
||||
|
||||
browser = await playwright_context.chromium.connect_over_cdp(base_url, timeout=keepalive_ms)
|
||||
browsersteps_start_session['browser'] = browser
|
||||
browsersteps_start_session['playwright_context'] = playwright_context
|
||||
|
||||
proxy_id = datastore.get_preferred_proxy_for_watch(uuid=watch_uuid)
|
||||
proxy = None
|
||||
if proxy_id:
|
||||
proxy_url = datastore.proxy_list.get(proxy_id).get('url')
|
||||
if proxy_url:
|
||||
|
||||
# Playwright needs separate username and password values
|
||||
from urllib.parse import urlparse
|
||||
parsed = urlparse(proxy_url)
|
||||
proxy = {'server': proxy_url}
|
||||
|
||||
if parsed.username:
|
||||
proxy['username'] = parsed.username
|
||||
|
||||
if parsed.password:
|
||||
proxy['password'] = parsed.password
|
||||
|
||||
logger.debug(f"Browser Steps: UUID {watch_uuid} selected proxy {proxy_url}")
|
||||
|
||||
# Tell Playwright to connect to Chrome and setup a new session via our stepper interface
|
||||
browserstepper = browser_steps.browsersteps_live_ui(
|
||||
playwright_browser=browser,
|
||||
proxy=proxy,
|
||||
start_url=watch.link,
|
||||
headers=watch.get('headers')
|
||||
start_url=datastore.data['watching'][watch_uuid].link,
|
||||
headers=datastore.data['watching'][watch_uuid].get('headers')
|
||||
)
|
||||
|
||||
# Initialize the async connection
|
||||
await browserstepper.connect(proxy=proxy)
|
||||
|
||||
browsersteps_start_session['browserstepper'] = browserstepper
|
||||
|
||||
# For test
|
||||
#await browsersteps_start_session['browserstepper'].action_goto_url(value="http://example.com?time="+str(time.time()))
|
||||
|
||||
return browsersteps_start_session
|
||||
|
||||
|
||||
@browser_steps_blueprint.route("/browsersteps_start_session", methods=['GET'])
|
||||
@login_optionally_required
|
||||
@browser_steps_blueprint.route("/browsersteps_start_session", methods=['GET'])
|
||||
def browsersteps_start_session():
|
||||
# A new session was requested, return sessionID
|
||||
import asyncio
|
||||
import uuid
|
||||
browsersteps_session_id = str(uuid.uuid4())
|
||||
watch_uuid = request.args.get('uuid')
|
||||
@@ -304,8 +271,8 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
logger.debug("Starting connection with playwright - done")
|
||||
return {'browsersteps_session_id': browsersteps_session_id}
|
||||
|
||||
@browser_steps_blueprint.route("/browsersteps_image", methods=['GET'])
|
||||
@login_optionally_required
|
||||
@browser_steps_blueprint.route("/browsersteps_image", methods=['GET'])
|
||||
def browser_steps_fetch_screenshot_image():
|
||||
from flask import (
|
||||
make_response,
|
||||
@@ -318,8 +285,8 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
watch = datastore.data['watching'].get(uuid)
|
||||
filename = f"step_before-{step_n}.jpeg" if request.args.get('type', '') == 'before' else f"step_{step_n}.jpeg"
|
||||
|
||||
if step_n and watch and os.path.isfile(os.path.join(watch.data_dir, filename)):
|
||||
response = make_response(send_from_directory(directory=watch.data_dir, path=filename))
|
||||
if step_n and watch and os.path.isfile(os.path.join(watch.watch_data_dir, filename)):
|
||||
response = make_response(send_from_directory(directory=watch.watch_data_dir, path=filename))
|
||||
response.headers['Content-type'] = 'image/jpeg'
|
||||
response.headers['Cache-Control'] = 'no-cache, no-store, must-revalidate'
|
||||
response.headers['Pragma'] = 'no-cache'
|
||||
@@ -330,14 +297,15 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
return make_response('Unable to fetch image, is the URL correct? does the watch exist? does the step_type-n.jpeg exist?', 401)
|
||||
|
||||
# A request for an action was received
|
||||
@browser_steps_blueprint.route("/browsersteps_update", methods=['POST'])
|
||||
@login_optionally_required
|
||||
@browser_steps_blueprint.route("/browsersteps_update", methods=['POST'])
|
||||
def browsersteps_ui_update():
|
||||
import base64
|
||||
import playwright._impl._errors
|
||||
from changedetectionio.blueprint.browser_steps import browser_steps
|
||||
|
||||
remaining = 0
|
||||
remaining =0
|
||||
uuid = request.args.get('uuid')
|
||||
goto_website_url_first_step = request.args.get('goto_website_url_first_step')
|
||||
|
||||
browsersteps_session_id = request.args.get('browsersteps_session_id')
|
||||
|
||||
@@ -348,33 +316,33 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
return make_response('No session exists under that ID', 500)
|
||||
|
||||
is_last_step = False
|
||||
|
||||
# @todo - should always be an existing session
|
||||
if goto_website_url_first_step:
|
||||
logger.debug("Going to site (requested automatically before stepping)..")
|
||||
step_operation = "Goto site"
|
||||
step_selector = None
|
||||
step_optional_value = None
|
||||
else:
|
||||
# Actions - step/apply/etc, do the thing and return state
|
||||
if request.method == 'POST':
|
||||
# @todo - should always be an existing session
|
||||
step_operation = request.form.get('operation')
|
||||
step_selector = request.form.get('selector')
|
||||
step_optional_value = request.form.get('optional_value')
|
||||
is_last_step = strtobool(request.form.get('is_last_step'))
|
||||
|
||||
try:
|
||||
# Run the async call_action method in the dedicated browser steps event loop
|
||||
run_async_in_browser_loop(
|
||||
browsersteps_sessions[browsersteps_session_id]['browserstepper'].call_action(
|
||||
action_name=step_operation,
|
||||
selector=step_selector,
|
||||
optional_value=step_optional_value
|
||||
try:
|
||||
# Run the async call_action method in the dedicated browser steps event loop
|
||||
run_async_in_browser_loop(
|
||||
browsersteps_sessions[browsersteps_session_id]['browserstepper'].call_action(
|
||||
action_name=step_operation,
|
||||
selector=step_selector,
|
||||
optional_value=step_optional_value
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Exception when calling step operation {step_operation} {str(e)}")
|
||||
# Try to find something of value to give back to the user
|
||||
return make_response(str(e).splitlines()[0], 401)
|
||||
except Exception as e:
|
||||
logger.error(f"Exception when calling step operation {step_operation} {str(e)}")
|
||||
# Try to find something of value to give back to the user
|
||||
return make_response(str(e).splitlines()[0], 401)
|
||||
|
||||
|
||||
# if not this_session.page:
|
||||
# cleanup_playwright_session()
|
||||
# return make_response('Browser session ran out of time :( Please reload this page.', 401)
|
||||
|
||||
# Screenshots and other info only needed on requesting a step (POST)
|
||||
try:
|
||||
@@ -382,7 +350,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
(screenshot, xpath_data) = run_async_in_browser_loop(
|
||||
browsersteps_sessions[browsersteps_session_id]['browserstepper'].get_current_state()
|
||||
)
|
||||
|
||||
|
||||
if is_last_step:
|
||||
watch = datastore.data['watching'].get(uuid)
|
||||
u = browsersteps_sessions[browsersteps_session_id]['browserstepper'].page.url
|
||||
|
||||
-11
@@ -8,17 +8,6 @@ from changedetectionio.content_fetchers import SCREENSHOT_MAX_HEIGHT_DEFAULT
|
||||
from changedetectionio.content_fetchers.base import manage_user_agent
|
||||
from changedetectionio.jinja2_custom import render as jinja_render
|
||||
|
||||
def browser_steps_get_valid_steps(browser_steps: list):
|
||||
if browser_steps is not None and len(browser_steps):
|
||||
valid_steps = list(filter(
|
||||
lambda s: (s['operation'] and len(s['operation']) and s['operation'] != 'Choose one'),browser_steps))
|
||||
|
||||
# Just incase they selected Goto site by accident with older JS
|
||||
if valid_steps and valid_steps[0]['operation'] == 'Goto site':
|
||||
del(valid_steps[0])
|
||||
|
||||
return valid_steps
|
||||
return []
|
||||
|
||||
|
||||
# Two flags, tell the JS which of the "Selector" or "Value" field should be enabled in the front end
|
||||
@@ -40,13 +40,12 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
contents = ''
|
||||
now = time.time()
|
||||
try:
|
||||
import asyncio
|
||||
processor_module = importlib.import_module("changedetectionio.processors.text_json_diff.processor")
|
||||
update_handler = processor_module.perform_site_check(datastore=datastore,
|
||||
watch_uuid=uuid
|
||||
)
|
||||
|
||||
asyncio.run(update_handler.call_browser(preferred_proxy_id=preferred_proxy))
|
||||
update_handler.call_browser(preferred_proxy_id=preferred_proxy)
|
||||
# title, size is len contents not len xfer
|
||||
except content_fetcher_exceptions.Non200ErrorCodeReceived as e:
|
||||
if e.status_code == 404:
|
||||
@@ -95,13 +94,13 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
return results
|
||||
|
||||
@login_required
|
||||
@check_proxies_blueprint.route("/<uuid_str:uuid>/status", methods=['GET'])
|
||||
@check_proxies_blueprint.route("/<string:uuid>/status", methods=['GET'])
|
||||
def get_recheck_status(uuid):
|
||||
results = _recalc_check_status(uuid=uuid)
|
||||
return results
|
||||
|
||||
@login_required
|
||||
@check_proxies_blueprint.route("/<uuid_str:uuid>/start", methods=['GET'])
|
||||
@check_proxies_blueprint.route("/<string:uuid>/start", methods=['GET'])
|
||||
def start_check(uuid):
|
||||
|
||||
if not datastore.proxy_list:
|
||||
|
||||
@@ -9,7 +9,6 @@
|
||||
<li class="tab" id=""><a href="#url-list">{{ _('URL List') }}</a></li>
|
||||
<li class="tab"><a href="#distill-io">{{ _('Distill.io') }}</a></li>
|
||||
<li class="tab"><a href="#xlsx">{{ _('.XLSX & Wachete') }}</a></li>
|
||||
<li class="tab"><a href="{{url_for('backups.restore.restore')}}">{{ _('Backup Restore') }}</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
@@ -17,11 +16,6 @@
|
||||
<form class="pure-form" action="{{url_for('imports.import_page')}}" method="POST" enctype="multipart/form-data">
|
||||
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}">
|
||||
<div class="tab-pane-inner" id="url-list">
|
||||
|
||||
<p>
|
||||
{{ _('Restoring changedetection.io backups is in the') }}<a href="{{ url_for('backups.restore.restore') }}"> {{ _('backups section') }}</a>.
|
||||
<br>
|
||||
</p>
|
||||
<div class="pure-control-group">
|
||||
{{ _('Enter one URL per line, and optionally add tags for each URL after a space, delineated by comma (,):') }}
|
||||
<br>
|
||||
@@ -43,6 +37,9 @@
|
||||
</div>
|
||||
|
||||
<div class="tab-pane-inner" id="distill-io">
|
||||
|
||||
|
||||
|
||||
<div class="pure-control-group">
|
||||
{{ _('Copy and Paste your Distill.io watch \'export\' file, this should be a JSON file.') }}<br>
|
||||
{{ _('This is') }} <i>{{ _('experimental') }}</i>, {{ _('supported fields are') }} <code>name</code>, <code>uri</code>, <code>tags</code>, <code>config:selections</code>, {{ _('the rest (including') }} <code>schedule</code>) {{ _('are ignored.') }}
|
||||
@@ -52,6 +49,8 @@
|
||||
{{ _('Be sure to set your default fetcher to Chrome if required.') }}<br>
|
||||
</p>
|
||||
</div>
|
||||
|
||||
|
||||
<textarea name="distill-io" class="pure-input-1-2" style="width: 100%;
|
||||
font-family:monospace;
|
||||
white-space: pre;
|
||||
@@ -115,7 +114,6 @@
|
||||
</div>
|
||||
</div>
|
||||
<button type="submit" class="pure-button pure-input-1-2 pure-button-primary">{{ _('Import') }}</button>
|
||||
|
||||
</form>
|
||||
|
||||
</div>
|
||||
|
||||
@@ -15,20 +15,18 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q: PriorityQueue
|
||||
price_data_follower_blueprint = Blueprint('price_data_follower', __name__)
|
||||
|
||||
@login_required
|
||||
@price_data_follower_blueprint.route("/<uuid_str:uuid>/accept", methods=['GET'])
|
||||
@price_data_follower_blueprint.route("/<string:uuid>/accept", methods=['GET'])
|
||||
def accept(uuid):
|
||||
datastore.data['watching'][uuid]['track_ldjson_price_data'] = PRICE_DATA_TRACK_ACCEPT
|
||||
datastore.data['watching'][uuid]['processor'] = 'restock_diff'
|
||||
datastore.data['watching'][uuid].clear_watch()
|
||||
datastore.data['watching'][uuid].commit()
|
||||
worker_pool.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
return redirect(url_for("watchlist.index"))
|
||||
|
||||
@login_required
|
||||
@price_data_follower_blueprint.route("/<uuid_str:uuid>/reject", methods=['GET'])
|
||||
@price_data_follower_blueprint.route("/<string:uuid>/reject", methods=['GET'])
|
||||
def reject(uuid):
|
||||
datastore.data['watching'][uuid]['track_ldjson_price_data'] = PRICE_DATA_TRACK_REJECT
|
||||
datastore.data['watching'][uuid].commit()
|
||||
return redirect(url_for("watchlist.index"))
|
||||
|
||||
|
||||
|
||||
@@ -9,12 +9,11 @@ def construct_single_watch_routes(rss_blueprint, datastore):
|
||||
datastore: The ChangeDetectionStore instance
|
||||
"""
|
||||
|
||||
@rss_blueprint.route("/watch/<uuid_str:uuid>", methods=['GET'])
|
||||
@rss_blueprint.route("/watch/<string:uuid>", methods=['GET'])
|
||||
def rss_single_watch(uuid):
|
||||
import time
|
||||
|
||||
from flask import make_response, request, Response
|
||||
from flask_babel import lazy_gettext as _l
|
||||
from flask import make_response, request
|
||||
from feedgen.feed import FeedGenerator
|
||||
from loguru import logger
|
||||
|
||||
@@ -43,12 +42,12 @@ def construct_single_watch_routes(rss_blueprint, datastore):
|
||||
# Get the watch by UUID
|
||||
watch = datastore.data['watching'].get(uuid)
|
||||
if not watch:
|
||||
return Response(_l("Watch with UUID %(uuid)s not found", uuid=uuid), status=404, mimetype='text/plain')
|
||||
return f"Watch with UUID {uuid} not found", 404
|
||||
|
||||
# Check if watch has at least 2 history snapshots
|
||||
dates = list(watch.history.keys())
|
||||
if len(dates) < 2:
|
||||
return Response(_l("Watch %(uuid)s does not have enough history snapshots to show changes (need at least 2)", uuid=uuid), status=400, mimetype='text/plain')
|
||||
return f"Watch {uuid} does not have enough history snapshots to show changes (need at least 2)", 400
|
||||
|
||||
# Get the number of diffs to include (default: 5)
|
||||
rss_diff_length = datastore.data['settings']['application'].get('rss_diff_length', 5)
|
||||
|
||||
@@ -7,7 +7,7 @@ def construct_tag_routes(rss_blueprint, datastore):
|
||||
datastore: The ChangeDetectionStore instance
|
||||
"""
|
||||
|
||||
@rss_blueprint.route("/tag/<uuid_str:tag_uuid>", methods=['GET'])
|
||||
@rss_blueprint.route("/tag/<string:tag_uuid>", methods=['GET'])
|
||||
def rss_tag_feed(tag_uuid):
|
||||
|
||||
from flask import make_response, request, url_for
|
||||
|
||||
@@ -1,9 +1,8 @@
|
||||
import os
|
||||
from copy import deepcopy
|
||||
from datetime import datetime, timedelta
|
||||
from datetime import datetime
|
||||
from zoneinfo import ZoneInfo, available_timezones
|
||||
import secrets
|
||||
import time
|
||||
import flask_login
|
||||
from flask import Blueprint, render_template, request, redirect, url_for, flash
|
||||
from flask_babel import gettext
|
||||
@@ -75,17 +74,12 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
del (app_update['password'])
|
||||
|
||||
datastore.data['settings']['application'].update(app_update)
|
||||
|
||||
|
||||
# Handle dynamic worker count adjustment
|
||||
old_worker_count = datastore.data['settings']['requests'].get('workers', 1)
|
||||
new_worker_count = form.data['requests'].get('workers', 1)
|
||||
|
||||
datastore.data['settings']['requests'].update(form.data['requests'])
|
||||
datastore.commit()
|
||||
|
||||
# Clear all checksums to force reprocessing with new settings
|
||||
# Global settings can affect watch behavior (filters, rendering, etc.)
|
||||
datastore.clear_all_last_checksums()
|
||||
|
||||
# Adjust worker count if it changed
|
||||
if new_worker_count != old_worker_count:
|
||||
@@ -115,11 +109,13 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
|
||||
if not os.getenv("SALTED_PASS", False) and len(form.application.form.password.encrypted_password):
|
||||
datastore.data['settings']['application']['password'] = form.application.form.password.encrypted_password
|
||||
datastore.commit()
|
||||
datastore.needs_write_urgent = True
|
||||
flash(gettext("Password protection enabled."), 'notice')
|
||||
flask_login.logout_user()
|
||||
return redirect(url_for('watchlist.index'))
|
||||
|
||||
datastore.needs_write_urgent = True
|
||||
|
||||
# Also save plugin settings from the same form submission
|
||||
plugin_tabs_list = get_plugin_settings_tabs()
|
||||
for tab in plugin_tabs_list:
|
||||
@@ -147,9 +143,6 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
active_plugins = get_active_plugins()
|
||||
python_version = f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}"
|
||||
|
||||
# Calculate uptime in seconds
|
||||
uptime_seconds = time.time() - datastore.start_time
|
||||
|
||||
# Get plugin settings tabs and instantiate forms
|
||||
plugin_tabs = get_plugin_settings_tabs()
|
||||
plugin_forms = {}
|
||||
@@ -168,7 +161,6 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
active_plugins=active_plugins,
|
||||
api_key=datastore.data['settings']['application'].get('api_access_token'),
|
||||
python_version=python_version,
|
||||
uptime_seconds=uptime_seconds,
|
||||
available_timezones=sorted(available_timezones()),
|
||||
emailprefix=os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False),
|
||||
extra_notification_token_placeholder_info=datastore.get_unique_notification_token_placeholders_available(),
|
||||
@@ -189,7 +181,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
def settings_reset_api_key():
|
||||
secret = secrets.token_hex(16)
|
||||
datastore.data['settings']['application']['api_access_token'] = secret
|
||||
datastore.commit()
|
||||
datastore.needs_write_urgent = True
|
||||
flash(gettext("API Key was regenerated."))
|
||||
return redirect(url_for('settings.settings_page')+'#api')
|
||||
|
||||
@@ -206,7 +198,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
def toggle_all_paused():
|
||||
current_state = datastore.data['settings']['application'].get('all_paused', False)
|
||||
datastore.data['settings']['application']['all_paused'] = not current_state
|
||||
datastore.commit()
|
||||
datastore.needs_write_urgent = True
|
||||
|
||||
if datastore.data['settings']['application']['all_paused']:
|
||||
flash(gettext("Automatic scheduling paused - checks will not be queued."), 'notice')
|
||||
@@ -220,7 +212,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
def toggle_all_muted():
|
||||
current_state = datastore.data['settings']['application'].get('all_muted', False)
|
||||
datastore.data['settings']['application']['all_muted'] = not current_state
|
||||
datastore.commit()
|
||||
datastore.needs_write_urgent = True
|
||||
|
||||
if datastore.data['settings']['application']['all_muted']:
|
||||
flash(gettext("All notifications muted."), 'notice')
|
||||
|
||||
@@ -25,7 +25,7 @@
|
||||
<li class="tab"><a href="#ui-options">{{ _('UI Options') }}</a></li>
|
||||
<li class="tab"><a href="#api">{{ _('API') }}</a></li>
|
||||
<li class="tab"><a href="#rss">{{ _('RSS') }}</a></li>
|
||||
<li class="tab"><a href="{{ url_for('backups.create') }}">{{ _('Backups') }}</a></li>
|
||||
<li class="tab"><a href="{{ url_for('backups.index') }}">{{ _('Backups') }}</a></li>
|
||||
<li class="tab"><a href="#timedate">{{ _('Time & Date') }}</a></li>
|
||||
<li class="tab"><a href="#proxies">{{ _('CAPTCHA & Proxies') }}</a></li>
|
||||
{% if plugin_tabs %}
|
||||
@@ -154,8 +154,9 @@
|
||||
</span>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
<br>
|
||||
{{ _('Tip:') }} <a href="{{ url_for('settings.settings_page')}}#proxies">{{ _('Connect using Bright Data proxies, find out more here.') }}</a>
|
||||
<br>
|
||||
{{ _('Tip:') }} <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration#brightdata-proxy-support">{{ _('Connect using Bright Data and Oxylabs Proxies, find out more here.') }}</a>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -351,7 +352,7 @@ nav
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<p><strong>{{ _('Tip') }}</strong>: {{ _('"Residential" and "Mobile" proxy type can be more successful than "Data Center" for blocked websites.') }}</p>
|
||||
<p><strong>{{ _('Tip') }}</strong>: {{ _('"Residential" and "Mobile" proxy type can be more successfull than "Data Center" for blocked websites.') }}</p>
|
||||
|
||||
<div class="pure-control-group" id="extra-proxies-setting">
|
||||
{{ render_fieldlist_with_inline_errors(form.requests.form.extra_proxies) }}
|
||||
@@ -393,7 +394,6 @@ nav
|
||||
{% endfor %}
|
||||
{% endif %}
|
||||
<div class="tab-pane-inner" id="info">
|
||||
<p><strong>{{ _('Uptime:') }}</strong> {{ uptime_seconds|format_duration }}</p>
|
||||
<p><strong>{{ _('Python version:') }}</strong> {{ python_version }}</p>
|
||||
<p><strong>{{ _('Plugins active:') }}</strong></p>
|
||||
{% if active_plugins %}
|
||||
|
||||
@@ -54,16 +54,14 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
|
||||
return redirect(url_for('tags.tags_overview_page'))
|
||||
|
||||
@tags_blueprint.route("/mute/<uuid_str:uuid>", methods=['GET'])
|
||||
@tags_blueprint.route("/mute/<string:uuid>", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def mute(uuid):
|
||||
tag = datastore.data['settings']['application']['tags'].get(uuid)
|
||||
if tag:
|
||||
tag['notification_muted'] = not tag['notification_muted']
|
||||
tag.commit()
|
||||
if datastore.data['settings']['application']['tags'].get(uuid):
|
||||
datastore.data['settings']['application']['tags'][uuid]['notification_muted'] = not datastore.data['settings']['application']['tags'][uuid]['notification_muted']
|
||||
return redirect(url_for('tags.tags_overview_page'))
|
||||
|
||||
@tags_blueprint.route("/delete/<uuid_str:uuid>", methods=['GET'])
|
||||
@tags_blueprint.route("/delete/<string:uuid>", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def delete(uuid):
|
||||
# Delete the tag from settings immediately
|
||||
@@ -78,7 +76,6 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
for watch_uuid, watch in datastore.data['watching'].items():
|
||||
if watch.get('tags') and tag_uuid in watch['tags']:
|
||||
watch['tags'].remove(tag_uuid)
|
||||
watch.commit()
|
||||
removed_count += 1
|
||||
logger.info(f"Background: Tag {tag_uuid} removed from {removed_count} watches")
|
||||
except Exception as e:
|
||||
@@ -90,7 +87,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
flash(gettext("Tag deleted, removing from watches in background"))
|
||||
return redirect(url_for('tags.tags_overview_page'))
|
||||
|
||||
@tags_blueprint.route("/unlink/<uuid_str:uuid>", methods=['GET'])
|
||||
@tags_blueprint.route("/unlink/<string:uuid>", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def unlink(uuid):
|
||||
# Unlink tag from all watches in background thread to avoid blocking
|
||||
@@ -101,7 +98,6 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
for watch_uuid, watch in datastore.data['watching'].items():
|
||||
if watch.get('tags') and tag_uuid in watch['tags']:
|
||||
watch['tags'].remove(tag_uuid)
|
||||
watch.commit()
|
||||
unlinked_count += 1
|
||||
logger.info(f"Background: Tag {tag_uuid} unlinked from {unlinked_count} watches")
|
||||
except Exception as e:
|
||||
@@ -116,11 +112,8 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
@tags_blueprint.route("/delete_all", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def delete_all():
|
||||
|
||||
for tag_uuid in list(datastore.data['settings']['application']['tags'].keys()):
|
||||
# TagsDict 'del' handler will remove the dir
|
||||
del datastore.data['settings']['application']['tags'][tag_uuid]
|
||||
|
||||
# Clear all tags from settings immediately
|
||||
datastore.data['settings']['application']['tags'] = {}
|
||||
|
||||
# Clear tags from all watches in background thread to avoid blocking
|
||||
def clear_all_tags_background():
|
||||
@@ -129,7 +122,6 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
try:
|
||||
for watch_uuid, watch in datastore.data['watching'].items():
|
||||
watch['tags'] = []
|
||||
watch.commit()
|
||||
cleared_count += 1
|
||||
logger.info(f"Background: Cleared tags from {cleared_count} watches")
|
||||
except Exception as e:
|
||||
@@ -141,7 +133,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
flash(gettext("All tags deleted, clearing from watches in background"))
|
||||
return redirect(url_for('tags.tags_overview_page'))
|
||||
|
||||
@tags_blueprint.route("/edit/<uuid_str:uuid>", methods=['GET'])
|
||||
@tags_blueprint.route("/edit/<string:uuid>", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def form_tag_edit(uuid):
|
||||
from changedetectionio.blueprint.tags.form import group_restock_settings_form
|
||||
@@ -160,21 +152,6 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
default_system_settings = datastore.data['settings'],
|
||||
)
|
||||
|
||||
# Bridge API-stored processor_config_* values into the form's FormField sub-forms.
|
||||
# The API stores processor_config_restock_diff in the tag dict; find the matching
|
||||
# FormField by checking which one's sub-fields cover the config keys.
|
||||
from wtforms.fields.form import FormField as WTFormField
|
||||
for key, value in default.items():
|
||||
if not key.startswith('processor_config_') or not isinstance(value, dict):
|
||||
continue
|
||||
for form_field in form:
|
||||
if isinstance(form_field, WTFormField) and all(k in form_field.form._fields for k in value):
|
||||
for sub_key, sub_value in value.items():
|
||||
sub_field = form_field.form._fields.get(sub_key)
|
||||
if sub_field is not None:
|
||||
sub_field.data = sub_value
|
||||
break
|
||||
|
||||
template_args = {
|
||||
'data': default,
|
||||
'form': form,
|
||||
@@ -218,17 +195,17 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
return output
|
||||
|
||||
|
||||
@tags_blueprint.route("/edit/<uuid_str:uuid>", methods=['POST'])
|
||||
@tags_blueprint.route("/edit/<string:uuid>", methods=['POST'])
|
||||
@login_optionally_required
|
||||
def form_tag_edit_submit(uuid):
|
||||
from changedetectionio.blueprint.tags.form import group_restock_settings_form
|
||||
if uuid == 'first':
|
||||
uuid = list(datastore.data['settings']['application']['tags'].keys()).pop()
|
||||
|
||||
tag = datastore.data['settings']['application']['tags'].get(uuid)
|
||||
default = datastore.data['settings']['application']['tags'].get(uuid)
|
||||
|
||||
form = group_restock_settings_form(formdata=request.form if request.method == 'POST' else None,
|
||||
data=tag,
|
||||
data=default,
|
||||
extra_notification_tokens=datastore.get_unique_notification_tokens_available()
|
||||
)
|
||||
# @todo subclass form so validation works
|
||||
@@ -237,18 +214,15 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
# flash(','.join(l), 'error')
|
||||
# return redirect(url_for('tags.form_tag_edit_submit', uuid=uuid))
|
||||
|
||||
tag.update(form.data)
|
||||
tag['processor'] = 'restock_diff'
|
||||
tag.commit()
|
||||
|
||||
# Clear checksums for all watches using this tag to force reprocessing
|
||||
# Tag changes affect inherited configuration
|
||||
cleared_count = datastore.clear_checksums_for_tag(uuid)
|
||||
logger.info(f"Tag {uuid} updated, cleared {cleared_count} watch checksums")
|
||||
|
||||
datastore.data['settings']['application']['tags'][uuid].update(form.data)
|
||||
datastore.data['settings']['application']['tags'][uuid]['processor'] = 'restock_diff'
|
||||
datastore.needs_write_urgent = True
|
||||
flash(gettext("Updated"))
|
||||
|
||||
return redirect(url_for('tags.tags_overview_page'))
|
||||
|
||||
|
||||
@tags_blueprint.route("/delete/<string:uuid>", methods=['GET'])
|
||||
def form_tag_delete(uuid):
|
||||
return redirect(url_for('tags.tags_overview_page'))
|
||||
return tags_blueprint
|
||||
|
||||
@@ -24,7 +24,7 @@ def _handle_operations(op, uuids, datastore, worker_pool, update_q, queuedWatchM
|
||||
for uuid in uuids:
|
||||
if datastore.data['watching'].get(uuid):
|
||||
datastore.data['watching'][uuid]['paused'] = True
|
||||
datastore.data['watching'][uuid].commit()
|
||||
datastore.mark_watch_dirty(uuid)
|
||||
if emit_flash:
|
||||
flash(gettext("{} watches paused").format(len(uuids)))
|
||||
|
||||
@@ -32,7 +32,7 @@ def _handle_operations(op, uuids, datastore, worker_pool, update_q, queuedWatchM
|
||||
for uuid in uuids:
|
||||
if datastore.data['watching'].get(uuid):
|
||||
datastore.data['watching'][uuid.strip()]['paused'] = False
|
||||
datastore.data['watching'][uuid].commit()
|
||||
datastore.mark_watch_dirty(uuid)
|
||||
if emit_flash:
|
||||
flash(gettext("{} watches unpaused").format(len(uuids)))
|
||||
|
||||
@@ -47,7 +47,7 @@ def _handle_operations(op, uuids, datastore, worker_pool, update_q, queuedWatchM
|
||||
for uuid in uuids:
|
||||
if datastore.data['watching'].get(uuid):
|
||||
datastore.data['watching'][uuid]['notification_muted'] = True
|
||||
datastore.data['watching'][uuid].commit()
|
||||
datastore.mark_watch_dirty(uuid)
|
||||
if emit_flash:
|
||||
flash(gettext("{} watches muted").format(len(uuids)))
|
||||
|
||||
@@ -55,7 +55,7 @@ def _handle_operations(op, uuids, datastore, worker_pool, update_q, queuedWatchM
|
||||
for uuid in uuids:
|
||||
if datastore.data['watching'].get(uuid):
|
||||
datastore.data['watching'][uuid]['notification_muted'] = False
|
||||
datastore.data['watching'][uuid].commit()
|
||||
datastore.mark_watch_dirty(uuid)
|
||||
if emit_flash:
|
||||
flash(gettext("{} watches un-muted").format(len(uuids)))
|
||||
|
||||
@@ -71,7 +71,7 @@ def _handle_operations(op, uuids, datastore, worker_pool, update_q, queuedWatchM
|
||||
for uuid in uuids:
|
||||
if datastore.data['watching'].get(uuid):
|
||||
datastore.data['watching'][uuid]["last_error"] = False
|
||||
datastore.data['watching'][uuid].commit()
|
||||
datastore.mark_watch_dirty(uuid)
|
||||
if emit_flash:
|
||||
flash(gettext("{} watches errors cleared").format(len(uuids)))
|
||||
|
||||
@@ -92,7 +92,6 @@ def _handle_operations(op, uuids, datastore, worker_pool, update_q, queuedWatchM
|
||||
datastore.data['watching'][uuid]['notification_body'] = None
|
||||
datastore.data['watching'][uuid]['notification_urls'] = []
|
||||
datastore.data['watching'][uuid]['notification_format'] = USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH
|
||||
datastore.data['watching'][uuid].commit()
|
||||
if emit_flash:
|
||||
flash(gettext("{} watches set to use default notification settings").format(len(uuids)))
|
||||
|
||||
@@ -108,7 +107,6 @@ def _handle_operations(op, uuids, datastore, worker_pool, update_q, queuedWatchM
|
||||
datastore.data['watching'][uuid]['tags'] = []
|
||||
|
||||
datastore.data['watching'][uuid]['tags'].append(tag_uuid)
|
||||
datastore.data['watching'][uuid].commit()
|
||||
if emit_flash:
|
||||
flash(gettext("{} watches were tagged").format(len(uuids)))
|
||||
|
||||
@@ -141,7 +139,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_pool,
|
||||
# Import the login decorator
|
||||
from changedetectionio.auth_decorator import login_optionally_required
|
||||
|
||||
@ui_blueprint.route("/clear_history/<uuid_str:uuid>", methods=['GET'])
|
||||
@ui_blueprint.route("/clear_history/<string:uuid>", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def clear_watch_history(uuid):
|
||||
try:
|
||||
@@ -156,9 +154,9 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_pool,
|
||||
@login_optionally_required
|
||||
def clear_all_history():
|
||||
if request.method == 'POST':
|
||||
confirmtext = request.form.get('confirmtext', '')
|
||||
confirmtext = request.form.get('confirmtext')
|
||||
|
||||
if confirmtext.strip().lower() == gettext('clear').strip().lower():
|
||||
if confirmtext == 'clear':
|
||||
# Run in background thread to avoid blocking
|
||||
def clear_history_background():
|
||||
# Capture UUIDs first to avoid race conditions
|
||||
@@ -194,9 +192,9 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_pool,
|
||||
tag_limit = request.args.get('tag')
|
||||
now = int(time.time())
|
||||
|
||||
# Mark watches as viewed - use background thread only for large watch counts
|
||||
def mark_viewed_impl():
|
||||
"""Mark watches as viewed - can run synchronously or in background thread."""
|
||||
# Mark watches as viewed in background thread to avoid blocking
|
||||
def mark_viewed_background():
|
||||
"""Background thread to mark watches as viewed - discarded after completion."""
|
||||
marked_count = 0
|
||||
try:
|
||||
for watch_uuid, watch in datastore.data['watching'].items():
|
||||
@@ -209,21 +207,15 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_pool,
|
||||
datastore.set_last_viewed(watch_uuid, now)
|
||||
marked_count += 1
|
||||
|
||||
logger.info(f"Marking complete: {marked_count} watches marked as viewed")
|
||||
logger.info(f"Background marking complete: {marked_count} watches marked as viewed")
|
||||
except Exception as e:
|
||||
logger.error(f"Error marking as viewed: {e}")
|
||||
logger.error(f"Error in background mark as viewed: {e}")
|
||||
|
||||
# For small watch counts (< 10), run synchronously to avoid race conditions in tests
|
||||
# For larger counts, use background thread to avoid blocking the UI
|
||||
watch_count = len(datastore.data['watching'])
|
||||
if watch_count < 10:
|
||||
# Run synchronously for small watch counts
|
||||
mark_viewed_impl()
|
||||
else:
|
||||
# Start background thread for large watch counts
|
||||
thread = threading.Thread(target=mark_viewed_impl, daemon=True)
|
||||
thread.start()
|
||||
# Start background thread and return immediately
|
||||
thread = threading.Thread(target=mark_viewed_background, daemon=True)
|
||||
thread.start()
|
||||
|
||||
flash(gettext("Marking watches as viewed in background..."))
|
||||
return redirect(url_for('watchlist.index', tag=tag_limit))
|
||||
|
||||
@ui_blueprint.route("/delete", methods=['GET'])
|
||||
@@ -366,7 +358,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_pool,
|
||||
return redirect(url_for('watchlist.index'))
|
||||
|
||||
|
||||
@ui_blueprint.route("/share-url/<uuid_str:uuid>", methods=['GET'])
|
||||
@ui_blueprint.route("/share-url/<string:uuid>", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def form_share_put_watch(uuid):
|
||||
"""Given a watch UUID, upload the info and return a share-link
|
||||
|
||||
@@ -66,7 +66,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
|
||||
return Markup(result)
|
||||
|
||||
@diff_blueprint.route("/diff/<uuid_str:uuid>", methods=['GET'])
|
||||
@diff_blueprint.route("/diff/<string:uuid>", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def diff_history_page(uuid):
|
||||
"""
|
||||
@@ -128,7 +128,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
redirect=redirect
|
||||
)
|
||||
|
||||
@diff_blueprint.route("/diff/<uuid_str:uuid>/extract", methods=['GET'])
|
||||
@diff_blueprint.route("/diff/<string:uuid>/extract", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def diff_history_page_extract_GET(uuid):
|
||||
"""
|
||||
@@ -182,7 +182,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
redirect=redirect
|
||||
)
|
||||
|
||||
@diff_blueprint.route("/diff/<uuid_str:uuid>/extract", methods=['POST'])
|
||||
@diff_blueprint.route("/diff/<string:uuid>/extract", methods=['POST'])
|
||||
@login_optionally_required
|
||||
def diff_history_page_extract_POST(uuid):
|
||||
"""
|
||||
@@ -238,7 +238,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
redirect=redirect
|
||||
)
|
||||
|
||||
@diff_blueprint.route("/diff/<uuid_str:uuid>/processor-asset/<string:asset_name>", methods=['GET'])
|
||||
@diff_blueprint.route("/diff/<string:uuid>/processor-asset/<string:asset_name>", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def processor_asset(uuid, asset_name):
|
||||
"""
|
||||
|
||||
@@ -20,13 +20,13 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
if tag_uuid in watch.get('tags', []) and (tag.get('include_filters') or tag.get('subtractive_selectors')):
|
||||
return True
|
||||
|
||||
@edit_blueprint.route("/edit/<uuid_str:uuid>", methods=['GET', 'POST'])
|
||||
@edit_blueprint.route("/edit/<string:uuid>", methods=['GET', 'POST'])
|
||||
@login_optionally_required
|
||||
# https://stackoverflow.com/questions/42984453/wtforms-populate-form-with-data-if-data-exists
|
||||
# https://wtforms.readthedocs.io/en/3.0.x/forms/#wtforms.form.Form.populate_obj ?
|
||||
def edit_page(uuid):
|
||||
from changedetectionio import forms
|
||||
from changedetectionio.browser_steps.browser_steps import browser_step_ui_config
|
||||
from changedetectionio.blueprint.browser_steps.browser_steps import browser_step_ui_config
|
||||
from changedetectionio import processors
|
||||
import importlib
|
||||
|
||||
@@ -117,25 +117,12 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
processor_config = processor_instance.get_extra_watch_config(config_filename)
|
||||
|
||||
if processor_config:
|
||||
from wtforms.fields.form import FormField
|
||||
# Populate processor-config-* fields from JSON
|
||||
for config_key, config_value in processor_config.items():
|
||||
if not isinstance(config_value, dict):
|
||||
continue
|
||||
# Try exact API-named field first (e.g., processor_config_restock_diff)
|
||||
target_field = getattr(form, f'processor_config_{config_key}', None)
|
||||
# Fallback: find any FormField sub-form whose fields cover config_value keys
|
||||
if target_field is None:
|
||||
for form_field in form:
|
||||
if isinstance(form_field, FormField) and all(k in form_field.form._fields for k in config_value):
|
||||
target_field = form_field
|
||||
break
|
||||
if target_field is not None:
|
||||
for sub_key, sub_value in config_value.items():
|
||||
sub_field = target_field.form._fields.get(sub_key)
|
||||
if sub_field is not None:
|
||||
sub_field.data = sub_value
|
||||
logger.debug(f"Loaded processor config from {config_filename}: {sub_key} = {sub_value}")
|
||||
field_name = f'processor_config_{config_key}'
|
||||
if hasattr(form, field_name):
|
||||
getattr(form, field_name).data = config_value
|
||||
logger.debug(f"Loaded processor config from {config_filename}: {field_name} = {config_value}")
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to load processor config: {e}")
|
||||
|
||||
@@ -211,10 +198,6 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
# Recast it if need be to right data Watch handler
|
||||
watch_class = processors.get_custom_watch_obj_for_processor(form.data.get('processor'))
|
||||
datastore.data['watching'][uuid] = watch_class(datastore_path=datastore.datastore_path, __datastore=datastore.data, default=datastore.data['watching'][uuid])
|
||||
|
||||
# Save the watch immediately
|
||||
datastore.data['watching'][uuid].commit()
|
||||
|
||||
flash(gettext("Updated watch - unpaused!") if request.args.get('unpause_on_save') else gettext("Updated watch."))
|
||||
|
||||
# Cleanup any browsersteps session for this watch
|
||||
@@ -224,6 +207,10 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
except Exception as e:
|
||||
logger.debug(f"Error cleaning up browsersteps session: {e}")
|
||||
|
||||
# Re #286 - We wait for syncing new data to disk in another thread every 60 seconds
|
||||
# But in the case something is added we should save straight away
|
||||
datastore.needs_write_urgent = True
|
||||
|
||||
# Do not queue on edit if its not within the time range
|
||||
|
||||
# @todo maybe it should never queue anyway on edit...
|
||||
@@ -340,7 +327,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
|
||||
return output
|
||||
|
||||
@edit_blueprint.route("/edit/<uuid_str:uuid>/get-html", methods=['GET'])
|
||||
@edit_blueprint.route("/edit/<string:uuid>/get-html", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def watch_get_latest_html(uuid):
|
||||
from io import BytesIO
|
||||
@@ -350,9 +337,9 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
if uuid == 'first':
|
||||
uuid = list(datastore.data['watching'].keys()).pop()
|
||||
watch = datastore.data['watching'].get(uuid)
|
||||
if watch and watch.history.keys() and os.path.isdir(watch.data_dir):
|
||||
if watch and watch.history.keys() and os.path.isdir(watch.watch_data_dir):
|
||||
latest_filename = list(watch.history.keys())[-1]
|
||||
html_fname = os.path.join(watch.data_dir, f"{latest_filename}.html.br")
|
||||
html_fname = os.path.join(watch.watch_data_dir, f"{latest_filename}.html.br")
|
||||
with open(html_fname, 'rb') as f:
|
||||
if html_fname.endswith('.br'):
|
||||
# Read and decompress the Brotli file
|
||||
@@ -367,58 +354,8 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
# Return a 500 error
|
||||
abort(500)
|
||||
|
||||
@edit_blueprint.route("/edit/<uuid_str:uuid>/get-data-package", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def watch_get_data_package(uuid):
|
||||
"""Download all data for a single watch as a zip file"""
|
||||
from io import BytesIO
|
||||
from flask import send_file
|
||||
import zipfile
|
||||
from pathlib import Path
|
||||
import datetime
|
||||
|
||||
watch = datastore.data['watching'].get(uuid)
|
||||
if not watch:
|
||||
abort(404)
|
||||
|
||||
# Create zip in memory
|
||||
memory_file = BytesIO()
|
||||
|
||||
with zipfile.ZipFile(memory_file, 'w',
|
||||
compression=zipfile.ZIP_DEFLATED,
|
||||
compresslevel=8) as zipObj:
|
||||
|
||||
# Add the watch's JSON file if it exists
|
||||
watch_json_path = os.path.join(watch.data_dir, 'watch.json')
|
||||
if os.path.isfile(watch_json_path):
|
||||
zipObj.write(watch_json_path,
|
||||
arcname=os.path.join(uuid, 'watch.json'),
|
||||
compress_type=zipfile.ZIP_DEFLATED,
|
||||
compresslevel=8)
|
||||
|
||||
# Add all files in the watch data directory
|
||||
if os.path.isdir(watch.data_dir):
|
||||
for f in Path(watch.data_dir).glob('*'):
|
||||
if f.is_file() and f.name != 'watch.json': # Skip watch.json since we already added it
|
||||
zipObj.write(f,
|
||||
arcname=os.path.join(uuid, f.name),
|
||||
compress_type=zipfile.ZIP_DEFLATED,
|
||||
compresslevel=8)
|
||||
|
||||
# Seek to beginning of file
|
||||
memory_file.seek(0)
|
||||
|
||||
# Generate filename with timestamp
|
||||
timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
|
||||
filename = f"watch-data-{uuid[:8]}-{timestamp}.zip"
|
||||
|
||||
return send_file(memory_file,
|
||||
as_attachment=True,
|
||||
download_name=filename,
|
||||
mimetype='application/zip')
|
||||
|
||||
# Ajax callback
|
||||
@edit_blueprint.route("/edit/<uuid_str:uuid>/preview-rendered", methods=['POST'])
|
||||
@edit_blueprint.route("/edit/<string:uuid>/preview-rendered", methods=['POST'])
|
||||
@login_optionally_required
|
||||
def watch_get_preview_rendered(uuid):
|
||||
'''For when viewing the "preview" of the rendered text from inside of Edit'''
|
||||
@@ -449,9 +386,6 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
s = re.sub(r'[0-9]+', r'\\d+', s)
|
||||
datastore.data["watching"][uuid]['ignore_text'].append('/' + s + '/')
|
||||
|
||||
# Save the updated ignore_text
|
||||
datastore.data["watching"][uuid].commit()
|
||||
|
||||
return f"<a href={url_for('ui.ui_preview.preview_page', uuid=uuid)}>Click to preview</a>"
|
||||
|
||||
return edit_blueprint
|
||||
@@ -10,8 +10,7 @@ from changedetectionio import html_tools
|
||||
def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
preview_blueprint = Blueprint('ui_preview', __name__, template_folder="../ui/templates")
|
||||
|
||||
|
||||
@preview_blueprint.route("/preview/<uuid_str:uuid>", methods=['GET', 'POST'])
|
||||
@preview_blueprint.route("/preview/<string:uuid>", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def preview_page(uuid):
|
||||
"""
|
||||
@@ -60,8 +59,12 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
versions = []
|
||||
timestamp = None
|
||||
|
||||
system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver'
|
||||
extra_stylesheets = [url_for('static_content', group='styles', filename='diff.css')]
|
||||
is_html_webdriver = watch.fetcher_supports_screenshots
|
||||
|
||||
is_html_webdriver = False
|
||||
if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver' or watch.get('fetch_backend', '').startswith('extra_browser_'):
|
||||
is_html_webdriver = True
|
||||
|
||||
triggered_line_numbers = []
|
||||
ignored_line_numbers = []
|
||||
@@ -71,9 +74,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
flash(gettext("Preview unavailable - No fetch/check completed or triggers not reached"), "error")
|
||||
else:
|
||||
# So prepare the latest preview or not
|
||||
preferred_version = request.values.get('version') if request.method == 'POST' else request.args.get('version')
|
||||
|
||||
|
||||
preferred_version = request.args.get('version')
|
||||
versions = list(watch.history.keys())
|
||||
timestamp = versions[-1]
|
||||
if preferred_version and preferred_version in versions:
|
||||
@@ -124,7 +125,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
|
||||
return output
|
||||
|
||||
@preview_blueprint.route("/preview/<uuid_str:uuid>/processor-asset/<string:asset_name>", methods=['GET'])
|
||||
@preview_blueprint.route("/preview/<string:uuid>/processor-asset/<string:asset_name>", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def processor_asset(uuid, asset_name):
|
||||
"""
|
||||
|
||||
@@ -488,7 +488,6 @@ Math: {{ 1 + 1 }}") }}
|
||||
{% if watch.history_n %}
|
||||
<p>
|
||||
<a href="{{url_for('ui.ui_edit.watch_get_latest_html', uuid=uuid)}}" class="pure-button button-small">{{ _('Download latest HTML snapshot') }}</a>
|
||||
<a href="{{url_for('ui.ui_edit.watch_get_data_package', uuid=uuid)}}" class="pure-button button-small">{{ _('Download watch data package') }}</a>
|
||||
</p>
|
||||
{% endif %}
|
||||
|
||||
|
||||
@@ -17,7 +17,7 @@
|
||||
<script src="{{ url_for('static_content', group='js', filename='tabs.js') }}" defer></script>
|
||||
{% if versions|length >= 2 %}
|
||||
<div id="diff-form" style="text-align: center;">
|
||||
<form class="pure-form " action="{{url_for('ui.ui_preview.preview_page', uuid=uuid)}}" method="POST">
|
||||
<form class="pure-form " action="" method="POST">
|
||||
<fieldset>
|
||||
<label for="preview-version">{{ _('Select timestamp') }}</label> <select id="preview-version"
|
||||
name="from_version"
|
||||
@@ -28,7 +28,6 @@
|
||||
</option>
|
||||
{% endfor %}
|
||||
</select>
|
||||
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}">
|
||||
<button type="submit" class="pure-button pure-button-primary">{{ _('Go') }}</button>
|
||||
|
||||
</fieldset>
|
||||
|
||||
@@ -39,7 +39,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
elif op == 'mute':
|
||||
datastore.data['watching'][uuid].toggle_mute()
|
||||
|
||||
datastore.data['watching'][uuid].commit()
|
||||
datastore.needs_write = True
|
||||
return redirect(url_for('watchlist.index', tag = active_tag_uuid))
|
||||
|
||||
# Sort by last_changed and add the uuid which is usually the key..
|
||||
@@ -81,7 +81,6 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
|
||||
sorted_tags = sorted(datastore.data['settings']['application'].get('tags').items(), key=lambda x: x[1]['title'])
|
||||
|
||||
proxy_list = datastore.proxy_list
|
||||
output = render_template(
|
||||
"watch-overview.html",
|
||||
active_tag=active_tag,
|
||||
@@ -93,7 +92,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
form=form,
|
||||
generate_tag_colors=processors.generate_processor_badge_colors,
|
||||
guid=datastore.data['app_guid'],
|
||||
has_proxies=proxy_list,
|
||||
has_proxies=datastore.proxy_list,
|
||||
hosted_sticky=os.getenv("SALTED_PASS", False) == False,
|
||||
now_time_server=round(time.time()),
|
||||
pagination=pagination,
|
||||
@@ -111,16 +110,6 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
watches=sorted_watches
|
||||
)
|
||||
|
||||
# Return freed template-building memory to the OS immediately.
|
||||
# render_template allocates ~20MB of intermediate strings that are freed on return,
|
||||
# but glibc keeps those pages mapped in its arenas as RSS. malloc_trim() forces
|
||||
# glibc to release them, preventing RSS growth from concurrent Chrome connections.
|
||||
try:
|
||||
import ctypes
|
||||
ctypes.CDLL('libc.so.6').malloc_trim(0)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if session.get('share-link'):
|
||||
del (session['share-link'])
|
||||
|
||||
|
||||
@@ -14,46 +14,6 @@
|
||||
// Initialize Feather icons after the page loads
|
||||
document.addEventListener('DOMContentLoaded', function() {
|
||||
feather.replace();
|
||||
|
||||
// Intersection Observer for lazy loading favicons
|
||||
// Only load favicon images when they enter the viewport
|
||||
if ('IntersectionObserver' in window) {
|
||||
const faviconObserver = new IntersectionObserver((entries, observer) => {
|
||||
entries.forEach(entry => {
|
||||
if (entry.isIntersecting) {
|
||||
const img = entry.target;
|
||||
const src = img.getAttribute('data-src');
|
||||
|
||||
if (src) {
|
||||
// Load the actual favicon
|
||||
img.src = src;
|
||||
img.removeAttribute('data-src');
|
||||
}
|
||||
|
||||
// Stop observing this image
|
||||
observer.unobserve(img);
|
||||
}
|
||||
});
|
||||
}, {
|
||||
// Start loading slightly before the image enters viewport
|
||||
rootMargin: '50px',
|
||||
threshold: 0.01
|
||||
});
|
||||
|
||||
// Observe all lazy favicon images
|
||||
document.querySelectorAll('.lazy-favicon').forEach(img => {
|
||||
faviconObserver.observe(img);
|
||||
});
|
||||
} else {
|
||||
// Fallback for older browsers: load all favicons immediately
|
||||
document.querySelectorAll('.lazy-favicon').forEach(img => {
|
||||
const src = img.getAttribute('data-src');
|
||||
if (src) {
|
||||
img.src = src;
|
||||
img.removeAttribute('data-src');
|
||||
}
|
||||
});
|
||||
}
|
||||
});
|
||||
</script>
|
||||
<style>
|
||||
@@ -213,13 +173,12 @@ html[data-darkmode="true"] .watch-tag-list.tag-{{ class_name }} {
|
||||
{%- set checking_now = is_checking_now(watch) -%}
|
||||
{%- set history_n = watch.history_n -%}
|
||||
{%- set favicon = watch.get_favicon_filename() -%}
|
||||
{%- set error_texts = watch.compile_error_texts(has_proxies=has_proxies) -%}
|
||||
{%- set system_use_url_watchlist = datastore.data['settings']['application']['ui'].get('use_page_title_in_list') -%}
|
||||
{# Class settings mirrored in changedetectionio/static/js/realtime.js for the frontend #}
|
||||
{%- set row_classes = [
|
||||
loop.cycle('pure-table-odd', 'pure-table-even'),
|
||||
'processor-' ~ watch['processor'],
|
||||
'has-error' if error_texts|length > 2 else '',
|
||||
'has-error' if watch.compile_error_texts()|length > 2 else '',
|
||||
'paused' if watch.paused is defined and watch.paused != False else '',
|
||||
'unviewed' if watch.has_unviewed else '',
|
||||
'has-restock-info' if watch.has_restock_info else 'no-restock-info',
|
||||
@@ -247,17 +206,8 @@ html[data-darkmode="true"] .watch-tag-list.tag-{{ class_name }} {
|
||||
<td class="title-col inline">
|
||||
<div class="flex-wrapper">
|
||||
{% if 'favicons_enabled' not in ui_settings or ui_settings['favicons_enabled'] %}
|
||||
<div>
|
||||
{# Intersection Observer lazy loading: store real URL in data-src, load only when visible in viewport #}
|
||||
<img alt="Favicon thumbnail"
|
||||
class="favicon lazy-favicon"
|
||||
loading="lazy"
|
||||
decoding="async"
|
||||
fetchpriority="low"
|
||||
{% if favicon %}
|
||||
data-src="{{url_for('static_content', group='favicon', filename=watch.uuid)}}"
|
||||
{% endif %}
|
||||
src='data:image/svg+xml;utf8,%3Csvg xmlns="http://www.w3.org/2000/svg" width="7.087" height="7.087" viewBox="0 0 7.087 7.087"%3E%3Ccircle cx="3.543" cy="3.543" r="3.279" stroke="%23e1e1e1" stroke-width="0.45" fill="none" opacity="0.74"/%3E%3C/svg%3E'>
|
||||
<div>{# A page might have hundreds of these images, set IMG options for lazy loading, don't set SRC if we dont have it so it doesnt fetch the placeholder' #}
|
||||
<img alt="Favicon thumbnail" class="favicon" loading="lazy" decoding="async" fetchpriority="low" {% if favicon %} src="{{url_for('static_content', group='favicon', filename=watch.uuid)}}" {% else %} src='data:image/svg+xml;utf8,%3Csvg xmlns="http://www.w3.org/2000/svg" width="7.087" height="7.087" viewBox="0 0 7.087 7.087"%3E%3Ccircle cx="3.543" cy="3.543" r="3.279" stroke="%23e1e1e1" stroke-width="0.45" fill="none" opacity="0.74"/%3E%3C/svg%3E' {% endif %} >
|
||||
</div>
|
||||
{% endif %}
|
||||
<div>
|
||||
@@ -272,7 +222,7 @@ html[data-darkmode="true"] .watch-tag-list.tag-{{ class_name }} {
|
||||
{% endif %}
|
||||
<a class="external" target="_blank" rel="noopener" href="{{ watch.link.replace('source:','') }}"> </a>
|
||||
</span>
|
||||
<div class="error-text" style="display:none;">{{ error_texts|safe }}</div>
|
||||
<div class="error-text" style="display:none;">{{ watch.compile_error_texts(has_proxies=datastore.proxy_list)|safe }}</div>
|
||||
{%- if watch['processor'] == 'text_json_diff' -%}
|
||||
{%- if watch['has_ldjson_price_data'] and not watch['track_ldjson_price_data'] -%}
|
||||
<div class="ldjson-price-track-offer">Switch to Restock & Price watch mode? <a href="{{url_for('price_data_follower.accept', uuid=watch.uuid)}}" class="pure-button button-xsmall">Yes</a> <a href="{{url_for('price_data_follower.reject', uuid=watch.uuid)}}" class="">No</a></div>
|
||||
@@ -305,20 +255,11 @@ html[data-darkmode="true"] .watch-tag-list.tag-{{ class_name }} {
|
||||
</span>
|
||||
{%- endif -%}
|
||||
|
||||
{%- if watch.get('restock') and watch['restock'].get('price') -%}
|
||||
{%- set restock = watch['restock'] -%}
|
||||
{%- set price = restock.get('price') -%}
|
||||
{%- set cur = restock.get('currency','') -%}
|
||||
|
||||
{%- if price is not none and (price|string)|regex_search('\d') -%}
|
||||
<span class="restock-label price" title="{{ _('Price') }}">
|
||||
{# @todo: make parse_currency/parse_decimal aware of the locale of the actual web page and use that instead changedetectionio/processors/restock_diff/__init__.py #}
|
||||
{%- if price is number -%}{# It's a number so we can convert it to their locale' #}
|
||||
{{ price|format_number_locale }} {{ cur }}<!-- as number -->
|
||||
{%- else -%}{# It's totally fine if it arrives as something else, the website might be something weird in this field #}
|
||||
{{ price }} {{ cur }}<!-- as string -->
|
||||
{%- endif -%}
|
||||
</span>
|
||||
{%- if watch.get('restock') and watch['restock']['price'] != None -%}
|
||||
{%- if watch['restock']['price'] != None -%}
|
||||
<span class="restock-label price" title="{{ _('Price') }}">
|
||||
{{ watch['restock']['price']|format_number_locale if watch['restock'].get('price') else '' }} {{ watch['restock'].get('currency','') }}
|
||||
</span>
|
||||
{%- endif -%}
|
||||
{%- elif not watch.has_restock_info -%}
|
||||
<span class="restock-label error">{{ _('No information') }}</span>
|
||||
|
||||
@@ -38,6 +38,7 @@ def manage_user_agent(headers, current_ua=''):
|
||||
|
||||
return None
|
||||
|
||||
|
||||
class Fetcher():
|
||||
browser_connection_is_custom = None
|
||||
browser_connection_url = None
|
||||
@@ -162,16 +163,30 @@ class Fetcher():
|
||||
"""
|
||||
return {k.lower(): v for k, v in self.headers.items()}
|
||||
|
||||
def browser_steps_get_valid_steps(self):
|
||||
if self.browser_steps is not None and len(self.browser_steps):
|
||||
valid_steps = list(filter(
|
||||
lambda s: (s['operation'] and len(s['operation']) and s['operation'] != 'Choose one'),
|
||||
self.browser_steps))
|
||||
|
||||
# Just incase they selected Goto site by accident with older JS
|
||||
if valid_steps and valid_steps[0]['operation'] == 'Goto site':
|
||||
del(valid_steps[0])
|
||||
|
||||
return valid_steps
|
||||
|
||||
return None
|
||||
|
||||
async def iterate_browser_steps(self, start_url=None):
|
||||
from changedetectionio.browser_steps.browser_steps import steppable_browser_interface, browser_steps_get_valid_steps
|
||||
from changedetectionio.blueprint.browser_steps.browser_steps import steppable_browser_interface
|
||||
from playwright._impl._errors import TimeoutError, Error
|
||||
from changedetectionio.jinja2_custom import render as jinja_render
|
||||
step_n = 0
|
||||
|
||||
if self.browser_steps:
|
||||
if self.browser_steps is not None and len(self.browser_steps):
|
||||
interface = steppable_browser_interface(start_url=start_url)
|
||||
interface.page = self.page
|
||||
valid_steps = browser_steps_get_valid_steps(self.browser_steps)
|
||||
valid_steps = self.browser_steps_get_valid_steps()
|
||||
|
||||
for step in valid_steps:
|
||||
step_n += 1
|
||||
|
||||
@@ -295,7 +295,7 @@ class fetcher(Fetcher):
|
||||
self.page.on("console", lambda msg: logger.debug(f"Playwright console: Watch URL: {url} {msg.type}: {msg.text} {msg.args}"))
|
||||
|
||||
# Re-use as much code from browser steps as possible so its the same
|
||||
from changedetectionio.browser_steps.browser_steps import steppable_browser_interface
|
||||
from changedetectionio.blueprint.browser_steps.browser_steps import steppable_browser_interface
|
||||
browsersteps_interface = steppable_browser_interface(start_url=url)
|
||||
browsersteps_interface.page = self.page
|
||||
|
||||
@@ -362,7 +362,7 @@ class fetcher(Fetcher):
|
||||
# Wrap remaining operations in try/finally to ensure cleanup
|
||||
try:
|
||||
# Run Browser Steps here
|
||||
if self.browser_steps:
|
||||
if self.browser_steps_get_valid_steps():
|
||||
try:
|
||||
await self.iterate_browser_steps(start_url=url)
|
||||
except BrowserStepsStepException:
|
||||
|
||||
@@ -86,8 +86,8 @@ async def capture_full_page(page, screenshot_format='JPEG', watch_uuid=None, loc
|
||||
# better than scrollTo incase they override it in the page
|
||||
await page.evaluate(
|
||||
"""(y) => {
|
||||
const el = document.scrollingElement;
|
||||
if (el) el.scrollTop = y;
|
||||
document.documentElement.scrollTop = y;
|
||||
document.body.scrollTop = y;
|
||||
}""",
|
||||
y
|
||||
)
|
||||
@@ -305,8 +305,6 @@ class fetcher(Fetcher):
|
||||
await asyncio.wait_for(self.browser.close(), timeout=3.0)
|
||||
except Exception as cleanup_error:
|
||||
logger.error(f"[{watch_uuid}] Failed to cleanup browser after page creation failure: {cleanup_error}")
|
||||
finally:
|
||||
self.browser = None
|
||||
raise
|
||||
|
||||
# Add console handler to capture console.log from favicon fetcher
|
||||
@@ -458,7 +456,7 @@ class fetcher(Fetcher):
|
||||
|
||||
# Run Browser Steps here
|
||||
# @todo not yet supported, we switch to playwright in this case
|
||||
# if self.browser_steps:
|
||||
# if self.browser_steps_get_valid_steps():
|
||||
# self.iterate_browser_steps()
|
||||
|
||||
|
||||
@@ -534,14 +532,6 @@ class fetcher(Fetcher):
|
||||
)
|
||||
except asyncio.TimeoutError:
|
||||
raise (BrowserFetchTimedOut(msg=f"Browser connected but was unable to process the page in {max_time} seconds."))
|
||||
finally:
|
||||
# Internal cleanup on any exception/timeout - call quit() immediately
|
||||
# This prevents connection leaks during exception bursts
|
||||
# Worker.py's quit() call becomes a redundant safety net (idempotent)
|
||||
try:
|
||||
await self.quit(watch={'uuid': watch_uuid} if watch_uuid else None)
|
||||
except Exception as cleanup_error:
|
||||
logger.error(f"[{watch_uuid}] Error during internal quit() cleanup: {cleanup_error}")
|
||||
|
||||
|
||||
# Plugin registration for built-in fetcher
|
||||
|
||||
@@ -1,14 +1,12 @@
|
||||
from loguru import logger
|
||||
from urllib.parse import urljoin, urlparse
|
||||
import hashlib
|
||||
import os
|
||||
import re
|
||||
import asyncio
|
||||
|
||||
from functools import partial
|
||||
from changedetectionio import strtobool
|
||||
from changedetectionio.content_fetchers.exceptions import BrowserStepsInUnsupportedFetcher, EmptyReply, Non200ErrorCodeReceived
|
||||
from changedetectionio.content_fetchers.base import Fetcher
|
||||
from changedetectionio.validate_url import is_private_hostname
|
||||
|
||||
|
||||
# "html_requests" is listed as the default fetcher in store.py!
|
||||
@@ -38,7 +36,7 @@ class fetcher(Fetcher):
|
||||
import requests
|
||||
from requests.exceptions import ProxyError, ConnectionError, RequestException
|
||||
|
||||
if self.browser_steps:
|
||||
if self.browser_steps_get_valid_steps():
|
||||
raise BrowserStepsInUnsupportedFetcher(url=url)
|
||||
|
||||
proxies = {}
|
||||
@@ -81,48 +79,14 @@ class fetcher(Fetcher):
|
||||
if strtobool(os.getenv('ALLOW_FILE_URI', 'false')) and url.startswith('file://'):
|
||||
from requests_file import FileAdapter
|
||||
session.mount('file://', FileAdapter())
|
||||
|
||||
allow_iana_restricted = strtobool(os.getenv('ALLOW_IANA_RESTRICTED_ADDRESSES', 'false'))
|
||||
|
||||
try:
|
||||
# Fresh DNS check at fetch time — catches DNS rebinding regardless of add-time cache.
|
||||
if not allow_iana_restricted:
|
||||
parsed_initial = urlparse(url)
|
||||
if parsed_initial.hostname and is_private_hostname(parsed_initial.hostname):
|
||||
raise Exception(f"Fetch blocked: '{url}' resolves to a private/reserved IP address. "
|
||||
f"Set ALLOW_IANA_RESTRICTED_ADDRESSES=true to allow.")
|
||||
|
||||
r = session.request(method=request_method,
|
||||
data=request_body.encode('utf-8') if type(request_body) is str else request_body,
|
||||
url=url,
|
||||
headers=request_headers,
|
||||
timeout=timeout,
|
||||
proxies=proxies,
|
||||
verify=False,
|
||||
allow_redirects=False)
|
||||
|
||||
# Manually follow redirects so each hop's resolved IP can be validated,
|
||||
# preventing SSRF via an open redirect on a public host.
|
||||
current_url = url
|
||||
for _ in range(10):
|
||||
if not r.is_redirect:
|
||||
break
|
||||
location = r.headers.get('Location', '')
|
||||
redirect_url = urljoin(current_url, location)
|
||||
if not allow_iana_restricted:
|
||||
parsed_redirect = urlparse(redirect_url)
|
||||
if parsed_redirect.hostname and is_private_hostname(parsed_redirect.hostname):
|
||||
raise Exception(f"Redirect blocked: '{redirect_url}' resolves to a private/reserved IP address.")
|
||||
current_url = redirect_url
|
||||
r = session.request('GET', redirect_url,
|
||||
headers=request_headers,
|
||||
timeout=timeout,
|
||||
proxies=proxies,
|
||||
verify=False,
|
||||
allow_redirects=False)
|
||||
else:
|
||||
raise Exception("Too many redirects")
|
||||
|
||||
verify=False)
|
||||
except Exception as e:
|
||||
msg = str(e)
|
||||
if proxies and 'SOCKSHTTPSConnectionPool' in msg:
|
||||
@@ -148,32 +112,10 @@ class fetcher(Fetcher):
|
||||
# Default to UTF-8 for XML if no encoding found
|
||||
r.encoding = 'utf-8'
|
||||
else:
|
||||
# No charset in HTTP header - sniff encoding in priority order matching browsers
|
||||
# (WHATWG encoding sniffing algorithm):
|
||||
# 1. BOM - highest confidence, check before anything else
|
||||
# 2. <meta charset> in first 2kb
|
||||
# 3. chardet statistical detection - last resort
|
||||
# See: https://github.com/dgtlmoon/changedetection.io/issues/3952
|
||||
boms = [
|
||||
(b'\xef\xbb\xbf', 'utf-8-sig'),
|
||||
(b'\xff\xfe', 'utf-16-le'),
|
||||
(b'\xfe\xff', 'utf-16-be'),
|
||||
]
|
||||
bom_encoding = next((enc for bom, enc in boms if r.content.startswith(bom)), None)
|
||||
if bom_encoding:
|
||||
logger.info(f"URL: {url} Using encoding '{bom_encoding}' detected from BOM")
|
||||
r.encoding = bom_encoding
|
||||
else:
|
||||
meta_charset_match = re.search(rb'<meta[^>]+charset\s*=\s*["\']?\s*([^"\'\s;>]+)', r.content[:2000], re.IGNORECASE)
|
||||
if meta_charset_match:
|
||||
encoding = meta_charset_match.group(1).decode('ascii', errors='ignore')
|
||||
logger.info(f"URL: {url} No content-type encoding in HTTP headers - Using encoding '{encoding}' from HTML meta charset tag")
|
||||
r.encoding = encoding
|
||||
else:
|
||||
encoding = chardet.detect(r.content)['encoding']
|
||||
logger.warning(f"URL: {url} No charset in headers or meta tag, guessed encoding as '{encoding}' via chardet")
|
||||
if encoding:
|
||||
r.encoding = encoding
|
||||
# For other content types, use chardet
|
||||
encoding = chardet.detect(r.content)['encoding']
|
||||
if encoding:
|
||||
r.encoding = encoding
|
||||
|
||||
self.headers = r.headers
|
||||
|
||||
@@ -242,6 +184,7 @@ class fetcher(Fetcher):
|
||||
)
|
||||
|
||||
async def quit(self, watch=None):
|
||||
|
||||
# In case they switched to `requests` fetcher from something else
|
||||
# Then the screenshot could be old, in any case, it's not used here.
|
||||
# REMOVE_REQUESTS_OLD_SCREENSHOTS - Mainly used for testing
|
||||
|
||||
@@ -104,17 +104,15 @@ class fetcher(Fetcher):
|
||||
|
||||
from selenium.webdriver.remote.remote_connection import RemoteConnection
|
||||
from selenium.webdriver.remote.webdriver import WebDriver as RemoteWebDriver
|
||||
from selenium.webdriver.remote.client_config import ClientConfig
|
||||
from urllib3.util import Timeout
|
||||
driver = None
|
||||
try:
|
||||
connection_timeout = int(os.getenv("WEBDRIVER_CONNECTION_TIMEOUT", 90))
|
||||
client_config = ClientConfig(
|
||||
remote_server_addr=self.browser_connection_url,
|
||||
timeout=Timeout(connect=connection_timeout, total=connection_timeout)
|
||||
# Create the RemoteConnection and set timeout (e.g., 30 seconds)
|
||||
remote_connection = RemoteConnection(
|
||||
self.browser_connection_url,
|
||||
)
|
||||
remote_connection = RemoteConnection(client_config=client_config)
|
||||
remote_connection.set_timeout(30) # seconds
|
||||
|
||||
# Now create the driver with the RemoteConnection
|
||||
driver = RemoteWebDriver(
|
||||
command_executor=remote_connection,
|
||||
options=options
|
||||
|
||||
@@ -45,36 +45,6 @@ CHANGED_INTO_PLACEMARKER_CLOSED = '@changed_into_PLACEMARKER_CLOSED'
|
||||
# Compiled regex patterns for performance
|
||||
WHITESPACE_NORMALIZE_RE = re.compile(r'\s+')
|
||||
|
||||
# Regexes built from the constants above — no brittle hardcoded strings
|
||||
_EXTRACT_REMOVED_RE = re.compile(
|
||||
re.escape(REMOVED_PLACEMARKER_OPEN) + r'(.*?)' + re.escape(REMOVED_PLACEMARKER_CLOSED)
|
||||
+ r'|' +
|
||||
re.escape(CHANGED_PLACEMARKER_OPEN) + r'(.*?)' + re.escape(CHANGED_PLACEMARKER_CLOSED)
|
||||
)
|
||||
_EXTRACT_ADDED_RE = re.compile(
|
||||
re.escape(ADDED_PLACEMARKER_OPEN) + r'(.*?)' + re.escape(ADDED_PLACEMARKER_CLOSED)
|
||||
+ r'|' +
|
||||
re.escape(CHANGED_INTO_PLACEMARKER_OPEN) + r'(.*?)' + re.escape(CHANGED_INTO_PLACEMARKER_CLOSED)
|
||||
)
|
||||
|
||||
|
||||
def extract_changed_from(raw_diff: str) -> str:
|
||||
"""Extract only the removed/changed-from fragments from a raw diff string.
|
||||
|
||||
Useful for {{diff_changed_from}} — gives just the old value (e.g. old price),
|
||||
not the full surrounding line. Multiple fragments joined with newlines.
|
||||
"""
|
||||
return '\n'.join(m.group(1) or m.group(2) for m in _EXTRACT_REMOVED_RE.finditer(raw_diff))
|
||||
|
||||
|
||||
def extract_changed_to(raw_diff: str) -> str:
|
||||
"""Extract only the added/changed-into fragments from a raw diff string.
|
||||
|
||||
Useful for {{diff_changed_to}} — gives just the new value (e.g. new price),
|
||||
not the full surrounding line. Multiple fragments joined with newlines.
|
||||
"""
|
||||
return '\n'.join(m.group(1) or m.group(2) for m in _EXTRACT_ADDED_RE.finditer(raw_diff))
|
||||
|
||||
|
||||
def render_inline_word_diff(before_line: str, after_line: str, ignore_junk: bool = False, markdown_style: str = None, tokenizer: str = 'words_and_html') -> tuple[str, bool]:
|
||||
"""
|
||||
|
||||
+44
-125
@@ -4,7 +4,6 @@ import flask_login
|
||||
import locale
|
||||
import os
|
||||
import queue
|
||||
import re
|
||||
import sys
|
||||
import threading
|
||||
import time
|
||||
@@ -28,6 +27,7 @@ from flask import (
|
||||
session,
|
||||
url_for,
|
||||
)
|
||||
from flask_compress import Compress as FlaskCompress
|
||||
from flask_restful import abort, Api
|
||||
from flask_cors import CORS
|
||||
|
||||
@@ -40,7 +40,7 @@ from loguru import logger
|
||||
|
||||
from changedetectionio import __version__
|
||||
from changedetectionio import queuedWatchMetaData
|
||||
from changedetectionio.api import Watch, WatchHistory, WatchSingleHistory, WatchHistoryDiff, CreateWatch, Import, SystemInfo, Tag, Tags, Notifications, WatchFavicon, Spec
|
||||
from changedetectionio.api import Watch, WatchHistory, WatchSingleHistory, WatchHistoryDiff, CreateWatch, Import, SystemInfo, Tag, Tags, Notifications, WatchFavicon
|
||||
from changedetectionio.api.Search import Search
|
||||
from .time_handler import is_within_schedule
|
||||
from changedetectionio.languages import get_available_languages, get_language_codes, get_flag_for_locale, get_timeago_locale
|
||||
@@ -69,43 +69,15 @@ socketio_server = None
|
||||
|
||||
# Enable CORS, especially useful for the Chrome extension to operate from anywhere
|
||||
CORS(app)
|
||||
from werkzeug.routing import BaseConverter, ValidationError
|
||||
from uuid import UUID
|
||||
|
||||
class StrictUUIDConverter(BaseConverter):
|
||||
# Special sentinel values allowed in addition to strict UUIDs
|
||||
_ALLOWED_SENTINELS = frozenset({'first'})
|
||||
|
||||
def to_python(self, value: str) -> str:
|
||||
if value in self._ALLOWED_SENTINELS:
|
||||
return value
|
||||
try:
|
||||
u = UUID(value)
|
||||
except ValueError as e:
|
||||
raise ValidationError() from e
|
||||
# Reject non-standard formats (braces, URNs, no-hyphens)
|
||||
if str(u) != value.lower():
|
||||
raise ValidationError()
|
||||
return str(u)
|
||||
|
||||
def to_url(self, value) -> str:
|
||||
return str(value)
|
||||
|
||||
# app setup (once)
|
||||
app.url_map.converters["uuid_str"] = StrictUUIDConverter
|
||||
|
||||
# Flask-Compress handles HTTP compression, Socket.IO compression disabled to prevent memory leak.
|
||||
# There's also a bug between flask compress and socketio that causes some kind of slow memory leak
|
||||
# It's better to use compression on your reverse proxy (nginx etc) instead.
|
||||
if strtobool(os.getenv("FLASK_ENABLE_COMPRESSION")):
|
||||
from flask_compress import Compress as FlaskCompress
|
||||
app.config['COMPRESS_MIN_SIZE'] = 2096
|
||||
app.config['COMPRESS_MIMETYPES'] = ['text/html', 'text/css', 'text/javascript', 'application/json', 'application/javascript', 'image/svg+xml']
|
||||
# Use gzip only - smaller memory footprint than zstd/brotli (4-8KB vs 200-500KB contexts)
|
||||
app.config['COMPRESS_ALGORITHM'] = ['gzip']
|
||||
compress = FlaskCompress()
|
||||
compress.init_app(app)
|
||||
|
||||
# Super handy for compressing large BrowserSteps responses and others
|
||||
# Flask-Compress handles HTTP compression, Socket.IO compression disabled to prevent memory leak
|
||||
compress = FlaskCompress()
|
||||
app.config['COMPRESS_MIN_SIZE'] = 2096
|
||||
app.config['COMPRESS_MIMETYPES'] = ['text/html', 'text/css', 'text/javascript', 'application/json', 'application/javascript', 'image/svg+xml']
|
||||
# Use gzip only - smaller memory footprint than zstd/brotli (4-8KB vs 200-500KB contexts)
|
||||
app.config['COMPRESS_ALGORITHM'] = ['gzip']
|
||||
compress.init_app(app)
|
||||
app.config['TEMPLATES_AUTO_RELOAD'] = False
|
||||
|
||||
|
||||
@@ -212,23 +184,14 @@ def _is_safe_valid_url(test_url):
|
||||
from .validate_url import is_safe_valid_url
|
||||
return is_safe_valid_url(test_url)
|
||||
|
||||
@app.template_global('get_html_head_extras')
|
||||
def _get_html_head_extras():
|
||||
from .pluggy_interface import collect_html_head_extras
|
||||
return collect_html_head_extras()
|
||||
|
||||
|
||||
@app.template_filter('format_number_locale')
|
||||
def _jinja2_filter_format_number_locale(value: float) -> str:
|
||||
"Formats for example 4000.10 to the local locale default of 4,000.10"
|
||||
# Format the number with two decimal places (locale format string will return 6 decimal)
|
||||
formatted_value = locale.format_string("%.2f", value, grouping=True)
|
||||
return formatted_value
|
||||
|
||||
@app.template_filter('regex_search')
|
||||
def _jinja2_filter_regex_search(value, pattern):
|
||||
import re
|
||||
return re.search(pattern, str(value)) is not None
|
||||
return formatted_value
|
||||
|
||||
@app.template_global('is_checking_now')
|
||||
def _watch_is_checking_now(watch_obj, format="%Y-%m-%d %H:%M:%S"):
|
||||
@@ -303,47 +266,6 @@ def _jinja2_filter_seconds_precise(timestamp):
|
||||
|
||||
return format(int(time.time()-timestamp), ',d')
|
||||
|
||||
@app.template_filter('format_duration')
|
||||
def _jinja2_filter_format_duration(seconds):
|
||||
"""Format a duration in seconds into human readable string like '5 days, 3 hours, 30 minutes'"""
|
||||
from datetime import timedelta
|
||||
|
||||
if not seconds or seconds < 0:
|
||||
return gettext('0 seconds')
|
||||
|
||||
td = timedelta(seconds=int(seconds))
|
||||
|
||||
# Calculate components
|
||||
years = td.days // 365
|
||||
remaining_days = td.days % 365
|
||||
months = remaining_days // 30
|
||||
remaining_days = remaining_days % 30
|
||||
weeks = remaining_days // 7
|
||||
days = remaining_days % 7
|
||||
|
||||
hours = td.seconds // 3600
|
||||
minutes = (td.seconds % 3600) // 60
|
||||
secs = td.seconds % 60
|
||||
|
||||
# Build parts list
|
||||
parts = []
|
||||
if years > 0:
|
||||
parts.append(f"{years} {gettext('year') if years == 1 else gettext('years')}")
|
||||
if months > 0:
|
||||
parts.append(f"{months} {gettext('month') if months == 1 else gettext('months')}")
|
||||
if weeks > 0:
|
||||
parts.append(f"{weeks} {gettext('week') if weeks == 1 else gettext('weeks')}")
|
||||
if days > 0:
|
||||
parts.append(f"{days} {gettext('day') if days == 1 else gettext('days')}")
|
||||
if hours > 0:
|
||||
parts.append(f"{hours} {gettext('hour') if hours == 1 else gettext('hours')}")
|
||||
if minutes > 0:
|
||||
parts.append(f"{minutes} {gettext('minute') if minutes == 1 else gettext('minutes')}")
|
||||
if secs > 0 or not parts:
|
||||
parts.append(f"{secs} {gettext('second') if secs == 1 else gettext('seconds')}")
|
||||
|
||||
return ", ".join(parts)
|
||||
|
||||
@app.template_filter('fetcher_status_icons')
|
||||
def _jinja2_filter_fetcher_status_icons(fetcher_name):
|
||||
"""Get status icon HTML for a given fetcher.
|
||||
@@ -393,8 +315,6 @@ def _jinja2_filter_fetcher_status_icons(fetcher_name):
|
||||
|
||||
return ''
|
||||
|
||||
_RE_SANITIZE_TAG = re.compile(r'[^a-zA-Z0-9]')
|
||||
|
||||
@app.template_filter('sanitize_tag_class')
|
||||
def _jinja2_filter_sanitize_tag_class(tag_title):
|
||||
"""Sanitize a tag title to create a valid CSS class name.
|
||||
@@ -406,8 +326,9 @@ def _jinja2_filter_sanitize_tag_class(tag_title):
|
||||
Returns:
|
||||
str: A sanitized string suitable for use as a CSS class name
|
||||
"""
|
||||
import re
|
||||
# Remove all non-alphanumeric characters and convert to lowercase
|
||||
sanitized = _RE_SANITIZE_TAG.sub('', tag_title).lower()
|
||||
sanitized = re.sub(r'[^a-zA-Z0-9]', '', tag_title).lower()
|
||||
# Ensure it starts with a letter (CSS requirement)
|
||||
if sanitized and not sanitized[0].isalpha():
|
||||
sanitized = 'tag' + sanitized
|
||||
@@ -495,21 +416,28 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
available_languages = get_available_languages()
|
||||
language_codes = get_language_codes()
|
||||
|
||||
_locale_aliases = {
|
||||
'zh-TW': 'zh_Hant_TW', # Traditional Chinese: browser sends zh-TW, we use zh_Hant_TW
|
||||
'zh_TW': 'zh_Hant_TW', # Also handle underscore variant
|
||||
}
|
||||
_locale_match_list = language_codes + list(_locale_aliases.keys())
|
||||
|
||||
def get_locale():
|
||||
# Locale aliases: map browser language codes to translation directory names
|
||||
# This handles cases where browsers send standard codes (e.g., zh-TW)
|
||||
# but our translations use more specific codes (e.g., zh_Hant_TW)
|
||||
locale_aliases = {
|
||||
'zh-TW': 'zh_Hant_TW', # Traditional Chinese: browser sends zh-TW, we use zh_Hant_TW
|
||||
'zh_TW': 'zh_Hant_TW', # Also handle underscore variant
|
||||
}
|
||||
|
||||
# 1. Try to get locale from session (user explicitly selected)
|
||||
if 'locale' in session:
|
||||
return session['locale']
|
||||
|
||||
# 2. Fall back to Accept-Language header
|
||||
browser_locale = request.accept_languages.best_match(_locale_match_list)
|
||||
# 3. Map browser locale to our internal locale if needed
|
||||
return _locale_aliases.get(browser_locale, browser_locale)
|
||||
# Get the best match from browser's Accept-Language header
|
||||
browser_locale = request.accept_languages.best_match(language_codes + list(locale_aliases.keys()))
|
||||
|
||||
# 3. Check if we need to map the browser locale to our internal locale
|
||||
if browser_locale in locale_aliases:
|
||||
return locale_aliases[browser_locale]
|
||||
|
||||
return browser_locale
|
||||
|
||||
# Initialize Babel with locale selector
|
||||
babel = Babel(app, locale_selector=get_locale)
|
||||
@@ -561,22 +489,22 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
|
||||
|
||||
watch_api.add_resource(WatchHistoryDiff,
|
||||
'/api/v1/watch/<uuid_str:uuid>/difference/<string:from_timestamp>/<string:to_timestamp>',
|
||||
'/api/v1/watch/<string:uuid>/difference/<string:from_timestamp>/<string:to_timestamp>',
|
||||
resource_class_kwargs={'datastore': datastore})
|
||||
watch_api.add_resource(WatchSingleHistory,
|
||||
'/api/v1/watch/<uuid_str:uuid>/history/<string:timestamp>',
|
||||
'/api/v1/watch/<string:uuid>/history/<string:timestamp>',
|
||||
resource_class_kwargs={'datastore': datastore, 'update_q': update_q})
|
||||
watch_api.add_resource(WatchFavicon,
|
||||
'/api/v1/watch/<uuid_str:uuid>/favicon',
|
||||
'/api/v1/watch/<string:uuid>/favicon',
|
||||
resource_class_kwargs={'datastore': datastore})
|
||||
watch_api.add_resource(WatchHistory,
|
||||
'/api/v1/watch/<uuid_str:uuid>/history',
|
||||
'/api/v1/watch/<string:uuid>/history',
|
||||
resource_class_kwargs={'datastore': datastore})
|
||||
|
||||
watch_api.add_resource(CreateWatch, '/api/v1/watch',
|
||||
resource_class_kwargs={'datastore': datastore, 'update_q': update_q})
|
||||
|
||||
watch_api.add_resource(Watch, '/api/v1/watch/<uuid_str:uuid>',
|
||||
watch_api.add_resource(Watch, '/api/v1/watch/<string:uuid>',
|
||||
resource_class_kwargs={'datastore': datastore, 'update_q': update_q})
|
||||
|
||||
watch_api.add_resource(SystemInfo, '/api/v1/systeminfo',
|
||||
@@ -589,7 +517,7 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
watch_api.add_resource(Tags, '/api/v1/tags',
|
||||
resource_class_kwargs={'datastore': datastore})
|
||||
|
||||
watch_api.add_resource(Tag, '/api/v1/tag', '/api/v1/tag/<uuid_str:uuid>',
|
||||
watch_api.add_resource(Tag, '/api/v1/tag', '/api/v1/tag/<string:uuid>',
|
||||
resource_class_kwargs={'datastore': datastore, 'update_q': update_q})
|
||||
|
||||
watch_api.add_resource(Search, '/api/v1/search',
|
||||
@@ -598,8 +526,6 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
watch_api.add_resource(Notifications, '/api/v1/notifications',
|
||||
resource_class_kwargs={'datastore': datastore})
|
||||
|
||||
watch_api.add_resource(Spec, '/api/v1/full-spec')
|
||||
|
||||
@login_manager.user_loader
|
||||
def user_loader(email):
|
||||
user = User()
|
||||
@@ -741,14 +667,8 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
def static_content(group, filename):
|
||||
from flask import make_response
|
||||
import re
|
||||
|
||||
# Strict sanitization: only allow a-z, 0-9, and underscore (blocks .. and other traversal)
|
||||
group = re.sub(r'[^a-z0-9_-]+', '', group.lower())
|
||||
filename = filename
|
||||
|
||||
# Additional safety: reject if sanitization resulted in empty strings
|
||||
if not group or not filename:
|
||||
abort(404)
|
||||
group = re.sub(r'[^\w.-]+', '', group.lower())
|
||||
filename = re.sub(r'[^\w.-]+', '', filename.lower())
|
||||
|
||||
if group == 'screenshot':
|
||||
# Could be sensitive, follow password requirements
|
||||
@@ -783,10 +703,10 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
favicon_filename = watch.get_favicon_filename()
|
||||
if favicon_filename:
|
||||
# Use cached MIME type detection
|
||||
filepath = os.path.join(watch.data_dir, favicon_filename)
|
||||
filepath = os.path.join(watch.watch_data_dir, favicon_filename)
|
||||
mime = get_favicon_mime_type(filepath)
|
||||
|
||||
response = make_response(send_from_directory(watch.data_dir, favicon_filename))
|
||||
response = make_response(send_from_directory(watch.watch_data_dir, favicon_filename))
|
||||
response.headers['Content-type'] = mime
|
||||
response.headers['Cache-Control'] = 'max-age=300, must-revalidate' # Cache for 5 minutes, then revalidate
|
||||
return response
|
||||
@@ -887,7 +807,7 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
app.register_blueprint(watchlist.construct_blueprint(datastore=datastore, update_q=update_q, queuedWatchMetaData=queuedWatchMetaData), url_prefix='')
|
||||
|
||||
# Initialize Socket.IO server conditionally based on settings
|
||||
socket_io_enabled = datastore.data['settings']['application'].get('ui', {}).get('socket_io_enabled', True)
|
||||
socket_io_enabled = datastore.data['settings']['application']['ui'].get('socket_io_enabled', True)
|
||||
if socket_io_enabled and app.config.get('batch_mode'):
|
||||
socket_io_enabled = False
|
||||
if socket_io_enabled:
|
||||
@@ -1022,16 +942,15 @@ def check_for_new_version():
|
||||
import urllib3
|
||||
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
||||
|
||||
session = requests.Session()
|
||||
session.verify = False
|
||||
|
||||
while not app.config.exit.is_set():
|
||||
try:
|
||||
r = session.post("https://changedetection.io/check-ver.php",
|
||||
r = requests.post("https://changedetection.io/check-ver.php",
|
||||
data={'version': __version__,
|
||||
'app_guid': datastore.data['app_guid'],
|
||||
'watch_count': len(datastore.data['watching'])
|
||||
})
|
||||
},
|
||||
|
||||
verify=False)
|
||||
except:
|
||||
pass
|
||||
|
||||
|
||||
@@ -7,6 +7,8 @@ from flask_babel import lazy_gettext as _l, gettext
|
||||
from changedetectionio.blueprint.rss import RSS_FORMAT_TYPES, RSS_TEMPLATE_TYPE_OPTIONS, RSS_TEMPLATE_HTML_DEFAULT
|
||||
from changedetectionio.conditions.form import ConditionFormRow
|
||||
from changedetectionio.notification_service import NotificationContextData
|
||||
from changedetectionio.processors.image_ssim_diff import SCREENSHOT_COMPARISON_THRESHOLD_OPTIONS, \
|
||||
SCREENSHOT_COMPARISON_THRESHOLD_OPTIONS_DEFAULT
|
||||
from changedetectionio.strtobool import strtobool
|
||||
from changedetectionio import processors
|
||||
|
||||
@@ -35,7 +37,7 @@ from changedetectionio.widgets import TernaryNoneBooleanField
|
||||
|
||||
# default
|
||||
# each select <option data-enabled="enabled-0-0"
|
||||
from changedetectionio.browser_steps.browser_steps import browser_step_ui_config
|
||||
from changedetectionio.blueprint.browser_steps.browser_steps import browser_step_ui_config
|
||||
|
||||
from changedetectionio import html_tools, content_fetchers
|
||||
|
||||
@@ -492,6 +494,7 @@ class ValidateJinja2Template(object):
|
||||
Validates that a {token} is from a valid set
|
||||
"""
|
||||
def __call__(self, form, field):
|
||||
from changedetectionio import notification
|
||||
from changedetectionio.jinja2_custom import create_jinja_env
|
||||
from jinja2 import BaseLoader, TemplateSyntaxError, UndefinedError
|
||||
from jinja2.meta import find_undeclared_variables
|
||||
@@ -608,12 +611,13 @@ class ValidateCSSJSONXPATHInput(object):
|
||||
raise ValidationError("XPath not permitted in this field!")
|
||||
from lxml import etree, html
|
||||
import elementpath
|
||||
from changedetectionio.html_tools import SafeXPath3Parser
|
||||
# xpath 2.0-3.1
|
||||
from elementpath.xpath3 import XPath3Parser
|
||||
tree = html.fromstring("<html></html>")
|
||||
line = line.replace('xpath:', '')
|
||||
|
||||
try:
|
||||
elementpath.select(tree, line.strip(), parser=SafeXPath3Parser)
|
||||
elementpath.select(tree, line.strip(), parser=XPath3Parser)
|
||||
except elementpath.ElementPathError as e:
|
||||
message = field.gettext('\'%s\' is not a valid XPath expression. (%s)')
|
||||
raise ValidationError(message % (line, str(e)))
|
||||
@@ -667,11 +671,9 @@ class ValidateCSSJSONXPATHInput(object):
|
||||
# `jq` requires full compilation in windows and so isn't generally available
|
||||
raise ValidationError("jq not support not found")
|
||||
|
||||
from changedetectionio.html_tools import validate_jq_expression
|
||||
input = line.replace('jq:', '')
|
||||
|
||||
try:
|
||||
validate_jq_expression(input)
|
||||
jq.compile(input)
|
||||
except (ValueError) as e:
|
||||
message = field.gettext('\'%s\' is not a valid jq expression. (%s)')
|
||||
@@ -818,7 +820,8 @@ class processor_text_json_diff_form(commonSettingsForm):
|
||||
filter_text_removed = BooleanField(_l('Removed lines'), default=True)
|
||||
|
||||
trigger_text = StringListField(_l('Keyword triggers - Trigger/wait for text'), [validators.Optional(), ValidateListRegex()])
|
||||
browser_steps = FieldList(FormField(SingleBrowserStep), min_entries=10)
|
||||
if os.getenv("PLAYWRIGHT_DRIVER_URL"):
|
||||
browser_steps = FieldList(FormField(SingleBrowserStep), min_entries=10)
|
||||
text_should_not_be_present = StringListField(_l('Block change-detection while text matches'), [validators.Optional(), ValidateListRegex()])
|
||||
webdriver_js_execute_code = TextAreaField(_l('Execute JavaScript before change detection'), render_kw={"rows": "5"}, validators=[validators.Optional()])
|
||||
|
||||
@@ -1007,7 +1010,7 @@ class globalSettingsApplicationForm(commonSettingsForm):
|
||||
render_kw={"placeholder": "0.1", "style": "width: 8em;"}
|
||||
)
|
||||
|
||||
password = SaltyPasswordField(_l('Password'), render_kw={"autocomplete": "new-password"})
|
||||
password = SaltyPasswordField(_l('Password'))
|
||||
pager_size = IntegerField(_l('Pager size'),
|
||||
render_kw={"style": "width: 5em;"},
|
||||
validators=[validators.NumberRange(min=0,
|
||||
|
||||
+13
-146
@@ -4,7 +4,6 @@ from loguru import logger
|
||||
from typing import List
|
||||
import html
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
|
||||
# HTML added to be sure each result matching a filter (.example) gets converted to a new line by Inscriptis
|
||||
@@ -14,45 +13,6 @@ PERL_STYLE_REGEX = r'^/(.*?)/([a-z]*)?$'
|
||||
|
||||
TITLE_RE = re.compile(r"<title[^>]*>(.*?)</title>", re.I | re.S)
|
||||
META_CS = re.compile(r'<meta[^>]+charset=["\']?\s*([a-z0-9_\-:+.]+)', re.I)
|
||||
|
||||
# jq builtins that can leak sensitive data or cause harm when user-supplied expressions are executed.
|
||||
# env/$ENV reads all process environment variables (passwords, API keys, etc.)
|
||||
# include/import can read arbitrary files from disk
|
||||
# input/inputs reads beyond the supplied JSON data
|
||||
# debug/stderr leaks data to stderr
|
||||
# halt/halt_error terminates the process (DoS)
|
||||
_JQ_BLOCKED_PATTERNS = [
|
||||
(re.compile(r'\benv\b'), 'env (reads environment variables)'),
|
||||
(re.compile(r'\$ENV\b'), '$ENV (reads environment variables)'),
|
||||
(re.compile(r'\binclude\b'), 'include (reads files from disk)'),
|
||||
(re.compile(r'\bimport\b'), 'import (reads files from disk)'),
|
||||
(re.compile(r'\binputs?\b'), 'input/inputs (reads beyond provided data)'),
|
||||
(re.compile(r'\bdebug\b'), 'debug (leaks data to stderr)'),
|
||||
(re.compile(r'\bstderr\b'), 'stderr (leaks data to stderr)'),
|
||||
(re.compile(r'\bhalt(?:_error)?\b'), 'halt/halt_error (terminates the process)'),
|
||||
(re.compile(r'\$__loc__\b'), '$__loc__ (leaks file path information)'),
|
||||
(re.compile(r'\bbuiltins\b'), 'builtins (enumerates available functions)'),
|
||||
(re.compile(r'\bmodulemeta\b'), 'modulemeta (leaks module information)'),
|
||||
(re.compile(r'\$JQ_BUILD_CONFIGURATION\b'), '$JQ_BUILD_CONFIGURATION (leaks build information)'),
|
||||
]
|
||||
|
||||
def validate_jq_expression(expression: str) -> None:
|
||||
"""Raise ValueError if the jq expression uses any dangerous builtin.
|
||||
|
||||
User-supplied jq expressions are executed server-side. Without this check,
|
||||
builtins like `env` expose every process environment variable (SALTED_PASS,
|
||||
proxy credentials, API keys, etc.) as watch output.
|
||||
"""
|
||||
from changedetectionio.strtobool import strtobool
|
||||
if strtobool(os.getenv('JQ_ALLOW_RISKY_EXPRESSIONS', 'false')):
|
||||
return
|
||||
|
||||
for pattern, description in _JQ_BLOCKED_PATTERNS:
|
||||
if pattern.search(expression):
|
||||
msg = f"jq expression uses disallowed builtin: {description}"
|
||||
logger.critical(f"Security: blocked jq expression containing '{description}' - expression: {expression!r}")
|
||||
raise ValueError(msg)
|
||||
|
||||
META_CT = re.compile(r'<meta[^>]+http-equiv=["\']?content-type["\']?[^>]*content=["\'][^>]*charset=([a-z0-9_\-:+.]+)', re.I)
|
||||
|
||||
# 'price' , 'lowPrice', 'highPrice' are usually under here
|
||||
@@ -63,59 +23,6 @@ class JSONNotFound(ValueError):
|
||||
def __init__(self, msg):
|
||||
ValueError.__init__(self, msg)
|
||||
|
||||
|
||||
_DEFAULT_UNSAFE_XPATH3_FUNCTIONS = [
|
||||
'unparsed-text',
|
||||
'unparsed-text-lines',
|
||||
'unparsed-text-available',
|
||||
'doc',
|
||||
'doc-available',
|
||||
'json-doc',
|
||||
'json-doc-available',
|
||||
'collection', # XPath 2.0+: loads XML node collections from arbitrary URIs
|
||||
'uri-collection', # XPath 3.0+: enumerates URIs from resource collections
|
||||
'transform', # XPath 3.1: XSLT transformation (currently raises, block proactively)
|
||||
'load-xquery-module', # XPath 3.1: loads XQuery modules (currently raises, block proactively)
|
||||
'environment-variable',
|
||||
'available-environment-variables',
|
||||
]
|
||||
|
||||
|
||||
def _build_safe_xpath3_parser():
|
||||
"""Return an XPath3Parser subclass with filesystem/environment access functions removed.
|
||||
|
||||
XPath 3.0 includes functions that can read arbitrary files or environment variables:
|
||||
- unparsed-text / unparsed-text-lines / unparsed-text-available (file read)
|
||||
- doc / doc-available (XML fetch from URI)
|
||||
- environment-variable / available-environment-variables (env var leakage)
|
||||
|
||||
Subclassing gives us an independent symbol_table copy (not shared with the parent class),
|
||||
so removing entries here does not affect XPath3Parser itself.
|
||||
|
||||
Override the blocked list via the XPATH_BLOCKED_FUNCTIONS environment variable
|
||||
(comma-separated, e.g. "unparsed-text,doc,environment-variable").
|
||||
"""
|
||||
import os
|
||||
from elementpath.xpath3 import XPath3Parser
|
||||
|
||||
class SafeXPath3Parser(XPath3Parser):
|
||||
pass
|
||||
|
||||
env_override = os.getenv('XPATH_BLOCKED_FUNCTIONS')
|
||||
if env_override is not None:
|
||||
blocked = [f.strip() for f in env_override.split(',') if f.strip()]
|
||||
else:
|
||||
blocked = _DEFAULT_UNSAFE_XPATH3_FUNCTIONS
|
||||
|
||||
for _fn in blocked:
|
||||
SafeXPath3Parser.symbol_table.pop(_fn, None)
|
||||
|
||||
return SafeXPath3Parser
|
||||
|
||||
|
||||
# Module-level singleton — built once, reused everywhere.
|
||||
SafeXPath3Parser = _build_safe_xpath3_parser()
|
||||
|
||||
# Doesn't look like python supports forward slash auto enclosure in re.findall
|
||||
# So convert it to inline flag "(?i)foobar" type configuration
|
||||
@lru_cache(maxsize=100)
|
||||
@@ -276,6 +183,8 @@ def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False
|
||||
"""
|
||||
from lxml import etree, html
|
||||
import elementpath
|
||||
# xpath 2.0-3.1
|
||||
from elementpath.xpath3 import XPath3Parser
|
||||
|
||||
parser = etree.HTMLParser()
|
||||
tree = None
|
||||
@@ -301,7 +210,7 @@ def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False
|
||||
# This allows //title to match elements in the default namespace
|
||||
namespaces[''] = tree.nsmap[None]
|
||||
|
||||
r = elementpath.select(tree, xpath_filter.strip(), namespaces=namespaces, parser=SafeXPath3Parser)
|
||||
r = elementpath.select(tree, xpath_filter.strip(), namespaces=namespaces, parser=XPath3Parser)
|
||||
#@note: //title/text() now works with default namespaces (fixed by registering '' prefix)
|
||||
#@note: //title/text() wont work where <title>CDATA.. (use cdata_in_document_to_text first)
|
||||
|
||||
@@ -326,9 +235,6 @@ def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False
|
||||
else:
|
||||
html_block += elementpath_tostring(element)
|
||||
|
||||
# Drop element references before the finally block so tree.clear() can release
|
||||
# the libxml2 document immediately (elements pin the C-level doc via refcount).
|
||||
del r
|
||||
return html_block
|
||||
finally:
|
||||
# Explicitly clear the tree to free memory
|
||||
@@ -424,16 +330,12 @@ def _parse_json(json_data, json_filter):
|
||||
raise Exception("jq not support not found")
|
||||
|
||||
if json_filter.startswith("jq:"):
|
||||
expr = json_filter.removeprefix("jq:")
|
||||
validate_jq_expression(expr)
|
||||
jq_expression = jq.compile(expr)
|
||||
jq_expression = jq.compile(json_filter.removeprefix("jq:"))
|
||||
match = jq_expression.input(json_data).all()
|
||||
return _get_stripped_text_from_json_match(match)
|
||||
|
||||
if json_filter.startswith("jqraw:"):
|
||||
expr = json_filter.removeprefix("jqraw:")
|
||||
validate_jq_expression(expr)
|
||||
jq_expression = jq.compile(expr)
|
||||
jq_expression = jq.compile(json_filter.removeprefix("jqraw:"))
|
||||
match = jq_expression.input(json_data).all()
|
||||
return '\n'.join(str(item) for item in match)
|
||||
|
||||
@@ -537,25 +439,13 @@ def extract_json_as_string(content, json_filter, ensure_is_ldjson_info_type=None
|
||||
except json.JSONDecodeError as e:
|
||||
logger.warning(f"Error processing JSON {content[:20]}...{str(e)})")
|
||||
else:
|
||||
# Check for JSONP wrapper: someCallback({...}) or some.namespace({...})
|
||||
# Server may claim application/json but actually return JSONP
|
||||
jsonp_match = re.match(r'^\w[\w.]*\s*\((.+)\)\s*;?\s*$', content.lstrip("\ufeff").strip(), re.DOTALL)
|
||||
if jsonp_match:
|
||||
try:
|
||||
inner = jsonp_match.group(1).strip()
|
||||
logger.warning(f"Content looks like JSONP, attempting to extract inner JSON for filter '{json_filter}'")
|
||||
stripped_text_from_html = _parse_json(json.loads(inner), json_filter)
|
||||
except json.JSONDecodeError as e:
|
||||
logger.warning(f"Error processing JSONP inner content {content[:20]}...{str(e)})")
|
||||
|
||||
if not stripped_text_from_html:
|
||||
# Probably something else, go fish inside for it
|
||||
try:
|
||||
stripped_text_from_html = extract_json_blob_from_html(content=content,
|
||||
ensure_is_ldjson_info_type=ensure_is_ldjson_info_type,
|
||||
json_filter=json_filter)
|
||||
except json.JSONDecodeError as e:
|
||||
logger.warning(f"Error processing JSON while extracting JSON from HTML blob {content[:20]}...{str(e)})")
|
||||
# Probably something else, go fish inside for it
|
||||
try:
|
||||
stripped_text_from_html = extract_json_blob_from_html(content=content,
|
||||
ensure_is_ldjson_info_type=ensure_is_ldjson_info_type,
|
||||
json_filter=json_filter )
|
||||
except json.JSONDecodeError as e:
|
||||
logger.warning(f"Error processing JSON while extracting JSON from HTML blob {content[:20]}...{str(e)})")
|
||||
|
||||
if not stripped_text_from_html:
|
||||
# Re 265 - Just return an empty string when filter not found
|
||||
@@ -671,33 +561,10 @@ def html_to_text(html_content: str, render_anchor_tag_content=False, is_rss=Fals
|
||||
)
|
||||
else:
|
||||
parser_config = None
|
||||
|
||||
if is_rss:
|
||||
html_content = re.sub(r'<title([\s>])', r'<h1\1', html_content)
|
||||
html_content = re.sub(r'</title>', r'</h1>', html_content)
|
||||
else:
|
||||
# Use BS4 html.parser to strip bloat — SPA's often dump 10MB+ of CSS/JS into <head>,
|
||||
# causing inscriptis to silently give up. Regex-based stripping is unsafe because tags
|
||||
# can appear inside JSON data attributes with JS-escaped closing tags (e.g. <\/script>),
|
||||
# causing the regex to scan past the intended close and eat real page content.
|
||||
from bs4 import BeautifulSoup
|
||||
soup = BeautifulSoup(html_content, 'html.parser')
|
||||
# Strip tags that inscriptis cannot render as meaningful text and which can be very large.
|
||||
# svg/math: produce path-data/MathML garbage; canvas/iframe/template: no inscriptis handlers.
|
||||
# video/audio/picture are kept — they may contain meaningful fallback text or captions.
|
||||
for tag in soup.find_all(['head', 'script', 'style', 'noscript', 'svg',
|
||||
'math', 'canvas', 'iframe', 'template']):
|
||||
tag.decompose()
|
||||
|
||||
# SPAs often use <body style="display:none"> to hide content until JS loads.
|
||||
# inscriptis respects CSS display rules, so strip hiding styles from the body tag.
|
||||
body_tag = soup.find('body')
|
||||
if body_tag and body_tag.get('style'):
|
||||
style = body_tag['style']
|
||||
if re.search(r'\b(?:display\s*:\s*none|visibility\s*:\s*hidden)\b', style, re.IGNORECASE):
|
||||
logger.debug(f"html_to_text: Removing hiding styles from body tag (found: '{style}')")
|
||||
del body_tag['style']
|
||||
|
||||
html_content = str(soup)
|
||||
|
||||
text_content = get_text(html_content, config=parser_config)
|
||||
return text_content
|
||||
|
||||
@@ -37,8 +37,6 @@ def get_timeago_locale(flask_locale):
|
||||
'no': 'nb_NO', # Norwegian Bokmål
|
||||
'hi': 'in_HI', # Hindi
|
||||
'cs': 'en', # Czech not supported by timeago, fallback to English
|
||||
'ja': 'ja', # Japanese
|
||||
'uk': 'uk', # Ukrainian
|
||||
'en_GB': 'en', # British English - timeago uses 'en'
|
||||
'en_US': 'en', # American English - timeago uses 'en'
|
||||
}
|
||||
@@ -69,7 +67,6 @@ LANGUAGE_DATA = {
|
||||
'tr': {'flag': 'fi fi-tr fis', 'name': 'Türkçe'},
|
||||
'ar': {'flag': 'fi fi-sa fis', 'name': 'العربية'},
|
||||
'hi': {'flag': 'fi fi-in fis', 'name': 'हिन्दी'},
|
||||
'uk': {'flag': 'fi fi-ua fis', 'name': 'Українська'},
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -2,7 +2,6 @@ from os import getenv
|
||||
from copy import deepcopy
|
||||
|
||||
from changedetectionio.blueprint.rss import RSS_FORMAT_TYPES, RSS_CONTENT_FORMAT_DEFAULT
|
||||
from changedetectionio.model.Tags import TagsDict
|
||||
|
||||
from changedetectionio.notification import (
|
||||
default_notification_body,
|
||||
@@ -69,7 +68,7 @@ class model(dict):
|
||||
'schema_version' : 0,
|
||||
'shared_diff_access': False,
|
||||
'strip_ignored_lines': False,
|
||||
'tags': None, # Initialized in __init__ with real datastore_path
|
||||
'tags': {}, #@todo use Tag.model initialisers
|
||||
'webdriver_delay': None , # Extra delay in seconds before extracting text
|
||||
'ui': {
|
||||
'use_page_title_in_list': True,
|
||||
@@ -81,16 +80,10 @@ class model(dict):
|
||||
}
|
||||
}
|
||||
|
||||
def __init__(self, *arg, datastore_path=None, **kw):
|
||||
def __init__(self, *arg, **kw):
|
||||
super(model, self).__init__(*arg, **kw)
|
||||
# Capture any tags data passed in before base_config overwrites the structure
|
||||
existing_tags = self.get('settings', {}).get('application', {}).get('tags') or {}
|
||||
# CRITICAL: deepcopy to avoid sharing mutable objects between instances
|
||||
self.update(deepcopy(self.base_config))
|
||||
# TagsDict requires the real datastore_path at runtime (cannot be set at class-definition time)
|
||||
if datastore_path is None:
|
||||
raise ValueError("App.model() requires 'datastore_path' keyword argument")
|
||||
self['settings']['application']['tags'] = TagsDict(existing_tags, datastore_path=datastore_path)
|
||||
|
||||
|
||||
def parse_headers_from_text_file(filepath):
|
||||
|
||||
@@ -1,48 +1,10 @@
|
||||
"""
|
||||
Tag/Group domain model for organizing and overriding watch settings.
|
||||
|
||||
ARCHITECTURE NOTE: Configuration Override Hierarchy
|
||||
===================================================
|
||||
|
||||
Tags can override Watch settings when overrides_watch=True.
|
||||
Current implementation requires manual checking in processors:
|
||||
|
||||
for tag_uuid in watch.get('tags'):
|
||||
tag = datastore['settings']['application']['tags'][tag_uuid]
|
||||
if tag.get('overrides_watch'):
|
||||
restock_settings = tag.get('restock_settings', {})
|
||||
break
|
||||
|
||||
With Pydantic, this would be automatic via chain resolution:
|
||||
Watch → Tag (first with overrides_watch) → Global
|
||||
|
||||
See: Watch.py model docstring for full Pydantic architecture explanation
|
||||
See: processors/restock_diff/processor.py:184-192 for current manual implementation
|
||||
"""
|
||||
|
||||
from changedetectionio.model import watch_base
|
||||
from changedetectionio.model.persistence import EntityPersistenceMixin
|
||||
|
||||
class model(EntityPersistenceMixin, watch_base):
|
||||
"""
|
||||
Tag domain model - groups watches and can override their settings.
|
||||
|
||||
Tags inherit from watch_base to reuse all the same fields as Watch.
|
||||
When overrides_watch=True, tag settings take precedence over watch settings
|
||||
for all watches in this tag/group.
|
||||
|
||||
Fields:
|
||||
overrides_watch (bool): If True, this tag's settings override watch settings
|
||||
title (str): Display name for this tag/group
|
||||
uuid (str): Unique identifier
|
||||
... (all fields from watch_base can be set as tag-level overrides)
|
||||
|
||||
Resolution order when overrides_watch=True:
|
||||
Watch.field → Tag.field (if overrides_watch) → Global.field
|
||||
"""
|
||||
class model(watch_base):
|
||||
|
||||
def __init__(self, *arg, **kw):
|
||||
# Parent class (watch_base) handles __datastore and __datastore_path
|
||||
super(model, self).__init__(*arg, **kw)
|
||||
|
||||
self['overrides_watch'] = kw.get('default', {}).get('overrides_watch')
|
||||
@@ -50,7 +12,3 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
if kw.get('default'):
|
||||
self.update(kw['default'])
|
||||
del kw['default']
|
||||
|
||||
# _save_to_disk() method provided by EntityPersistenceMixin
|
||||
# commit() and _get_commit_data() methods inherited from watch_base
|
||||
# Tag uses default _get_commit_data() (includes all keys)
|
||||
|
||||
@@ -1,39 +0,0 @@
|
||||
import os
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
from loguru import logger
|
||||
|
||||
_SENTINEL = object()
|
||||
|
||||
|
||||
class TagsDict(dict):
|
||||
"""Dict subclass that removes the corresponding tag.json file when a tag is deleted."""
|
||||
|
||||
def __init__(self, *args, datastore_path: str | os.PathLike, **kwargs) -> None:
|
||||
self._datastore_path = Path(datastore_path)
|
||||
super().__init__(*args, **kwargs)
|
||||
|
||||
def __delitem__(self, key: str) -> None:
|
||||
super().__delitem__(key)
|
||||
tag_dir = self._datastore_path / key
|
||||
tag_json_file = tag_dir / "tag.json"
|
||||
if not os.path.exists(tag_json_file):
|
||||
logger.critical(f"Aborting deletion of directory '{tag_dir}' because '{tag_json_file}' does not exist.")
|
||||
return
|
||||
try:
|
||||
shutil.rmtree(tag_dir)
|
||||
logger.info(f"Deleted tag directory for tag {key!r}")
|
||||
except FileNotFoundError:
|
||||
pass
|
||||
except OSError as e:
|
||||
logger.error(f"Failed to delete tag directory for tag {key!r}: {e}")
|
||||
|
||||
def pop(self, key: str, default=_SENTINEL):
|
||||
"""Remove and return tag, deleting its tag.json file. Raises KeyError if missing and no default given."""
|
||||
if key in self:
|
||||
value = self[key]
|
||||
del self[key]
|
||||
return value
|
||||
if default is _SENTINEL:
|
||||
raise KeyError(key)
|
||||
return default
|
||||
+188
-272
@@ -1,29 +1,5 @@
|
||||
"""
|
||||
Watch domain model for change detection monitoring.
|
||||
|
||||
ARCHITECTURE NOTE: Configuration Override Hierarchy
|
||||
===================================================
|
||||
|
||||
This module implements Watch objects that inherit from dict (technical debt).
|
||||
The dream architecture would use Pydantic for:
|
||||
|
||||
1. CHAIN RESOLUTION (Watch → Tag → Global Settings)
|
||||
- Current: Manual resolution scattered across codebase
|
||||
- Future: @computed_field properties with automatic resolution
|
||||
- Examples: resolved_fetch_backend, resolved_restock_settings, etc.
|
||||
|
||||
2. DATABASE BACKEND ABSTRACTION
|
||||
- Current: Domain model tightly coupled to file-based JSON storage
|
||||
- Future: Domain model (Pydantic) separate from persistence layer
|
||||
- Enables: Easy migration to PostgreSQL, MongoDB, etc.
|
||||
|
||||
3. TYPE SAFETY & VALIDATION
|
||||
- Current: Dict access with no compile-time checks
|
||||
- Future: Type hints, IDE autocomplete, validation at boundaries
|
||||
|
||||
See class model docstring for detailed explanation and examples.
|
||||
See: processors/restock_diff/processor.py:184-192 for manual resolution example
|
||||
"""
|
||||
import gc
|
||||
from copy import copy
|
||||
|
||||
from blinker import signal
|
||||
from changedetectionio.validate_url import is_safe_valid_url
|
||||
@@ -31,7 +7,6 @@ from changedetectionio.validate_url import is_safe_valid_url
|
||||
from changedetectionio.strtobool import strtobool
|
||||
from changedetectionio.jinja2_custom import render as jinja_render
|
||||
from . import watch_base
|
||||
from .persistence import EntityPersistenceMixin
|
||||
import os
|
||||
import re
|
||||
from pathlib import Path
|
||||
@@ -43,11 +18,6 @@ from ..html_tools import TRANSLATE_WHITESPACE_TABLE
|
||||
FAVICON_RESAVE_THRESHOLD_SECONDS=86400
|
||||
BROTLI_COMPRESS_SIZE_THRESHOLD = int(os.getenv('SNAPSHOT_BROTLI_COMPRESSION_THRESHOLD', 1024*20))
|
||||
|
||||
# Module-level favicon filename cache: data_dir → basename (or None)
|
||||
# Keyed by data_dir so it survives Watch object recreation, deepcopy, and concurrent requests.
|
||||
# Invalidated explicitly in bump_favicon() when a new favicon is saved.
|
||||
_FAVICON_FILENAME_CACHE: dict = {}
|
||||
|
||||
minimum_seconds_recheck_time = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 3))
|
||||
mtable = {'seconds': 1, 'minutes': 60, 'hours': 3600, 'days': 86400, 'weeks': 86400 * 7}
|
||||
|
||||
@@ -133,110 +103,22 @@ def _brotli_save(contents, filepath, mode=None, fallback_uncompressed=False):
|
||||
raise Exception(f"Brotli compression failed for {filepath}: {e}")
|
||||
|
||||
|
||||
class model(EntityPersistenceMixin, watch_base):
|
||||
"""
|
||||
Watch domain model for monitoring URL changes.
|
||||
|
||||
Inherits from watch_base (which inherits dict) - see watch_base docstring for field documentation.
|
||||
|
||||
## Configuration Override Hierarchy (Chain Resolution)
|
||||
|
||||
The dream architecture uses a 3-level resolution chain:
|
||||
Watch settings → Tag/Group settings → Global settings
|
||||
|
||||
Current implementation is MANUAL (see processor.py:184-192 for example):
|
||||
- Processors manually check watch.get('field')
|
||||
- Then loop through watch.tags to find first tag with overrides_watch=True
|
||||
- Finally fall back to datastore['settings']['application']['field']
|
||||
|
||||
FUTURE: Pydantic-based chain resolution would enable:
|
||||
|
||||
```python
|
||||
# Instead of manual resolution in every processor:
|
||||
restock_settings = watch.get('restock_settings', {})
|
||||
for tag_uuid in watch.get('tags'):
|
||||
tag = datastore['settings']['application']['tags'][tag_uuid]
|
||||
if tag.get('overrides_watch'):
|
||||
restock_settings = tag.get('restock_settings', {})
|
||||
break
|
||||
|
||||
# Clean computed properties with automatic resolution:
|
||||
@computed_field
|
||||
def resolved_restock_settings(self) -> dict:
|
||||
if self.restock_settings:
|
||||
return self.restock_settings
|
||||
for tag_uuid in self.tags:
|
||||
tag = self._datastore.get_tag(tag_uuid)
|
||||
if tag.overrides_watch and tag.restock_settings:
|
||||
return tag.restock_settings
|
||||
return self._datastore.settings.restock_settings or {}
|
||||
|
||||
# Usage: watch.resolved_restock_settings (automatic, type-safe, tested once)
|
||||
```
|
||||
|
||||
Benefits of Pydantic migration:
|
||||
1. Single source of truth for resolution logic (not scattered across processors)
|
||||
2. Type safety + IDE autocomplete (watch.resolved_fetch_backend vs dict navigation)
|
||||
3. Database backend abstraction (domain model separate from persistence)
|
||||
4. Automatic validation at boundaries
|
||||
5. Self-documenting via type hints
|
||||
6. Easy to test resolution independently
|
||||
|
||||
Resolution chain examples that would benefit:
|
||||
- fetch_backend: watch → tag → global (see get_fetch_backend property)
|
||||
- notification_urls: watch → tag → global
|
||||
- time_between_check: watch → global (see threshold_seconds)
|
||||
- restock_settings: watch → tag (see processors/restock_diff/processor.py:184-192)
|
||||
- history_snapshot_max_length: watch → global (see save_history_blob:550-556)
|
||||
- All processor_config_* settings could use tag overrides
|
||||
|
||||
## Database Backend Abstraction with Pydantic
|
||||
|
||||
Current: Watch inherits dict, tightly coupled to file-based JSON storage
|
||||
Future: Domain model (Watch) separate from persistence layer
|
||||
|
||||
```python
|
||||
# Domain model (database-agnostic)
|
||||
class Watch(BaseModel):
|
||||
uuid: str
|
||||
url: str
|
||||
# ... validation, business logic
|
||||
|
||||
# Pluggable backends
|
||||
class DataStoreBackend(ABC):
|
||||
def save_watch(self, watch: Watch): ...
|
||||
def load_watch(self, uuid: str) -> Watch: ...
|
||||
|
||||
# Implementations: FileBackend, MongoBackend, PostgresBackend, etc.
|
||||
```
|
||||
|
||||
This would enable:
|
||||
- Easy migration between storage backends (file → postgres → mongodb)
|
||||
- Pydantic handles serialization/deserialization automatically
|
||||
- Domain logic stays clean (no storage concerns in Watch methods)
|
||||
|
||||
## Migration Path
|
||||
|
||||
Given existing codebase, incremental migration recommended:
|
||||
1. Create Pydantic models alongside existing dict-based models
|
||||
2. Add .to_pydantic() / .from_pydantic() bridge methods
|
||||
3. Gradually migrate code to use Pydantic models
|
||||
4. Remove dict inheritance once migration complete
|
||||
|
||||
See: watch_base docstring for technical debt discussion
|
||||
See: processors/restock_diff/processor.py:184-192 for manual resolution example
|
||||
See: Watch.py:550-556 for nested dict navigation that would become watch.resolved_*
|
||||
"""
|
||||
class model(watch_base):
|
||||
__newest_history_key = None
|
||||
__history_n = 0
|
||||
jitter_seconds = 0
|
||||
|
||||
def __init__(self, *arg, **kw):
|
||||
# Validate __datastore before calling parent (Watch requires it)
|
||||
if not kw.get('__datastore'):
|
||||
raise ValueError("Watch object requires '__datastore' reference - cannot access global settings without it")
|
||||
self.__datastore_path = kw.get('datastore_path')
|
||||
if kw.get('datastore_path'):
|
||||
del kw['datastore_path']
|
||||
|
||||
self.__datastore = kw.get('__datastore')
|
||||
if not self.__datastore:
|
||||
raise ValueError("Watch object requires '__datastore' reference - cannot access global settings without it")
|
||||
if kw.get('__datastore'):
|
||||
del kw['__datastore']
|
||||
|
||||
# Parent class (watch_base) handles __datastore and __datastore_path
|
||||
super(model, self).__init__(*arg, **kw)
|
||||
|
||||
if kw.get('default'):
|
||||
@@ -249,8 +131,94 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
# Be sure the cached timestamp is ready
|
||||
bump = self.history
|
||||
|
||||
# Note: __deepcopy__, __getstate__, and __setstate__ are inherited from watch_base
|
||||
# This prevents memory leaks by sharing __datastore reference instead of copying it
|
||||
def __deepcopy__(self, memo):
|
||||
"""
|
||||
Custom deepcopy that excludes __datastore to prevent memory leaks.
|
||||
|
||||
CRITICAL FIX: Without this, deepcopy(watch) copies the entire datastore
|
||||
(which contains all other watches), causing exponential memory growth.
|
||||
With 100 watches, this creates 10,000 watch objects in memory (100²).
|
||||
|
||||
This is called by:
|
||||
- api/Watch.py:76 (API endpoint)
|
||||
- processors/base.py:26 (EVERY processor run)
|
||||
- store/__init__.py:544 (clone watch)
|
||||
- And 4+ other locations
|
||||
"""
|
||||
from copy import deepcopy
|
||||
|
||||
# Create a new instance without calling __init__ (avoids __datastore requirement)
|
||||
cls = self.__class__
|
||||
new_watch = cls.__new__(cls)
|
||||
memo[id(self)] = new_watch
|
||||
|
||||
# Copy the dict data (all the watch settings)
|
||||
for key, value in self.items():
|
||||
new_watch[key] = deepcopy(value, memo)
|
||||
|
||||
# Copy instance attributes EXCEPT the datastore references
|
||||
# These are cached/computed values that need to be preserved
|
||||
new_watch._model__newest_history_key = self._model__newest_history_key
|
||||
new_watch._model__history_n = self._model__history_n
|
||||
new_watch.jitter_seconds = self.jitter_seconds
|
||||
|
||||
# Copy datastore_path (string, safe to copy)
|
||||
new_watch._model__datastore_path = self._model__datastore_path
|
||||
|
||||
# CRITICAL: Share the datastore reference (don't copy it!)
|
||||
# This is safe because we never modify the datastore through the watch
|
||||
new_watch._model__datastore = self._model__datastore
|
||||
|
||||
# Do NOT copy favicon cache - let it be regenerated on demand
|
||||
# This is just a performance cache (prevents repeated glob operations)
|
||||
# and will be rebuilt automatically on first access
|
||||
|
||||
return new_watch
|
||||
|
||||
def __getstate__(self):
|
||||
"""
|
||||
Custom pickle serialization that excludes __datastore.
|
||||
|
||||
This handles pickle/unpickle (used by multiprocessing, caching, etc.)
|
||||
and ensures the datastore reference is never serialized.
|
||||
"""
|
||||
# Get the dict data
|
||||
state = dict(self)
|
||||
|
||||
# Add the instance attributes we want to preserve
|
||||
state['__watch_metadata__'] = {
|
||||
'newest_history_key': self._model__newest_history_key,
|
||||
'history_n': self._model__history_n,
|
||||
'jitter_seconds': self.jitter_seconds,
|
||||
'datastore_path': self._model__datastore_path,
|
||||
}
|
||||
|
||||
# NOTE: __datastore and _favicon_filename_cache are intentionally excluded
|
||||
# Both will be regenerated/restored as needed
|
||||
return state
|
||||
|
||||
def __setstate__(self, state):
|
||||
"""
|
||||
Custom pickle deserialization.
|
||||
|
||||
WARNING: This creates a Watch without a __datastore reference!
|
||||
The caller MUST set watch._model__datastore after unpickling.
|
||||
"""
|
||||
# Extract metadata
|
||||
metadata = state.pop('__watch_metadata__', {})
|
||||
|
||||
# Restore dict data
|
||||
self.update(state)
|
||||
|
||||
# Restore instance attributes
|
||||
self._model__newest_history_key = metadata.get('newest_history_key')
|
||||
self._model__history_n = metadata.get('history_n', 0)
|
||||
self.jitter_seconds = metadata.get('jitter_seconds', 0)
|
||||
self._model__datastore_path = metadata.get('datastore_path')
|
||||
|
||||
# __datastore is NOT restored - caller must set it!
|
||||
# _favicon_filename_cache is NOT restored - will regenerate on demand
|
||||
self._model__datastore = None
|
||||
|
||||
@property
|
||||
def viewed(self):
|
||||
@@ -264,6 +232,11 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
def has_unviewed(self):
|
||||
return int(self.newest_history_key) > int(self['last_viewed']) and self.__history_n >= 2
|
||||
|
||||
def ensure_data_dir_exists(self):
|
||||
if not os.path.isdir(self.watch_data_dir):
|
||||
logger.debug(f"> Creating data dir {self.watch_data_dir}")
|
||||
os.mkdir(self.watch_data_dir)
|
||||
|
||||
@property
|
||||
def link(self):
|
||||
|
||||
@@ -319,8 +292,7 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
|
||||
# JSON Data, Screenshots, Textfiles (history index and snapshots), HTML in the future etc
|
||||
# But preserve processor config files (they're configuration, not history data)
|
||||
# Use glob not rglob here for safety.
|
||||
for item in pathlib.Path(str(self.data_dir)).glob("*.*"):
|
||||
for item in pathlib.Path(str(self.watch_data_dir)).rglob("*.*"):
|
||||
# Skip processor config files
|
||||
if item.name in processor_config_files:
|
||||
continue
|
||||
@@ -340,6 +312,7 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
'last_notification_error': False,
|
||||
'last_viewed': 0,
|
||||
'previous_md5': False,
|
||||
'previous_md5_before_filters': False,
|
||||
'remote_server_reply': None,
|
||||
'track_ldjson_price_data': None
|
||||
})
|
||||
@@ -356,30 +329,8 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
@property
|
||||
def get_fetch_backend(self):
|
||||
"""
|
||||
Get the fetch backend for this watch with special case handling.
|
||||
|
||||
CHAIN RESOLUTION OPPORTUNITY:
|
||||
Currently returns watch.fetch_backend directly, but doesn't implement
|
||||
Watch → Tag → Global resolution chain. With Pydantic:
|
||||
|
||||
@computed_field
|
||||
def resolved_fetch_backend(self) -> str:
|
||||
# Special case: PDFs always use html_requests
|
||||
if self.is_pdf:
|
||||
return 'html_requests'
|
||||
|
||||
# Watch override
|
||||
if self.fetch_backend and self.fetch_backend != 'system':
|
||||
return self.fetch_backend
|
||||
|
||||
# Tag override (first tag with overrides_watch=True wins)
|
||||
for tag_uuid in self.tags:
|
||||
tag = self._datastore.get_tag(tag_uuid)
|
||||
if tag.overrides_watch and tag.fetch_backend:
|
||||
return tag.fetch_backend
|
||||
|
||||
# Global default
|
||||
return self._datastore.settings.fetch_backend
|
||||
Like just using the `fetch_backend` key but there could be some logic
|
||||
:return:
|
||||
"""
|
||||
# Maybe also if is_image etc?
|
||||
# This is because chrome/playwright wont render the PDF in the browser and we will just fetch it and use pdf2html to see the text.
|
||||
@@ -388,37 +339,12 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
|
||||
return self.get('fetch_backend')
|
||||
|
||||
@property
|
||||
def fetcher_supports_screenshots(self):
|
||||
"""Return True if the fetcher configured for this watch supports screenshots.
|
||||
|
||||
Resolves 'system' via self._datastore, then checks supports_screenshots on
|
||||
the actual fetcher class. Works for built-in and plugin fetchers alike.
|
||||
"""
|
||||
from changedetectionio import content_fetchers
|
||||
|
||||
fetcher_name = self.get_fetch_backend # already handles is_pdf → html_requests
|
||||
if not fetcher_name or fetcher_name == 'system':
|
||||
fetcher_name = self._datastore['settings']['application'].get('fetch_backend', 'html_requests')
|
||||
|
||||
fetcher_class = getattr(content_fetchers, fetcher_name, None)
|
||||
if fetcher_class is None:
|
||||
return False
|
||||
|
||||
return bool(getattr(fetcher_class, 'supports_screenshots', False))
|
||||
|
||||
@property
|
||||
def is_pdf(self):
|
||||
url = str(self.get("url") or "").lower()
|
||||
content_type = str(self.get("content-type") or "").lower()
|
||||
|
||||
if content_type in ("none", "null", ""):
|
||||
content_type = ""
|
||||
|
||||
return (
|
||||
url.endswith(".pdf")
|
||||
or content_type.split(";")[0].strip() == "application/pdf"
|
||||
)
|
||||
# content_type field is set in the future
|
||||
# https://github.com/dgtlmoon/changedetection.io/issues/1392
|
||||
# Not sure the best logic here
|
||||
return self.get('url', '').lower().endswith('.pdf') or 'pdf' in self.get('content_type', '').lower()
|
||||
|
||||
@property
|
||||
def label(self):
|
||||
@@ -453,11 +379,11 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
tmp_history = {}
|
||||
|
||||
# In the case we are only using the watch for processing without history
|
||||
if not self.data_dir:
|
||||
if not self.watch_data_dir:
|
||||
return []
|
||||
|
||||
# Read the history file as a dict
|
||||
fname = os.path.join(self.data_dir, self.history_index_filename)
|
||||
fname = os.path.join(self.watch_data_dir, self.history_index_filename)
|
||||
if os.path.isfile(fname):
|
||||
logger.debug(f"Reading watch history index for {self.get('uuid')}")
|
||||
with open(fname, "r", encoding='utf-8') as f:
|
||||
@@ -470,13 +396,13 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
# Cross-platform: check for any path separator (works on Windows and Unix)
|
||||
if os.sep not in v and '/' not in v and '\\' not in v:
|
||||
# Relative filename only, no path separators
|
||||
v = os.path.join(self.data_dir, v)
|
||||
v = os.path.join(self.watch_data_dir, v)
|
||||
else:
|
||||
# It's possible that they moved the datadir on older versions
|
||||
# So the snapshot exists but is in a different path
|
||||
# Cross-platform: use os.path.basename instead of split('/')
|
||||
snapshot_fname = os.path.basename(v)
|
||||
proposed_new_path = os.path.join(self.data_dir, snapshot_fname)
|
||||
proposed_new_path = os.path.join(self.watch_data_dir, snapshot_fname)
|
||||
if not os.path.exists(v) and os.path.exists(proposed_new_path):
|
||||
v = proposed_new_path
|
||||
|
||||
@@ -493,7 +419,7 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
|
||||
@property
|
||||
def has_history(self):
|
||||
fname = os.path.join(self.data_dir, self.history_index_filename)
|
||||
fname = os.path.join(self.watch_data_dir, self.history_index_filename)
|
||||
return os.path.isfile(fname)
|
||||
|
||||
@property
|
||||
@@ -599,7 +525,7 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
def _write_atomic(self, dest, data, mode='wb'):
|
||||
"""Write data atomically to dest using a temp file"""
|
||||
import tempfile
|
||||
with tempfile.NamedTemporaryFile(mode, delete=False, dir=self.data_dir) as tmp:
|
||||
with tempfile.NamedTemporaryFile(mode, delete=False, dir=self.watch_data_dir) as tmp:
|
||||
tmp.write(data)
|
||||
tmp.flush()
|
||||
os.fsync(tmp.fileno())
|
||||
@@ -608,7 +534,7 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
|
||||
def history_trim(self, newest_n_items):
|
||||
from pathlib import Path
|
||||
import gc
|
||||
|
||||
# Sort by timestamp (key)
|
||||
sorted_items = sorted(self.history.items(), key=lambda x: int(x[0]))
|
||||
|
||||
@@ -625,7 +551,7 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
finally:
|
||||
logger.debug(f"[{self.get('uuid')}] Deleted {item[1]} history snapshot")
|
||||
try:
|
||||
dest = os.path.join(self.data_dir, self.history_index_filename)
|
||||
dest = os.path.join(self.watch_data_dir, self.history_index_filename)
|
||||
output = "\r\n".join(
|
||||
f"{k},{Path(v).name}"
|
||||
for k, v in keep_part.items()
|
||||
@@ -664,7 +590,7 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
ext = 'bin'
|
||||
|
||||
snapshot_fname = f"{snapshot_id}.{ext}"
|
||||
dest = os.path.join(self.data_dir, snapshot_fname)
|
||||
dest = os.path.join(self.watch_data_dir, snapshot_fname)
|
||||
self._write_atomic(dest, contents)
|
||||
logger.trace(f"Saved binary snapshot as {snapshot_fname} ({len(contents)} bytes)")
|
||||
|
||||
@@ -674,7 +600,7 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
# Compressed text
|
||||
import brotli
|
||||
snapshot_fname = f"{snapshot_id}.txt.br"
|
||||
dest = os.path.join(self.data_dir, snapshot_fname)
|
||||
dest = os.path.join(self.watch_data_dir, snapshot_fname)
|
||||
|
||||
if not os.path.exists(dest):
|
||||
try:
|
||||
@@ -685,16 +611,16 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
logger.error(f"{self.get('uuid')} - Brotli compression failed: {e}")
|
||||
# Fallback to uncompressed
|
||||
snapshot_fname = f"{snapshot_id}.txt"
|
||||
dest = os.path.join(self.data_dir, snapshot_fname)
|
||||
dest = os.path.join(self.watch_data_dir, snapshot_fname)
|
||||
self._write_atomic(dest, contents.encode('utf-8'))
|
||||
else:
|
||||
# Plain text
|
||||
snapshot_fname = f"{snapshot_id}.txt"
|
||||
dest = os.path.join(self.data_dir, snapshot_fname)
|
||||
dest = os.path.join(self.watch_data_dir, snapshot_fname)
|
||||
self._write_atomic(dest, contents.encode('utf-8'))
|
||||
|
||||
# Append to history.txt atomically
|
||||
index_fname = os.path.join(self.data_dir, self.history_index_filename)
|
||||
index_fname = os.path.join(self.watch_data_dir, self.history_index_filename)
|
||||
index_line = f"{timestamp},{snapshot_fname}\n"
|
||||
|
||||
with open(index_fname, 'a', encoding='utf-8') as f:
|
||||
@@ -706,13 +632,11 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
self.__newest_history_key = timestamp
|
||||
self.__history_n += 1
|
||||
|
||||
# MANUAL CHAIN RESOLUTION: Watch → Global
|
||||
# With Pydantic, this would become: maxlen = watch.resolved_history_snapshot_max_length
|
||||
# @computed_field def resolved_history_snapshot_max_length(self) -> Optional[int]:
|
||||
# if self.history_snapshot_max_length: return self.history_snapshot_max_length
|
||||
# if tag := self._get_override_tag(): return tag.history_snapshot_max_length
|
||||
# return self._datastore.settings.history_snapshot_max_length
|
||||
maxlen = self.get('history_snapshot_max_length') or self.get_global_setting('application', 'history_snapshot_max_length')
|
||||
|
||||
maxlen = (
|
||||
self.get('history_snapshot_max_length')
|
||||
or (self.__datastore and self.__datastore['settings']['application'].get('history_snapshot_max_length'))
|
||||
)
|
||||
|
||||
if maxlen and self.__history_n and self.__history_n > maxlen:
|
||||
self.history_trim(newest_n_items=maxlen)
|
||||
@@ -769,7 +693,7 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
return not local_lines.issubset(existing_history)
|
||||
|
||||
def get_screenshot(self):
|
||||
fname = os.path.join(self.data_dir, "last-screenshot.png")
|
||||
fname = os.path.join(self.watch_data_dir, "last-screenshot.png")
|
||||
if os.path.isfile(fname):
|
||||
return fname
|
||||
|
||||
@@ -784,7 +708,7 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
if not favicon_fname:
|
||||
return True
|
||||
try:
|
||||
fname = next(iter(glob.glob(os.path.join(self.data_dir, "favicon.*"))), None)
|
||||
fname = next(iter(glob.glob(os.path.join(self.watch_data_dir, "favicon.*"))), None)
|
||||
logger.trace(f"Favicon file maybe found at {fname}")
|
||||
if os.path.isfile(fname):
|
||||
file_age = int(time.time() - os.path.getmtime(fname))
|
||||
@@ -817,7 +741,7 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
base = "favicon"
|
||||
extension = "ico"
|
||||
|
||||
fname = os.path.join(self.data_dir, f"favicon.{extension}")
|
||||
fname = os.path.join(self.watch_data_dir, f"favicon.{extension}")
|
||||
|
||||
try:
|
||||
# validate=True makes sure the string only contains valid base64 chars
|
||||
@@ -830,8 +754,9 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
with open(fname, 'wb') as f:
|
||||
f.write(decoded)
|
||||
|
||||
# Invalidate module-level favicon filename cache for this watch
|
||||
_FAVICON_FILENAME_CACHE.pop(self.data_dir, None)
|
||||
# Invalidate favicon filename cache
|
||||
if hasattr(self, '_favicon_filename_cache'):
|
||||
delattr(self, '_favicon_filename_cache')
|
||||
|
||||
# A signal that could trigger the socket server to update the browser also
|
||||
watch_check_update = signal('watch_favicon_bump')
|
||||
@@ -846,23 +771,35 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
|
||||
def get_favicon_filename(self) -> str | None:
|
||||
"""
|
||||
Find any favicon.* file in the watch data directory.
|
||||
Find any favicon.* file in the current working directory
|
||||
and return the contents of the newest one.
|
||||
|
||||
Uses a module-level cache keyed by data_dir to survive Watch object recreation,
|
||||
deepcopy (which drops instance attrs), and concurrent request races.
|
||||
Invalidated by bump_favicon() when a new favicon is saved.
|
||||
MEMORY LEAK FIX: Cache the result to avoid repeated glob.glob() operations.
|
||||
glob.glob() causes millions of fnmatch allocations when called for every watch on page load.
|
||||
|
||||
Returns:
|
||||
str: Basename of the favicon file, or None if not found.
|
||||
str: Basename of the newest favicon file, or None if not found.
|
||||
"""
|
||||
if self.data_dir in _FAVICON_FILENAME_CACHE:
|
||||
return _FAVICON_FILENAME_CACHE[self.data_dir]
|
||||
# Check cache first (prevents 26M+ allocations from repeated glob operations)
|
||||
cache_key = '_favicon_filename_cache'
|
||||
if hasattr(self, cache_key):
|
||||
return getattr(self, cache_key)
|
||||
|
||||
import glob
|
||||
files = glob.glob(os.path.join(self.data_dir, "favicon.*"))
|
||||
fname = os.path.basename(files[0]) if files else None
|
||||
_FAVICON_FILENAME_CACHE[self.data_dir] = fname
|
||||
return fname
|
||||
|
||||
# Search for all favicon.* files
|
||||
files = glob.glob(os.path.join(self.watch_data_dir, "favicon.*"))
|
||||
|
||||
if not files:
|
||||
result = None
|
||||
else:
|
||||
# Find the newest by modification time
|
||||
newest_file = max(files, key=os.path.getmtime)
|
||||
result = os.path.basename(newest_file)
|
||||
|
||||
# Cache the result
|
||||
setattr(self, cache_key, result)
|
||||
return result
|
||||
|
||||
def get_screenshot_as_thumbnail(self, max_age=3200):
|
||||
"""Return path to a square thumbnail of the most recent screenshot.
|
||||
@@ -878,7 +815,7 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
import os
|
||||
import time
|
||||
|
||||
thumbnail_path = os.path.join(self.data_dir, "thumbnail.jpeg")
|
||||
thumbnail_path = os.path.join(self.watch_data_dir, "thumbnail.jpeg")
|
||||
top_trim = 500 # Pixels from top of screenshot to use
|
||||
|
||||
screenshot_path = self.get_screenshot()
|
||||
@@ -929,7 +866,7 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
return None
|
||||
|
||||
def __get_file_ctime(self, filename):
|
||||
fname = os.path.join(self.data_dir, filename)
|
||||
fname = os.path.join(self.watch_data_dir, filename)
|
||||
if os.path.isfile(fname):
|
||||
return int(os.path.getmtime(fname))
|
||||
return False
|
||||
@@ -954,9 +891,14 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
def snapshot_error_screenshot_ctime(self):
|
||||
return self.__get_file_ctime('last-error-screenshot.png')
|
||||
|
||||
@property
|
||||
def watch_data_dir(self):
|
||||
# The base dir of the watch data
|
||||
return os.path.join(self.__datastore_path, self['uuid']) if self.__datastore_path else None
|
||||
|
||||
def get_error_text(self):
|
||||
"""Return the text saved from a previous request that resulted in a non-200 error"""
|
||||
fname = os.path.join(self.data_dir, "last-error.txt")
|
||||
fname = os.path.join(self.watch_data_dir, "last-error.txt")
|
||||
if os.path.isfile(fname):
|
||||
with open(fname, 'r', encoding='utf-8') as f:
|
||||
return f.read()
|
||||
@@ -964,7 +906,7 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
|
||||
def get_error_snapshot(self):
|
||||
"""Return path to the screenshot that resulted in a non-200 error"""
|
||||
fname = os.path.join(self.data_dir, "last-error-screenshot.png")
|
||||
fname = os.path.join(self.watch_data_dir, "last-error-screenshot.png")
|
||||
if os.path.isfile(fname):
|
||||
return fname
|
||||
return False
|
||||
@@ -988,37 +930,6 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
def toggle_mute(self):
|
||||
self['notification_muted'] ^= True
|
||||
|
||||
def _get_commit_data(self):
|
||||
"""
|
||||
Prepare watch data for commit.
|
||||
|
||||
Excludes processor_config_* keys (stored in separate files).
|
||||
Normalizes browser_steps to empty list if no meaningful steps.
|
||||
"""
|
||||
import copy
|
||||
|
||||
# Get base snapshot with lock
|
||||
lock = self._datastore.lock if self._datastore and hasattr(self._datastore, 'lock') else None
|
||||
|
||||
if lock:
|
||||
with lock:
|
||||
snapshot = dict(self)
|
||||
else:
|
||||
snapshot = dict(self)
|
||||
|
||||
# Exclude processor config keys (stored separately)
|
||||
watch_dict = {k: copy.deepcopy(v) for k, v in snapshot.items() if not k.startswith('processor_config_')}
|
||||
|
||||
# Normalize browser_steps: if no meaningful steps, save as empty list
|
||||
if not self.has_browser_steps:
|
||||
watch_dict['browser_steps'] = []
|
||||
|
||||
return watch_dict
|
||||
|
||||
# _save_to_disk() method provided by EntityPersistenceMixin
|
||||
# commit() method inherited from watch_base
|
||||
|
||||
|
||||
def extra_notification_token_values(self):
|
||||
# Used for providing extra tokens
|
||||
# return {'widget': 555}
|
||||
@@ -1048,7 +959,7 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
if not csv_writer:
|
||||
# A file on the disk can be transferred much faster via flask than a string reply
|
||||
csv_output_filename = f"report-{self.get('uuid')}.csv"
|
||||
f = open(os.path.join(self.data_dir, csv_output_filename), 'w')
|
||||
f = open(os.path.join(self.watch_data_dir, csv_output_filename), 'w')
|
||||
# @todo some headers in the future
|
||||
#fieldnames = ['Epoch seconds', 'Date']
|
||||
csv_writer = csv.writer(f,
|
||||
@@ -1090,7 +1001,7 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
|
||||
def save_error_text(self, contents):
|
||||
self.ensure_data_dir_exists()
|
||||
target_path = os.path.join(self.data_dir, "last-error.txt")
|
||||
target_path = os.path.join(self.watch_data_dir, "last-error.txt")
|
||||
with open(target_path, 'w', encoding='utf-8') as f:
|
||||
f.write(contents)
|
||||
|
||||
@@ -1099,9 +1010,9 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
import zlib
|
||||
|
||||
if as_error:
|
||||
target_path = os.path.join(str(self.data_dir), "elements-error.deflate")
|
||||
target_path = os.path.join(str(self.watch_data_dir), "elements-error.deflate")
|
||||
else:
|
||||
target_path = os.path.join(str(self.data_dir), "elements.deflate")
|
||||
target_path = os.path.join(str(self.watch_data_dir), "elements.deflate")
|
||||
|
||||
self.ensure_data_dir_exists()
|
||||
|
||||
@@ -1116,9 +1027,9 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
def save_screenshot(self, screenshot: bytes, as_error=False):
|
||||
|
||||
if as_error:
|
||||
target_path = os.path.join(self.data_dir, "last-error-screenshot.png")
|
||||
target_path = os.path.join(self.watch_data_dir, "last-error-screenshot.png")
|
||||
else:
|
||||
target_path = os.path.join(self.data_dir, "last-screenshot.png")
|
||||
target_path = os.path.join(self.watch_data_dir, "last-screenshot.png")
|
||||
|
||||
self.ensure_data_dir_exists()
|
||||
|
||||
@@ -1129,7 +1040,7 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
|
||||
def get_last_fetched_text_before_filters(self):
|
||||
import brotli
|
||||
filepath = os.path.join(self.data_dir, 'last-fetched.br')
|
||||
filepath = os.path.join(self.watch_data_dir, 'last-fetched.br')
|
||||
|
||||
if not os.path.isfile(filepath) or os.path.getsize(filepath) == 0:
|
||||
# If a previous attempt doesnt yet exist, just snarf the previous snapshot instead
|
||||
@@ -1144,13 +1055,13 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
|
||||
def save_last_text_fetched_before_filters(self, contents):
|
||||
import brotli
|
||||
filepath = os.path.join(self.data_dir, 'last-fetched.br')
|
||||
filepath = os.path.join(self.watch_data_dir, 'last-fetched.br')
|
||||
_brotli_save(contents, filepath, mode=brotli.MODE_TEXT, fallback_uncompressed=False)
|
||||
|
||||
def save_last_fetched_html(self, timestamp, contents):
|
||||
self.ensure_data_dir_exists()
|
||||
snapshot_fname = f"{timestamp}.html.br"
|
||||
filepath = os.path.join(self.data_dir, snapshot_fname)
|
||||
filepath = os.path.join(self.watch_data_dir, snapshot_fname)
|
||||
_brotli_save(contents, filepath, mode=None, fallback_uncompressed=True)
|
||||
self._prune_last_fetched_html_snapshots()
|
||||
|
||||
@@ -1158,7 +1069,7 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
import brotli
|
||||
|
||||
snapshot_fname = f"{timestamp}.html.br"
|
||||
filepath = os.path.join(self.data_dir, snapshot_fname)
|
||||
filepath = os.path.join(self.watch_data_dir, snapshot_fname)
|
||||
if os.path.isfile(filepath):
|
||||
with open(filepath, 'rb') as f:
|
||||
return (brotli.decompress(f.read()).decode('utf-8'))
|
||||
@@ -1173,7 +1084,7 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
|
||||
for index, timestamp in enumerate(dates):
|
||||
snapshot_fname = f"{timestamp}.html.br"
|
||||
filepath = os.path.join(self.data_dir, snapshot_fname)
|
||||
filepath = os.path.join(self.watch_data_dir, snapshot_fname)
|
||||
|
||||
# Keep only the first 2
|
||||
if index > 1 and os.path.isfile(filepath):
|
||||
@@ -1184,7 +1095,7 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
def get_browsersteps_available_screenshots(self):
|
||||
"For knowing which screenshots are available to show the user in BrowserSteps UI"
|
||||
available = []
|
||||
for f in Path(self.data_dir).glob('step_before-*.jpeg'):
|
||||
for f in Path(self.watch_data_dir).glob('step_before-*.jpeg'):
|
||||
step_n=re.search(r'step_before-(\d+)', f.name)
|
||||
if step_n:
|
||||
available.append(step_n.group(1))
|
||||
@@ -1193,13 +1104,18 @@ class model(EntityPersistenceMixin, watch_base):
|
||||
def compile_error_texts(self, has_proxies=None):
|
||||
"""Compile error texts for this watch.
|
||||
Accepts has_proxies parameter to ensure it works even outside app context"""
|
||||
from flask import url_for, has_request_context
|
||||
from flask import url_for
|
||||
from markupsafe import Markup
|
||||
|
||||
output = [] # Initialize as list since we're using append
|
||||
last_error = self.get('last_error','')
|
||||
|
||||
has_app_context = has_request_context()
|
||||
try:
|
||||
url_for('settings.settings_page')
|
||||
except Exception as e:
|
||||
has_app_context = False
|
||||
else:
|
||||
has_app_context = True
|
||||
|
||||
# has app+request context, we can use url_for()
|
||||
if has_app_context:
|
||||
|
||||
@@ -2,175 +2,12 @@ import os
|
||||
import uuid
|
||||
|
||||
from changedetectionio import strtobool
|
||||
from .persistence import EntityPersistenceMixin, _determine_entity_type
|
||||
|
||||
__all__ = ['EntityPersistenceMixin', 'watch_base']
|
||||
|
||||
from ..browser_steps.browser_steps import browser_steps_get_valid_steps
|
||||
|
||||
USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH = 'System default'
|
||||
CONDITIONS_MATCH_LOGIC_DEFAULT = 'ALL'
|
||||
|
||||
|
||||
class watch_base(dict):
|
||||
"""
|
||||
Base watch domain model (inherits from dict for backward compatibility).
|
||||
|
||||
WARNING: This class inherits from dict, which violates proper encapsulation.
|
||||
Dict inheritance is legacy technical debt that should be refactored to a proper
|
||||
domain model (e.g., Pydantic BaseModel) for better type safety and validation.
|
||||
|
||||
TODO: Migrate to Pydantic BaseModel for:
|
||||
- Type safety and IDE autocomplete
|
||||
- Automatic validation
|
||||
- Clear separation between domain model and serialization
|
||||
- Database backend abstraction (file → postgres → mongodb)
|
||||
- Configuration override chain resolution (Watch → Tag → Global)
|
||||
- Immutability options
|
||||
- Better testing
|
||||
- USE https://docs.pydantic.dev/latest/integrations/datamodel_code_generator TO BUILD THE MODEL FROM THE API-SPEC!!!
|
||||
|
||||
CHAIN RESOLUTION ARCHITECTURE:
|
||||
The dream is a 3-level override hierarchy:
|
||||
Watch settings → Tag/Group settings → Global settings
|
||||
|
||||
Current implementation: MANUAL resolution scattered across codebase
|
||||
- Processors manually check watch.get('field')
|
||||
- Loop through tags to find overrides_watch=True
|
||||
- Fall back to datastore['settings']['application']['field']
|
||||
|
||||
Pydantic implementation: AUTOMATIC resolution via @computed_field
|
||||
- Single source of truth for each setting's resolution logic
|
||||
- Type-safe, testable, self-documenting
|
||||
- Example: watch.resolved_fetch_backend (instead of nested dict navigation)
|
||||
|
||||
See: Watch.py model docstring for detailed Pydantic architecture plan
|
||||
See: Tag.py model docstring for tag override explanation
|
||||
See: processors/restock_diff/processor.py:184-192 for current manual example
|
||||
|
||||
Core Fields:
|
||||
uuid (str): Unique identifier for this watch (auto-generated)
|
||||
url (str): Target URL to monitor for changes
|
||||
title (str|None): Custom display name (overrides page_title if set)
|
||||
page_title (str|None): Title extracted from <title> tag of monitored page
|
||||
tags (List[str]): List of tag UUIDs for categorization
|
||||
tag (str): DEPRECATED - Old single-tag system, use tags instead
|
||||
|
||||
Check Configuration:
|
||||
processor (str): Processor type ('text_json_diff', 'restock_diff', etc.)
|
||||
fetch_backend (str): Fetcher to use ('system', 'html_requests', 'playwright', etc.)
|
||||
method (str): HTTP method ('GET', 'POST', etc.)
|
||||
headers (dict): Custom HTTP headers to send
|
||||
proxy (str|None): Preferred proxy server
|
||||
paused (bool): Whether change detection is paused
|
||||
|
||||
Scheduling:
|
||||
time_between_check (dict): Check interval {'weeks': int, 'days': int, 'hours': int, 'minutes': int, 'seconds': int}
|
||||
time_between_check_use_default (bool): Use global default interval if True
|
||||
time_schedule_limit (dict): Weekly schedule limiting when checks can run
|
||||
Structure: {
|
||||
'enabled': bool,
|
||||
'monday/tuesday/.../sunday': {
|
||||
'enabled': bool,
|
||||
'start_time': str ('HH:MM'),
|
||||
'duration': {'hours': str, 'minutes': str}
|
||||
}
|
||||
}
|
||||
|
||||
Content Filtering:
|
||||
include_filters (List[str]): CSS/XPath selectors to extract content
|
||||
subtractive_selectors (List[str]): Selectors to remove from content
|
||||
ignore_text (List[str]): Text patterns to ignore in change detection
|
||||
trigger_text (List[str]): Text/regex that must be present to trigger change
|
||||
text_should_not_be_present (List[str]): Text that should NOT be present
|
||||
extract_text (List[str]): Regex patterns to extract specific text after filtering
|
||||
|
||||
Text Processing:
|
||||
trim_text_whitespace (bool): Strip leading/trailing whitespace
|
||||
sort_text_alphabetically (bool): Sort lines alphabetically before comparison
|
||||
remove_duplicate_lines (bool): Remove duplicate lines
|
||||
check_unique_lines (bool): Compare against all history for unique lines
|
||||
strip_ignored_lines (bool|None): Remove lines matching ignore patterns
|
||||
|
||||
Change Detection Filters:
|
||||
filter_text_added (bool): Include added text in change detection
|
||||
filter_text_removed (bool): Include removed text in change detection
|
||||
filter_text_replaced (bool): Include replaced text in change detection
|
||||
|
||||
Browser Automation:
|
||||
browser_steps (List[dict]): Browser automation steps for JS-heavy sites
|
||||
browser_steps_last_error_step (int|None): Last step that caused error
|
||||
webdriver_delay (int|None): Seconds to wait after page load
|
||||
webdriver_js_execute_code (str|None): JavaScript to execute before extraction
|
||||
|
||||
Restock Detection:
|
||||
in_stock_only (bool): Only trigger on in-stock transitions
|
||||
follow_price_changes (bool): Monitor price changes
|
||||
has_ldjson_price_data (bool|None): Whether page has LD-JSON price data
|
||||
track_ldjson_price_data (str|None): Track LD-JSON price data ('ACCEPT', 'REJECT', None)
|
||||
price_change_threshold_percent (float|None): Minimum price change % to trigger
|
||||
|
||||
Notifications:
|
||||
notification_urls (List[str]): Apprise URLs for notifications
|
||||
notification_title (str|None): Custom notification title template
|
||||
notification_body (str|None): Custom notification body template
|
||||
notification_format (str): Notification format (e.g., 'System default', 'Text', 'HTML')
|
||||
notification_muted (bool): Disable notifications for this watch
|
||||
notification_screenshot (bool): Include screenshot in notifications
|
||||
notification_alert_count (int): Number of notifications sent
|
||||
last_notification_error (str|None): Last notification error message
|
||||
body (str|None): DEPRECATED? Legacy notification body field
|
||||
filter_failure_notification_send (bool): Send notification on filter failures
|
||||
|
||||
History & State:
|
||||
date_created (int|None): Unix timestamp of watch creation
|
||||
last_checked (int): Unix timestamp of last check
|
||||
last_viewed (int): History snapshot key of last user view
|
||||
last_error (str|bool): Last error message or False if no error
|
||||
check_count (int): Total number of checks performed
|
||||
fetch_time (float): Duration of last fetch in seconds
|
||||
consecutive_filter_failures (int): Counter for consecutive filter match failures
|
||||
previous_md5 (str|bool): MD5 hash of previous content
|
||||
history_snapshot_max_length (int|None): Max history snapshots to keep (None = use global)
|
||||
|
||||
Conditions:
|
||||
conditions (dict): Custom conditions for change detection logic
|
||||
conditions_match_logic (str): Logic operator ('ALL', 'ANY') for conditions
|
||||
|
||||
Metadata:
|
||||
content-type (str|None): Content-Type from last fetch
|
||||
remote_server_reply (str|None): Server header from last response
|
||||
ignore_status_codes (List[int]|None): HTTP status codes to ignore
|
||||
use_page_title_in_list (bool|None): Display page title in watch list (None = use system default)
|
||||
|
||||
Instance Attributes (not serialized):
|
||||
__datastore: Reference to parent DataStore (set externally after creation)
|
||||
data_dir: Filesystem path for this watch's data directory
|
||||
|
||||
Notes:
|
||||
- Many fields default to None to distinguish "not set" from "set to default"
|
||||
- When field is None, system-level defaults are used
|
||||
- Processor-specific configs (e.g., processor_config_*) are NOT stored in watch.json
|
||||
They are stored in separate {processor_name}.json files
|
||||
- This class is used for both Watch and Tag objects (tags reuse the structure)
|
||||
"""
|
||||
|
||||
def __init__(self, *arg, **kw):
|
||||
# Store datastore reference (common to Watch and Tag)
|
||||
# Use single underscore to avoid name mangling issues in subclasses
|
||||
self._datastore = kw.get('__datastore')
|
||||
if kw.get('__datastore'):
|
||||
del kw['__datastore']
|
||||
|
||||
# Store datastore_path (common to Watch and Tag)
|
||||
self._datastore_path = kw.get('datastore_path')
|
||||
if kw.get('datastore_path'):
|
||||
del kw['datastore_path']
|
||||
|
||||
# IMPORTANT: Don't initialize __watch_was_edited yet!
|
||||
# We'll initialize it AFTER the initial update() call below
|
||||
# This prevents marking the watch as edited during initialization
|
||||
|
||||
self.update({
|
||||
# Custom notification content
|
||||
# Re #110, so then if this is set to None, we know to use the default value instead
|
||||
@@ -179,7 +16,7 @@ class watch_base(dict):
|
||||
'body': None,
|
||||
'browser_steps': [],
|
||||
'browser_steps_last_error_step': None,
|
||||
'conditions' : [],
|
||||
'conditions' : {},
|
||||
'conditions_match_logic': CONDITIONS_MATCH_LOGIC_DEFAULT,
|
||||
'check_count': 0,
|
||||
'check_unique_lines': False, # On change-detected, compare against all history if its something new
|
||||
@@ -216,6 +53,7 @@ class watch_base(dict):
|
||||
'page_title': None, # <title> from the page
|
||||
'paused': False,
|
||||
'previous_md5': False,
|
||||
'previous_md5_before_filters': False, # Used for skipping changedetection entirely
|
||||
'processor': 'text_json_diff', # could be restock_diff or others from .processors
|
||||
'price_change_threshold_percent': None,
|
||||
'proxy': None, # Preferred proxy connection
|
||||
@@ -301,372 +139,5 @@ class watch_base(dict):
|
||||
|
||||
super(watch_base, self).__init__(*arg, **kw)
|
||||
|
||||
# Check if we're being initialized from an existing watch object
|
||||
# that has was_edited=True, so we can preserve the flag
|
||||
preserve_edited_flag = False
|
||||
if self.get('default'):
|
||||
# When creating a new watch object from an existing one (e.g., changing processor),
|
||||
# preserve the was_edited flag if it was True
|
||||
default_watch = self.get('default')
|
||||
if hasattr(default_watch, 'was_edited') and default_watch.was_edited:
|
||||
preserve_edited_flag = True
|
||||
del self['default']
|
||||
|
||||
# NOW initialize the edited flag after all initial setup is complete
|
||||
# This ensures initialization doesn't trigger the edited flag
|
||||
# But preserve it if the source watch had it set to True
|
||||
self.__watch_was_edited = preserve_edited_flag
|
||||
|
||||
def _mark_field_as_edited(self, key):
|
||||
"""
|
||||
Helper to mark a field as edited if it's writable.
|
||||
|
||||
Internal method used by __setitem__, update(), pop(), etc.
|
||||
"""
|
||||
# Don't track edits during initial load or if already edited
|
||||
if not hasattr(self, '_watch_base__watch_was_edited'):
|
||||
return
|
||||
if self.__watch_was_edited:
|
||||
return # Already marked as edited
|
||||
|
||||
# Import from shared schema utilities (no circular dependency)
|
||||
from .schema_utils import get_readonly_watch_fields
|
||||
readonly_fields = get_readonly_watch_fields()
|
||||
|
||||
# Additional system-managed fields not in OpenAPI spec (yet)
|
||||
# These are set by processors/workers and should not trigger edited flag
|
||||
additional_system_fields = {
|
||||
'last_check_status', # Set by processors
|
||||
'restock', # Set by restock processor
|
||||
'last_viewed', # Set by mark_all_viewed endpoint
|
||||
}
|
||||
|
||||
# Only mark as edited if this is a user-writable field
|
||||
if key not in readonly_fields and key not in additional_system_fields:
|
||||
self.__watch_was_edited = True
|
||||
|
||||
def __setitem__(self, key, value):
|
||||
"""
|
||||
Override dict.__setitem__ to track when writable watch fields are modified.
|
||||
|
||||
This enables skipping reprocessing when:
|
||||
1. HTML content is unchanged (checksumFromPreviousCheckWasTheSame)
|
||||
2. AND watch configuration was not edited
|
||||
|
||||
Only sets the edited flag when field is NOT in readonly_fields (from OpenAPI spec).
|
||||
"""
|
||||
# Set the value first (always)
|
||||
super().__setitem__(key, value)
|
||||
# Mark as edited if writable field
|
||||
self._mark_field_as_edited(key)
|
||||
|
||||
def __delitem__(self, key):
|
||||
"""Override dict.__delitem__ to track deletions of writable fields."""
|
||||
super().__delitem__(key)
|
||||
self._mark_field_as_edited(key)
|
||||
|
||||
def update(self, *args, **kwargs):
|
||||
|
||||
if args and args[0].get('browser_steps'):
|
||||
args[0]['browser_steps'] = browser_steps_get_valid_steps(args[0].get('browser_steps'))
|
||||
|
||||
"""Override dict.update() to track modifications to writable fields."""
|
||||
# Call parent update first
|
||||
super().update(*args, **kwargs)
|
||||
|
||||
# Mark as edited for any writable fields that were updated
|
||||
# Handle both update(dict) and update(key=value) forms
|
||||
if args:
|
||||
for key in args[0].keys():
|
||||
self._mark_field_as_edited(key)
|
||||
for key in kwargs.keys():
|
||||
self._mark_field_as_edited(key)
|
||||
|
||||
|
||||
def pop(self, key, *args):
|
||||
"""Override dict.pop() to track removal of writable fields."""
|
||||
result = super().pop(key, *args)
|
||||
self._mark_field_as_edited(key)
|
||||
return result
|
||||
|
||||
def setdefault(self, key, default=None):
|
||||
"""Override dict.setdefault() to track modifications to writable fields."""
|
||||
# Only marks as edited if key didn't exist (i.e., a new value was set)
|
||||
existed = key in self
|
||||
result = super().setdefault(key, default)
|
||||
if not existed:
|
||||
self._mark_field_as_edited(key)
|
||||
return result
|
||||
|
||||
@property
|
||||
def was_edited(self):
|
||||
"""
|
||||
Check if watch configuration was edited since last processing.
|
||||
|
||||
Returns:
|
||||
bool: True if writable fields were modified, False otherwise
|
||||
"""
|
||||
return getattr(self, '_watch_base__watch_was_edited', False)
|
||||
|
||||
def reset_watch_edited_flag(self):
|
||||
"""
|
||||
Reset the watch edited flag after successful processing.
|
||||
|
||||
Call this after processing completes to allow future content-only change detection.
|
||||
"""
|
||||
self.__watch_was_edited = False
|
||||
|
||||
@classmethod
|
||||
def get_property_names(cls):
|
||||
"""
|
||||
Get all @property attribute names from this model class using introspection.
|
||||
|
||||
This discovers computed/derived properties that are not stored in the datastore.
|
||||
These properties should be filtered out during PUT/POST requests.
|
||||
|
||||
Returns:
|
||||
frozenset: Immutable set of @property attribute names from the model class
|
||||
"""
|
||||
import functools
|
||||
|
||||
# Create a cached version if it doesn't exist
|
||||
if not hasattr(cls, '_cached_get_property_names'):
|
||||
@functools.cache
|
||||
def _get_props():
|
||||
properties = set()
|
||||
# Use introspection to find all @property attributes
|
||||
for name in dir(cls):
|
||||
# Skip private/magic attributes
|
||||
if name.startswith('_'):
|
||||
continue
|
||||
try:
|
||||
attr = getattr(cls, name)
|
||||
# Check if it's a property descriptor
|
||||
if isinstance(attr, property):
|
||||
properties.add(name)
|
||||
except (AttributeError, TypeError):
|
||||
continue
|
||||
return frozenset(properties)
|
||||
|
||||
cls._cached_get_property_names = _get_props
|
||||
|
||||
return cls._cached_get_property_names()
|
||||
|
||||
def __deepcopy__(self, memo):
|
||||
"""
|
||||
Custom deepcopy for all watch_base subclasses (Watch, Tag, etc.).
|
||||
|
||||
CRITICAL FIX: Prevents copying large reference objects like __datastore
|
||||
which would cause exponential memory growth when Watch objects are deepcopied.
|
||||
|
||||
This is called by:
|
||||
- api/Watch.py:76 (API endpoint)
|
||||
- api/Tags.py:28 (Tags API)
|
||||
- processors/base.py:26 (EVERY processor run)
|
||||
- store/__init__.py:544 (clone watch)
|
||||
- And other locations
|
||||
"""
|
||||
from copy import deepcopy
|
||||
|
||||
# Create new instance without calling __init__
|
||||
cls = self.__class__
|
||||
new_obj = cls.__new__(cls)
|
||||
memo[id(self)] = new_obj
|
||||
|
||||
# Copy the dict data (all the settings)
|
||||
for key, value in self.items():
|
||||
new_obj[key] = deepcopy(value, memo)
|
||||
|
||||
# Copy instance attributes dynamically
|
||||
# This handles Watch-specific attrs (like __datastore) and any future subclass attrs
|
||||
for attr_name in dir(self):
|
||||
# Skip methods, special attrs, and dict keys
|
||||
if attr_name.startswith('_') and not attr_name.startswith('__'):
|
||||
# This catches _model__datastore, _model__history_n, etc.
|
||||
try:
|
||||
attr_value = getattr(self, attr_name)
|
||||
|
||||
# Special handling: Share references to large objects instead of copying
|
||||
# Examples: _datastore, __datastore, __app_reference, __global_settings, etc.
|
||||
if (attr_name == '_datastore' or
|
||||
attr_name.endswith('__datastore') or
|
||||
attr_name.endswith('__app')):
|
||||
# Share the reference (don't copy!) to prevent memory leaks
|
||||
setattr(new_obj, attr_name, attr_value)
|
||||
# Skip cache attributes - let them regenerate on demand
|
||||
elif 'cache' in attr_name.lower():
|
||||
pass # Don't copy caches
|
||||
# Copy regular instance attributes
|
||||
elif not callable(attr_value):
|
||||
setattr(new_obj, attr_name, attr_value)
|
||||
except AttributeError:
|
||||
pass # Attribute doesn't exist in this instance
|
||||
|
||||
return new_obj
|
||||
|
||||
def __getstate__(self):
|
||||
"""
|
||||
Custom pickle serialization for all watch_base subclasses.
|
||||
|
||||
Excludes large reference objects (like __datastore) from serialization.
|
||||
"""
|
||||
# Get the dict data
|
||||
state = dict(self)
|
||||
|
||||
# Collect instance attributes (excluding methods and large references)
|
||||
instance_attrs = {}
|
||||
for attr_name in dir(self):
|
||||
if attr_name.startswith('_') and not attr_name.startswith('__'):
|
||||
try:
|
||||
attr_value = getattr(self, attr_name)
|
||||
# Exclude large reference objects and caches from serialization
|
||||
if not (attr_name == '_datastore' or
|
||||
attr_name.endswith('__datastore') or
|
||||
attr_name.endswith('__app') or
|
||||
'cache' in attr_name.lower() or
|
||||
callable(attr_value)):
|
||||
instance_attrs[attr_name] = attr_value
|
||||
except AttributeError:
|
||||
pass
|
||||
|
||||
if instance_attrs:
|
||||
state['__instance_metadata__'] = instance_attrs
|
||||
|
||||
return state
|
||||
|
||||
def __setstate__(self, state):
|
||||
"""
|
||||
Custom pickle deserialization for all watch_base subclasses.
|
||||
|
||||
WARNING: Large reference objects (like __datastore) are NOT restored!
|
||||
Caller must restore these references after unpickling if needed.
|
||||
"""
|
||||
# Extract metadata
|
||||
metadata = state.pop('__instance_metadata__', {})
|
||||
|
||||
# Restore dict data
|
||||
self.update(state)
|
||||
|
||||
# Restore instance attributes
|
||||
for attr_name, attr_value in metadata.items():
|
||||
setattr(self, attr_name, attr_value)
|
||||
|
||||
@property
|
||||
def data_dir(self):
|
||||
"""
|
||||
The base directory for this watch/tag data (property, computed from UUID).
|
||||
|
||||
Common property for both Watch and Tag objects.
|
||||
Returns path like: /datastore/{uuid}/
|
||||
"""
|
||||
return os.path.join(self._datastore_path, self['uuid']) if self._datastore_path else None
|
||||
|
||||
def ensure_data_dir_exists(self):
|
||||
"""
|
||||
Create the data directory if it doesn't exist.
|
||||
|
||||
Common method for both Watch and Tag objects.
|
||||
"""
|
||||
from loguru import logger
|
||||
if not os.path.isdir(self.data_dir):
|
||||
logger.debug(f"> Creating data dir {self.data_dir}")
|
||||
os.mkdir(self.data_dir)
|
||||
|
||||
def get_global_setting(self, *path):
|
||||
"""
|
||||
Get a setting from the global datastore configuration.
|
||||
|
||||
Args:
|
||||
*path: Path to the setting (e.g., 'application', 'history_snapshot_max_length')
|
||||
|
||||
Returns:
|
||||
The setting value, or None if not found
|
||||
|
||||
Example:
|
||||
maxlen = self.get_global_setting('application', 'history_snapshot_max_length')
|
||||
"""
|
||||
if not self._datastore:
|
||||
return None
|
||||
|
||||
try:
|
||||
value = self._datastore['settings']
|
||||
for key in path:
|
||||
value = value[key]
|
||||
return value
|
||||
except (KeyError, TypeError):
|
||||
return None
|
||||
|
||||
def _get_commit_data(self):
|
||||
"""
|
||||
Prepare data for commit (can be overridden by subclasses).
|
||||
|
||||
Returns:
|
||||
dict: Data to serialize (filtered as needed by subclass)
|
||||
"""
|
||||
import copy
|
||||
|
||||
# Acquire datastore lock to prevent concurrent modifications during copy
|
||||
lock = self._datastore.lock if self._datastore and hasattr(self._datastore, 'lock') else None
|
||||
|
||||
if lock:
|
||||
with lock:
|
||||
snapshot = dict(self)
|
||||
else:
|
||||
snapshot = dict(self)
|
||||
|
||||
# Deep copy snapshot (slower, but done outside lock to minimize contention)
|
||||
# Subclasses can override to filter keys (e.g., Watch excludes processor_config_*)
|
||||
return {k: copy.deepcopy(v) for k, v in snapshot.items()}
|
||||
|
||||
def _save_to_disk(self, data_dict, uuid):
|
||||
"""
|
||||
Save data to disk (must be implemented by subclasses).
|
||||
|
||||
Args:
|
||||
data_dict: Dictionary to save
|
||||
uuid: UUID for logging
|
||||
|
||||
Raises:
|
||||
NotImplementedError: If subclass doesn't implement
|
||||
"""
|
||||
raise NotImplementedError("Subclass must implement _save_to_disk()")
|
||||
|
||||
def commit(self):
|
||||
"""
|
||||
Save this watch/tag immediately to disk using atomic write.
|
||||
|
||||
Common commit logic for Watch and Tag objects.
|
||||
Subclasses override _get_commit_data() and _save_to_disk() for specifics.
|
||||
|
||||
Fire-and-forget: Logs errors but does not raise exceptions.
|
||||
Data remains in memory even if save fails, so next commit will retry.
|
||||
"""
|
||||
from loguru import logger
|
||||
|
||||
if not self.data_dir:
|
||||
entity_type = self.__class__.__name__
|
||||
logger.error(f"Cannot commit {entity_type} {self.get('uuid')} without datastore_path")
|
||||
return
|
||||
|
||||
uuid = self.get('uuid')
|
||||
if not uuid:
|
||||
entity_type = self.__class__.__name__
|
||||
logger.error(f"Cannot commit {entity_type} without UUID")
|
||||
return
|
||||
|
||||
# Get data from subclass (may filter keys)
|
||||
try:
|
||||
data_dict = self._get_commit_data()
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to prepare commit data for {uuid}: {e}")
|
||||
return
|
||||
|
||||
# Save to disk via subclass implementation
|
||||
try:
|
||||
# Determine entity type from module name (Watch.py -> watch, Tag.py -> tag)
|
||||
entity_type = _determine_entity_type(self.__class__)
|
||||
filename = f"{entity_type}.json"
|
||||
self._save_to_disk(data_dict, uuid)
|
||||
logger.debug(f"Committed {entity_type} {uuid} to {uuid}/{filename}")
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to commit {uuid}: {e}")
|
||||
del self['default']
|
||||
@@ -1,84 +0,0 @@
|
||||
"""
|
||||
Entity persistence mixin for Watch and Tag models.
|
||||
|
||||
Provides file-based persistence using atomic writes.
|
||||
"""
|
||||
|
||||
import functools
|
||||
import inspect
|
||||
|
||||
|
||||
@functools.lru_cache(maxsize=None)
|
||||
def _determine_entity_type(cls):
|
||||
"""
|
||||
Determine entity type from class hierarchy (cached at class level).
|
||||
|
||||
Args:
|
||||
cls: The class to inspect
|
||||
|
||||
Returns:
|
||||
str: Entity type ('watch', 'tag', etc.)
|
||||
|
||||
Raises:
|
||||
ValueError: If entity type cannot be determined
|
||||
"""
|
||||
for base_class in inspect.getmro(cls):
|
||||
module_name = base_class.__module__
|
||||
if module_name.startswith('changedetectionio.model.'):
|
||||
# Get last part after dot: "changedetectionio.model.Watch" -> "watch"
|
||||
return module_name.split('.')[-1].lower()
|
||||
|
||||
raise ValueError(
|
||||
f"Cannot determine entity type for {cls.__module__}.{cls.__name__}. "
|
||||
f"Entity must inherit from a class in changedetectionio.model (Watch or Tag)."
|
||||
)
|
||||
|
||||
|
||||
class EntityPersistenceMixin:
|
||||
"""
|
||||
Mixin providing file persistence for watch_base subclasses (Watch, Tag, etc.).
|
||||
|
||||
This mixin provides the _save_to_disk() method required by watch_base.commit().
|
||||
It automatically determines the correct filename and size limits based on class hierarchy.
|
||||
|
||||
Usage:
|
||||
class model(EntityPersistenceMixin, watch_base): # in Watch.py
|
||||
pass
|
||||
|
||||
class model(EntityPersistenceMixin, watch_base): # in Tag.py
|
||||
pass
|
||||
"""
|
||||
|
||||
def _save_to_disk(self, data_dict, uuid):
|
||||
"""
|
||||
Save entity to disk using atomic write.
|
||||
|
||||
Implements the abstract method required by watch_base.commit().
|
||||
Automatically determines filename and size limits from class hierarchy.
|
||||
|
||||
Args:
|
||||
data_dict: Dictionary to save
|
||||
uuid: UUID for logging
|
||||
|
||||
Raises:
|
||||
ValueError: If entity type cannot be determined from class hierarchy
|
||||
"""
|
||||
# Import here to avoid circular dependency
|
||||
from changedetectionio.store.file_saving_datastore import save_entity_atomic
|
||||
|
||||
# Determine entity type (cached at class level, not instance level)
|
||||
entity_type = _determine_entity_type(self.__class__)
|
||||
|
||||
# Set filename and size limits based on entity type
|
||||
filename = f'{entity_type}.json'
|
||||
max_size_mb = 10 if entity_type == 'watch' else 1
|
||||
|
||||
# Save using generic function
|
||||
save_entity_atomic(
|
||||
self.data_dir,
|
||||
uuid,
|
||||
data_dict,
|
||||
filename=filename,
|
||||
entity_type=entity_type,
|
||||
max_size_mb=max_size_mb
|
||||
)
|
||||
@@ -1,92 +0,0 @@
|
||||
"""
|
||||
Schema utilities for Watch and Tag models.
|
||||
|
||||
Provides functions to extract readonly fields and properties from OpenAPI spec.
|
||||
Shared by both the model layer and API layer to avoid circular dependencies.
|
||||
"""
|
||||
|
||||
import functools
|
||||
|
||||
|
||||
@functools.cache
|
||||
def get_openapi_schema_dict():
|
||||
"""
|
||||
Get the raw OpenAPI spec dictionary for schema access.
|
||||
|
||||
Returns the YAML dict directly (not the OpenAPI object).
|
||||
"""
|
||||
import os
|
||||
import yaml
|
||||
|
||||
spec_path = os.path.join(os.path.dirname(__file__), '../../docs/api-spec.yaml')
|
||||
if not os.path.exists(spec_path):
|
||||
spec_path = os.path.join(os.path.dirname(__file__), '../docs/api-spec.yaml')
|
||||
|
||||
with open(spec_path, 'r', encoding='utf-8') as f:
|
||||
return yaml.safe_load(f)
|
||||
|
||||
|
||||
@functools.cache
|
||||
def _resolve_readonly_fields(schema_name):
|
||||
"""
|
||||
Generic helper to resolve readOnly fields, including allOf inheritance.
|
||||
|
||||
Args:
|
||||
schema_name: Name of the schema (e.g., 'Watch', 'Tag')
|
||||
|
||||
Returns:
|
||||
frozenset: All readOnly field names including inherited ones
|
||||
"""
|
||||
spec_dict = get_openapi_schema_dict()
|
||||
schema = spec_dict['components']['schemas'].get(schema_name, {})
|
||||
|
||||
readonly_fields = set()
|
||||
|
||||
# Handle allOf (schema inheritance)
|
||||
if 'allOf' in schema:
|
||||
for item in schema['allOf']:
|
||||
# Resolve $ref to parent schema
|
||||
if '$ref' in item:
|
||||
ref_path = item['$ref'].split('/')[-1]
|
||||
ref_schema = spec_dict['components']['schemas'].get(ref_path, {})
|
||||
if 'properties' in ref_schema:
|
||||
for field_name, field_def in ref_schema['properties'].items():
|
||||
if field_def.get('readOnly') is True:
|
||||
readonly_fields.add(field_name)
|
||||
# Check schema-specific properties
|
||||
if 'properties' in item:
|
||||
for field_name, field_def in item['properties'].items():
|
||||
if field_def.get('readOnly') is True:
|
||||
readonly_fields.add(field_name)
|
||||
else:
|
||||
# Direct properties (no inheritance)
|
||||
if 'properties' in schema:
|
||||
for field_name, field_def in schema['properties'].items():
|
||||
if field_def.get('readOnly') is True:
|
||||
readonly_fields.add(field_name)
|
||||
|
||||
return frozenset(readonly_fields)
|
||||
|
||||
|
||||
@functools.cache
|
||||
def get_readonly_watch_fields():
|
||||
"""
|
||||
Extract readOnly field names from Watch schema in OpenAPI spec.
|
||||
|
||||
Returns readOnly fields from WatchBase (uuid, date_created) + Watch-specific readOnly fields.
|
||||
|
||||
Used by:
|
||||
- model/watch_base.py: Track when writable fields are edited
|
||||
- api/Watch.py: Filter readonly fields from PUT requests
|
||||
"""
|
||||
return _resolve_readonly_fields('Watch')
|
||||
|
||||
|
||||
@functools.cache
|
||||
def get_readonly_tag_fields():
|
||||
"""
|
||||
Extract readOnly field names from Tag schema in OpenAPI spec.
|
||||
|
||||
Returns readOnly fields from WatchBase (uuid, date_created) + Tag-specific readOnly fields.
|
||||
"""
|
||||
return _resolve_readonly_fields('Tag')
|
||||
@@ -54,152 +54,34 @@ def _check_cascading_vars(datastore, var_name, watch):
|
||||
return None
|
||||
|
||||
|
||||
class FormattableTimestamp(str):
|
||||
"""
|
||||
A str subclass representing a formatted datetime. As a plain string it renders
|
||||
with the default format, but can also be called with a custom format argument
|
||||
in Jinja2 templates:
|
||||
|
||||
{{ change_datetime }} → '2024-01-15 10:30:00 UTC'
|
||||
{{ change_datetime(format='%Y') }} → '2024'
|
||||
{{ change_datetime(format='%A') }} → 'Monday'
|
||||
{{ change_datetime(format='%Y-%m-%d') }} → '2024-01-15'
|
||||
|
||||
Being a str subclass means it is natively JSON serializable.
|
||||
"""
|
||||
_DEFAULT_FORMAT = '%Y-%m-%d %H:%M:%S %Z'
|
||||
|
||||
def __new__(cls, timestamp):
|
||||
dt = datetime.datetime.fromtimestamp(int(timestamp), tz=pytz.UTC)
|
||||
local_tz = datetime.datetime.now().astimezone().tzinfo
|
||||
dt_local = dt.astimezone(local_tz)
|
||||
try:
|
||||
formatted = dt_local.strftime(cls._DEFAULT_FORMAT)
|
||||
except Exception:
|
||||
formatted = dt_local.isoformat()
|
||||
instance = super().__new__(cls, formatted)
|
||||
instance._dt = dt_local
|
||||
return instance
|
||||
|
||||
def __call__(self, format=_DEFAULT_FORMAT):
|
||||
try:
|
||||
return self._dt.strftime(format)
|
||||
except Exception:
|
||||
return self._dt.isoformat()
|
||||
|
||||
|
||||
class FormattableExtract(str):
|
||||
"""
|
||||
A str subclass that holds only the extracted changed fragments from a diff.
|
||||
Used for {{diff_changed_from}} and {{diff_changed_to}} tokens.
|
||||
|
||||
{{ diff_changed_from }} → old value(s) only, e.g. "$99.99"
|
||||
{{ diff_changed_to }} → new value(s) only, e.g. "$109.99"
|
||||
|
||||
Multiple changed fragments are joined with newlines.
|
||||
Being a str subclass means it is natively JSON serializable.
|
||||
"""
|
||||
def __new__(cls, prev_snapshot, current_snapshot, extract_fn):
|
||||
if prev_snapshot or current_snapshot:
|
||||
from changedetectionio import diff as diff_module
|
||||
raw = diff_module.render_diff(prev_snapshot, current_snapshot, word_diff=True)
|
||||
extracted = extract_fn(raw)
|
||||
else:
|
||||
extracted = ''
|
||||
instance = super().__new__(cls, extracted)
|
||||
return instance
|
||||
|
||||
|
||||
class FormattableDiff(str):
|
||||
"""
|
||||
A str subclass representing a rendered diff. As a plain string it renders
|
||||
with the default options for that variant, but can be called with custom
|
||||
arguments in Jinja2 templates:
|
||||
|
||||
{{ diff }} → default diff output
|
||||
{{ diff(lines=5) }} → truncate to 5 lines
|
||||
{{ diff(added_only=true) }} → only show added lines
|
||||
{{ diff(removed_only=true) }} → only show removed lines
|
||||
{{ diff(context=3) }} → 3 lines of context around changes
|
||||
{{ diff(word_diff=false) }} → line-level diff instead of word-level
|
||||
{{ diff(lines=10, added_only=true) }} → combine args
|
||||
{{ diff_added(lines=5) }} → works on any diff_* variant too
|
||||
|
||||
Being a str subclass means it is natively JSON serializable.
|
||||
"""
|
||||
def __new__(cls, prev_snapshot, current_snapshot, **base_kwargs):
|
||||
if prev_snapshot or current_snapshot:
|
||||
from changedetectionio import diff as diff_module
|
||||
rendered = diff_module.render_diff(prev_snapshot, current_snapshot, **base_kwargs)
|
||||
else:
|
||||
rendered = ''
|
||||
instance = super().__new__(cls, rendered)
|
||||
instance._prev = prev_snapshot
|
||||
instance._current = current_snapshot
|
||||
instance._base_kwargs = base_kwargs
|
||||
return instance
|
||||
|
||||
def __call__(self, lines=None, added_only=False, removed_only=False, context=0,
|
||||
word_diff=None, case_insensitive=False, ignore_junk=False):
|
||||
from changedetectionio import diff as diff_module
|
||||
kwargs = dict(self._base_kwargs)
|
||||
|
||||
if added_only:
|
||||
kwargs['include_removed'] = False
|
||||
if removed_only:
|
||||
kwargs['include_added'] = False
|
||||
if context:
|
||||
kwargs['context_lines'] = int(context)
|
||||
if word_diff is not None:
|
||||
kwargs['word_diff'] = bool(word_diff)
|
||||
if case_insensitive:
|
||||
kwargs['case_insensitive'] = True
|
||||
if ignore_junk:
|
||||
kwargs['ignore_junk'] = True
|
||||
|
||||
result = diff_module.render_diff(self._prev or '', self._current or '', **kwargs)
|
||||
|
||||
if lines is not None:
|
||||
result = '\n'.join(result.splitlines()[:int(lines)])
|
||||
|
||||
return result
|
||||
|
||||
|
||||
|
||||
# What is passed around as notification context, also used as the complete list of valid {{ tokens }}
|
||||
class NotificationContextData(dict):
|
||||
def __init__(self, initial_data=None, **kwargs):
|
||||
# ValidateJinja2Template() validates against the keynames of this dict to check for valid tokens in the body (user submission)
|
||||
super().__init__({
|
||||
'base_url': None,
|
||||
'change_datetime': FormattableTimestamp(time.time()),
|
||||
'current_snapshot': None,
|
||||
'diff': FormattableDiff('', ''),
|
||||
'diff_clean': FormattableDiff('', '', include_change_type_prefix=False),
|
||||
'diff_added': FormattableDiff('', '', include_removed=False),
|
||||
'diff_added_clean': FormattableDiff('', '', include_removed=False, include_change_type_prefix=False),
|
||||
'diff_full': FormattableDiff('', '', include_equal=True),
|
||||
'diff_full_clean': FormattableDiff('', '', include_equal=True, include_change_type_prefix=False),
|
||||
'diff_patch': FormattableDiff('', '', patch_format=True),
|
||||
'diff_removed': FormattableDiff('', '', include_added=False),
|
||||
'diff_removed_clean': FormattableDiff('', '', include_added=False, include_change_type_prefix=False),
|
||||
'diff_changed_from': FormattableExtract('', '', extract_fn=lambda x: x),
|
||||
'diff_changed_to': FormattableExtract('', '', extract_fn=lambda x: x),
|
||||
'diff': None,
|
||||
'diff_clean': None,
|
||||
'diff_added': None,
|
||||
'diff_added_clean': None,
|
||||
'diff_full': None,
|
||||
'diff_full_clean': None,
|
||||
'diff_patch': None,
|
||||
'diff_removed': None,
|
||||
'diff_removed_clean': None,
|
||||
'diff_url': None,
|
||||
'markup_text_links_to_html_links': False, # If automatic conversion of plaintext to HTML should happen
|
||||
'notification_timestamp': time.time(),
|
||||
'prev_snapshot': None,
|
||||
'preview_url': None,
|
||||
'screenshot': None,
|
||||
'triggered_text': None,
|
||||
'timestamp_from': None,
|
||||
'timestamp_to': None,
|
||||
'triggered_text': None,
|
||||
'uuid': 'XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX', # Converted to 'watch_uuid' in create_notification_parameters
|
||||
'watch_mime_type': None,
|
||||
'watch_tag': None,
|
||||
'watch_title': None,
|
||||
'watch_url': 'https://WATCH-PLACE-HOLDER/',
|
||||
'watch_uuid': 'XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX', # Converted to 'watch_uuid' in create_notification_parameters
|
||||
})
|
||||
|
||||
# Apply any initial data passed in
|
||||
@@ -221,7 +103,7 @@ class NotificationContextData(dict):
|
||||
So we can test the output in the notification body
|
||||
"""
|
||||
for key in self.keys():
|
||||
if key in ['uuid', 'time', 'watch_uuid', 'change_datetime'] or key.startswith('diff'):
|
||||
if key in ['uuid', 'time', 'watch_uuid']:
|
||||
continue
|
||||
rand_str = 'RANDOM-PLACEHOLDER-'+''.join(random.choices(string.ascii_letters + string.digits, k=12))
|
||||
self[key] = rand_str
|
||||
@@ -233,6 +115,24 @@ class NotificationContextData(dict):
|
||||
|
||||
super().__setitem__(key, value)
|
||||
|
||||
def timestamp_to_localtime(timestamp):
|
||||
# Format the date using locale-aware formatting with timezone
|
||||
dt = datetime.datetime.fromtimestamp(int(timestamp))
|
||||
dt = dt.replace(tzinfo=pytz.UTC)
|
||||
|
||||
# Get local timezone-aware datetime
|
||||
local_tz = datetime.datetime.now().astimezone().tzinfo
|
||||
local_dt = dt.astimezone(local_tz)
|
||||
|
||||
# Format date with timezone - using strftime for locale awareness
|
||||
try:
|
||||
formatted_date = local_dt.strftime('%Y-%m-%d %H:%M:%S %Z')
|
||||
except:
|
||||
# Fallback if locale issues
|
||||
formatted_date = local_dt.isoformat()
|
||||
|
||||
return formatted_date
|
||||
|
||||
def add_rendered_diff_to_notification_vars(notification_scan_text:str, prev_snapshot:str, current_snapshot:str, word_diff:bool):
|
||||
"""
|
||||
Efficiently renders only the diff placeholders that are actually used in the notification text.
|
||||
@@ -250,12 +150,13 @@ def add_rendered_diff_to_notification_vars(notification_scan_text:str, prev_snap
|
||||
Returns:
|
||||
dict: Only the diff placeholders that were found in notification_scan_text, with rendered content
|
||||
"""
|
||||
from changedetectionio import diff
|
||||
import re
|
||||
from functools import lru_cache
|
||||
|
||||
now = time.time()
|
||||
|
||||
# Define base kwargs for each diff variant — these become the stored defaults
|
||||
# on the FormattableDiff object, so {{ diff(lines=5) }} overrides on top of them
|
||||
# Define specifications for each diff variant
|
||||
diff_specs = {
|
||||
'diff': {'word_diff': word_diff},
|
||||
'diff_clean': {'word_diff': word_diff, 'include_change_type_prefix': False},
|
||||
@@ -268,27 +169,23 @@ def add_rendered_diff_to_notification_vars(notification_scan_text:str, prev_snap
|
||||
'diff_removed_clean': {'word_diff': word_diff, 'include_added': False, 'include_change_type_prefix': False},
|
||||
}
|
||||
|
||||
from changedetectionio.diff import extract_changed_from, extract_changed_to
|
||||
extract_specs = {
|
||||
'diff_changed_from': extract_changed_from,
|
||||
'diff_changed_to': extract_changed_to,
|
||||
}
|
||||
# Memoize render_diff to avoid duplicate renders with same kwargs
|
||||
@lru_cache(maxsize=4)
|
||||
def cached_render(kwargs_tuple):
|
||||
return diff.render_diff(prev_snapshot, current_snapshot, **dict(kwargs_tuple))
|
||||
|
||||
ret = {}
|
||||
rendered_count = 0
|
||||
# Only create FormattableDiff/FormattableExtract objects for diff keys actually used in the notification text
|
||||
# Only check and render diff keys that exist in NotificationContextData
|
||||
for key in NotificationContextData().keys():
|
||||
if not key.startswith('diff'):
|
||||
continue
|
||||
pattern = rf"(?<![A-Za-z0-9_]){re.escape(key)}(?![A-Za-z0-9_])"
|
||||
if not re.search(pattern, notification_scan_text, re.IGNORECASE):
|
||||
continue
|
||||
if key in diff_specs:
|
||||
ret[key] = FormattableDiff(prev_snapshot, current_snapshot, **diff_specs[key])
|
||||
rendered_count += 1
|
||||
elif key in extract_specs:
|
||||
ret[key] = FormattableExtract(prev_snapshot, current_snapshot, extract_fn=extract_specs[key])
|
||||
rendered_count += 1
|
||||
if key.startswith('diff') and key in diff_specs:
|
||||
# Check if this placeholder is actually used in the notification text
|
||||
pattern = rf"(?<![A-Za-z0-9_]){re.escape(key)}(?![A-Za-z0-9_])"
|
||||
if re.search(pattern, notification_scan_text, re.IGNORECASE):
|
||||
kwargs = diff_specs[key]
|
||||
# Convert dict to sorted tuple for cache key (handles duplicate kwarg combinations)
|
||||
ret[key] = cached_render(tuple(sorted(kwargs.items())))
|
||||
rendered_count += 1
|
||||
|
||||
if rendered_count:
|
||||
logger.trace(f"Rendered {rendered_count} diff placeholder(s) {sorted(ret.keys())} in {time.time() - now:.3f}s")
|
||||
@@ -301,7 +198,7 @@ def set_basic_notification_vars(current_snapshot, prev_snapshot, watch, triggere
|
||||
'current_snapshot': current_snapshot,
|
||||
'prev_snapshot': prev_snapshot,
|
||||
'screenshot': watch.get_screenshot() if watch and watch.get('notification_screenshot') else None,
|
||||
'change_datetime': FormattableTimestamp(timestamp_changed) if timestamp_changed else None,
|
||||
'change_datetime': timestamp_to_localtime(timestamp_changed) if timestamp_changed else None,
|
||||
'triggered_text': triggered_text,
|
||||
'uuid': watch.get('uuid') if watch else None,
|
||||
'watch_url': watch.get('url') if watch else None,
|
||||
|
||||
@@ -129,109 +129,6 @@ class ChangeDetectionSpec:
|
||||
"""
|
||||
pass
|
||||
|
||||
@hookspec
|
||||
def update_handler_alter(update_handler, watch, datastore):
|
||||
"""Modify or wrap the update_handler before it processes a watch.
|
||||
|
||||
This hook is called after the update_handler (perform_site_check instance) is created
|
||||
but before it calls call_browser() and run_changedetection(). Plugins can use this to:
|
||||
- Wrap the handler to add logging/metrics
|
||||
- Modify handler configuration
|
||||
- Add custom preprocessing logic
|
||||
|
||||
Args:
|
||||
update_handler: The perform_site_check instance that will process the watch
|
||||
watch: The watch dict being processed
|
||||
datastore: The application datastore
|
||||
|
||||
Returns:
|
||||
object or None: Return a modified/wrapped handler, or None to keep the original.
|
||||
If multiple plugins return handlers, they are chained in registration order.
|
||||
"""
|
||||
pass
|
||||
|
||||
@hookspec
|
||||
def update_finalize(update_handler, watch, datastore, processing_exception):
|
||||
"""Called after watch processing completes (success or failure).
|
||||
|
||||
This hook is called in the finally block after all processing is complete,
|
||||
allowing plugins to perform cleanup, update metrics, or log final status.
|
||||
|
||||
The plugin can access update_handler.last_logging_insert_id if it was stored
|
||||
during update_handler_alter, and use processing_exception to determine if
|
||||
the processing succeeded or failed.
|
||||
|
||||
Args:
|
||||
update_handler: The perform_site_check instance (may be None if creation failed)
|
||||
watch: The watch dict that was processed (may be None if not loaded)
|
||||
datastore: The application datastore
|
||||
processing_exception: The exception from the main processing block, or None if successful.
|
||||
This does NOT include cleanup exceptions - only exceptions from
|
||||
the actual watch processing (fetch, diff, etc).
|
||||
|
||||
Returns:
|
||||
None: This hook doesn't return a value
|
||||
"""
|
||||
pass
|
||||
|
||||
@hookspec
|
||||
def get_html_head_extras():
|
||||
"""Return HTML to inject into the <head> of every page via base.html.
|
||||
|
||||
Plugins can use this to add <script>, <style>, or <link> tags that should
|
||||
be present on all pages. Return a raw HTML string or None.
|
||||
|
||||
IMPORTANT: Always use Flask's url_for() for any src/href URLs so that
|
||||
sub-path deployments (nginx reverse proxy with USE_X_SETTINGS / X-Forwarded-Prefix)
|
||||
work correctly. This hook is called inside a request context so url_for() is
|
||||
always available.
|
||||
|
||||
For small amounts of CSS/JS, return them inline — no file-serving needed::
|
||||
|
||||
from changedetectionio.pluggy_interface import hookimpl
|
||||
|
||||
@hookimpl
|
||||
def get_html_head_extras(self):
|
||||
return (
|
||||
'<style>.my-module-banner { color: red; }</style>\\n'
|
||||
'<script>console.log("my_module_content loaded");</script>'
|
||||
)
|
||||
|
||||
For larger assets, register your own lightweight Flask routes in the plugin
|
||||
module and point to them with url_for() so the sub-path prefix is handled
|
||||
automatically::
|
||||
|
||||
from flask import url_for, Response
|
||||
from changedetectionio.pluggy_interface import hookimpl
|
||||
from changedetectionio.flask_app import app as _app
|
||||
|
||||
MY_CSS = ".my-module-example { color: red; }"
|
||||
MY_JS = "console.log('my_module_content loaded');"
|
||||
|
||||
@_app.route('/my_module_content/css')
|
||||
def my_module_content_css():
|
||||
return Response(MY_CSS, mimetype='text/css',
|
||||
headers={'Cache-Control': 'max-age=3600'})
|
||||
|
||||
@_app.route('/my_module_content/js')
|
||||
def my_module_content_js():
|
||||
return Response(MY_JS, mimetype='application/javascript',
|
||||
headers={'Cache-Control': 'max-age=3600'})
|
||||
|
||||
@hookimpl
|
||||
def get_html_head_extras(self):
|
||||
css = url_for('my_module_content_css')
|
||||
js = url_for('my_module_content_js')
|
||||
return (
|
||||
f'<link rel="stylesheet" href="{css}">\\n'
|
||||
f'<script src="{js}" defer></script>'
|
||||
)
|
||||
|
||||
Returns:
|
||||
str or None: Raw HTML string to inject inside <head>, or None
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
# Set up Plugin Manager
|
||||
plugin_manager = pluggy.PluginManager(PLUGIN_NAMESPACE)
|
||||
@@ -602,82 +499,4 @@ def get_plugin_template_paths():
|
||||
template_paths.append(templates_dir)
|
||||
logger.debug(f"Added plugin template path: {templates_dir}")
|
||||
|
||||
return template_paths
|
||||
|
||||
|
||||
def apply_update_handler_alter(update_handler, watch, datastore):
|
||||
"""Apply update_handler_alter hooks from all plugins.
|
||||
|
||||
Allows plugins to wrap or modify the update_handler before it processes a watch.
|
||||
Multiple plugins can chain modifications - each plugin receives the result from
|
||||
the previous plugin.
|
||||
|
||||
Args:
|
||||
update_handler: The perform_site_check instance to potentially modify
|
||||
watch: The watch dict being processed
|
||||
datastore: The application datastore
|
||||
|
||||
Returns:
|
||||
object: The (potentially modified/wrapped) update_handler
|
||||
"""
|
||||
# Get all plugins that implement the update_handler_alter hook
|
||||
results = plugin_manager.hook.update_handler_alter(
|
||||
update_handler=update_handler,
|
||||
watch=watch,
|
||||
datastore=datastore
|
||||
)
|
||||
|
||||
# Chain results - each plugin gets the result from the previous one
|
||||
current_handler = update_handler
|
||||
if results:
|
||||
for result in results:
|
||||
if result is not None:
|
||||
logger.debug(f"Plugin modified update_handler for watch {watch.get('uuid')}")
|
||||
current_handler = result
|
||||
|
||||
return current_handler
|
||||
|
||||
|
||||
def apply_update_finalize(update_handler, watch, datastore, processing_exception):
|
||||
"""Apply update_finalize hooks from all plugins.
|
||||
|
||||
Called in the finally block after watch processing completes, allowing plugins
|
||||
to perform cleanup, update metrics, or log final status.
|
||||
|
||||
Args:
|
||||
update_handler: The perform_site_check instance (may be None)
|
||||
watch: The watch dict that was processed (may be None)
|
||||
datastore: The application datastore
|
||||
processing_exception: The exception from processing, or None if successful
|
||||
|
||||
Returns:
|
||||
None
|
||||
"""
|
||||
try:
|
||||
# Call all plugins that implement the update_finalize hook
|
||||
plugin_manager.hook.update_finalize(
|
||||
update_handler=update_handler,
|
||||
watch=watch,
|
||||
datastore=datastore,
|
||||
processing_exception=processing_exception
|
||||
)
|
||||
except Exception as e:
|
||||
# Don't let plugin errors crash the worker
|
||||
logger.error(f"Error in update_finalize hook: {e}")
|
||||
logger.exception(f"update_finalize hook exception details:")
|
||||
|
||||
|
||||
def collect_html_head_extras():
|
||||
"""Collect and combine HTML head extras from all plugins.
|
||||
|
||||
Called from a Flask template global so it always runs inside a request context.
|
||||
This means url_for() works correctly in plugin implementations, including when the
|
||||
app is deployed under a sub-path via USE_X_SETTINGS / X-Forwarded-Prefix (ProxyFix
|
||||
sets SCRIPT_NAME so url_for() automatically prepends the prefix).
|
||||
|
||||
Returns:
|
||||
str: Combined HTML string to inject inside <head>, or empty string
|
||||
"""
|
||||
results = plugin_manager.hook.get_html_head_extras()
|
||||
parts = [r for r in results if r]
|
||||
return "\n".join(parts) if parts else ""
|
||||
return template_paths
|
||||
@@ -9,15 +9,6 @@ Some suggestions for the future
|
||||
|
||||
- `graphical`
|
||||
|
||||
## API schema extension (`api.yaml`)
|
||||
|
||||
A processor can extend the Watch/Tag API schema by placing an `api.yaml` alongside its `__init__.py`.
|
||||
Define a `components.schemas.processor_config_<name>` entry and it will be merged into `WatchBase` at startup,
|
||||
making `processor_config_<name>` a valid field on all watch create/update API calls.
|
||||
The fully merged spec is served live at `/api/v1/full-spec`.
|
||||
|
||||
See `restock_diff/api.yaml` for a working example.
|
||||
|
||||
## Todo
|
||||
|
||||
- Make each processor return a extra list of sub-processed (so you could configure a single processor in different ways)
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
from functools import lru_cache
|
||||
from loguru import logger
|
||||
from flask_babel import gettext, get_locale
|
||||
from flask_babel import gettext
|
||||
import importlib
|
||||
import inspect
|
||||
import os
|
||||
@@ -190,15 +190,14 @@ def get_plugin_processor_metadata():
|
||||
logger.warning(f"Error getting plugin processor metadata: {e}")
|
||||
return metadata
|
||||
|
||||
@lru_cache(maxsize=32)
|
||||
def _available_processors_cached(locale_str):
|
||||
"""
|
||||
Internal cached function that includes locale in cache key.
|
||||
This ensures translations are cached per-language instead of globally.
|
||||
|
||||
:param locale_str: The locale string (e.g., 'en', 'it', 'zh')
|
||||
:return: A list of tuples (processor_name, translated_description, weight)
|
||||
def available_processors():
|
||||
"""
|
||||
Get a list of processors by name and description for the UI elements.
|
||||
Can be filtered via DISABLED_PROCESSORS environment variable (comma-separated list).
|
||||
:return: A list :)
|
||||
"""
|
||||
|
||||
processor_classes = find_processors()
|
||||
|
||||
# Check if DISABLED_PROCESSORS env var is set
|
||||
@@ -257,22 +256,6 @@ def _available_processors_cached(locale_str):
|
||||
# Return as tuples without weight (for backwards compatibility)
|
||||
return [(name, desc) for name, desc, weight in available]
|
||||
|
||||
def available_processors():
|
||||
"""
|
||||
Get a list of processors by name and description for the UI elements.
|
||||
Can be filtered via DISABLED_PROCESSORS environment variable (comma-separated list).
|
||||
|
||||
This function delegates to a locale-aware cached version to ensure translations
|
||||
are cached per-language instead of globally.
|
||||
|
||||
:return: A list of tuples (processor_name, translated_description)
|
||||
"""
|
||||
# Get current locale and use it as cache key
|
||||
# Convert Babel Locale object to string for use as cache key
|
||||
locale = get_locale()
|
||||
locale_str = str(locale) if locale else 'en'
|
||||
return _available_processors_cached(locale_str)
|
||||
|
||||
|
||||
def get_default_processor():
|
||||
"""
|
||||
|
||||
@@ -1,15 +1,10 @@
|
||||
import asyncio
|
||||
import re
|
||||
import hashlib
|
||||
|
||||
from changedetectionio.browser_steps.browser_steps import browser_steps_get_valid_steps
|
||||
from changedetectionio.content_fetchers.base import Fetcher
|
||||
from changedetectionio.strtobool import strtobool
|
||||
from changedetectionio.validate_url import is_private_hostname
|
||||
from copy import deepcopy
|
||||
from abc import abstractmethod
|
||||
import os
|
||||
from urllib.parse import urlparse
|
||||
from loguru import logger
|
||||
|
||||
SCREENSHOT_FORMAT_JPEG = 'JPEG'
|
||||
@@ -24,7 +19,6 @@ class difference_detection_processor():
|
||||
xpath_data = None
|
||||
preferred_proxy = None
|
||||
screenshot_format = SCREENSHOT_FORMAT_JPEG
|
||||
last_raw_content_checksum = None
|
||||
|
||||
def __init__(self, datastore, watch_uuid):
|
||||
self.datastore = datastore
|
||||
@@ -40,81 +34,6 @@ class difference_detection_processor():
|
||||
# Generic fetcher that should be extended (requests, playwright etc)
|
||||
self.fetcher = Fetcher()
|
||||
|
||||
# Load the last raw content checksum from file
|
||||
self.read_last_raw_content_checksum()
|
||||
|
||||
def update_last_raw_content_checksum(self, checksum):
|
||||
"""
|
||||
Save the raw content MD5 checksum to file.
|
||||
This is used for skip logic - avoid reprocessing if raw HTML unchanged.
|
||||
"""
|
||||
if not checksum:
|
||||
return
|
||||
|
||||
watch = self.datastore.data['watching'].get(self.watch_uuid)
|
||||
if not watch:
|
||||
return
|
||||
|
||||
data_dir = watch.data_dir
|
||||
if not data_dir:
|
||||
return
|
||||
|
||||
watch.ensure_data_dir_exists()
|
||||
checksum_file = os.path.join(data_dir, 'last-checksum.txt')
|
||||
|
||||
try:
|
||||
with open(checksum_file, 'w', encoding='utf-8') as f:
|
||||
f.write(checksum)
|
||||
self.last_raw_content_checksum = checksum
|
||||
except IOError as e:
|
||||
logger.warning(f"Failed to write checksum file for {self.watch_uuid}: {e}")
|
||||
|
||||
def read_last_raw_content_checksum(self):
|
||||
"""
|
||||
Read the last raw content MD5 checksum from file.
|
||||
Returns None if file doesn't exist (first run) or can't be read.
|
||||
"""
|
||||
watch = self.datastore.data['watching'].get(self.watch_uuid)
|
||||
if not watch:
|
||||
self.last_raw_content_checksum = None
|
||||
return
|
||||
|
||||
data_dir = watch.data_dir
|
||||
if not data_dir:
|
||||
self.last_raw_content_checksum = None
|
||||
return
|
||||
|
||||
checksum_file = os.path.join(data_dir, 'last-checksum.txt')
|
||||
|
||||
if not os.path.isfile(checksum_file):
|
||||
self.last_raw_content_checksum = None
|
||||
return
|
||||
|
||||
try:
|
||||
with open(checksum_file, 'r', encoding='utf-8') as f:
|
||||
self.last_raw_content_checksum = f.read().strip()
|
||||
except IOError as e:
|
||||
logger.warning(f"Failed to read checksum file for {self.watch_uuid}: {e}")
|
||||
self.last_raw_content_checksum = None
|
||||
|
||||
|
||||
async def validate_iana_url(self):
|
||||
"""Pre-flight SSRF check — runs DNS lookup in executor to avoid blocking the event loop.
|
||||
Covers all fetchers (requests, playwright, puppeteer, plugins) since every fetch goes
|
||||
through call_browser().
|
||||
"""
|
||||
if strtobool(os.getenv('ALLOW_IANA_RESTRICTED_ADDRESSES', 'false')):
|
||||
return
|
||||
parsed = urlparse(self.watch.link)
|
||||
if not parsed.hostname:
|
||||
return
|
||||
loop = asyncio.get_running_loop()
|
||||
if await loop.run_in_executor(None, is_private_hostname, parsed.hostname):
|
||||
raise Exception(
|
||||
f"Fetch blocked: '{self.watch.link}' resolves to a private/reserved IP address. "
|
||||
f"Set ALLOW_IANA_RESTRICTED_ADDRESSES=true to allow."
|
||||
)
|
||||
|
||||
async def call_browser(self, preferred_proxy_id=None):
|
||||
|
||||
from requests.structures import CaseInsensitiveDict
|
||||
@@ -128,8 +47,6 @@ class difference_detection_processor():
|
||||
"file:// type access is denied for security reasons."
|
||||
)
|
||||
|
||||
await self.validate_iana_url()
|
||||
|
||||
# Requests, playwright, other browser via wss:// etc, fetch_extra_something
|
||||
prefer_fetch_backend = self.watch.get('fetch_backend', 'system')
|
||||
|
||||
@@ -193,7 +110,7 @@ class difference_detection_processor():
|
||||
)
|
||||
|
||||
if self.watch.has_browser_steps:
|
||||
self.fetcher.browser_steps = browser_steps_get_valid_steps(self.watch.get('browser_steps', []))
|
||||
self.fetcher.browser_steps = self.watch.get('browser_steps', [])
|
||||
self.fetcher.browser_steps_screenshot_path = os.path.join(self.datastore.datastore_path, self.watch.get('uuid'))
|
||||
|
||||
# Tweak the base config with the per-watch ones
|
||||
@@ -260,16 +177,6 @@ class difference_detection_processor():
|
||||
# @todo .quit here could go on close object, so we can run JS if change-detected
|
||||
await self.fetcher.quit(watch=self.watch)
|
||||
|
||||
# Sanitize lone surrogates - these can appear when servers return malformed/mixed-encoding
|
||||
# content that gets decoded into surrogate characters (e.g. \udcad). Without this,
|
||||
# encode('utf-8') raises UnicodeEncodeError downstream in checksums, diffs, file writes, etc.
|
||||
# Covers all fetchers (requests, playwright, puppeteer, selenium) in one place.
|
||||
# Also note: By this point we SHOULD know the original encoding so it can safely convert to utf-8 for the rest of the app.
|
||||
# See: https://github.com/dgtlmoon/changedetection.io/issues/3952
|
||||
|
||||
if self.fetcher.content and isinstance(self.fetcher.content, str):
|
||||
self.fetcher.content = self.fetcher.content.encode('utf-8', errors='replace').decode('utf-8')
|
||||
|
||||
# After init, call run_changedetection() which will do the actual change-detection
|
||||
|
||||
def get_extra_watch_config(self, filename):
|
||||
@@ -286,12 +193,12 @@ class difference_detection_processor():
|
||||
import os
|
||||
|
||||
watch = self.datastore.data['watching'].get(self.watch_uuid)
|
||||
data_dir = watch.data_dir
|
||||
watch_data_dir = watch.watch_data_dir
|
||||
|
||||
if not data_dir:
|
||||
if not watch_data_dir:
|
||||
return {}
|
||||
|
||||
filepath = os.path.join(data_dir, filename)
|
||||
filepath = os.path.join(watch_data_dir, filename)
|
||||
|
||||
if not os.path.isfile(filepath):
|
||||
return {}
|
||||
@@ -316,16 +223,16 @@ class difference_detection_processor():
|
||||
import os
|
||||
|
||||
watch = self.datastore.data['watching'].get(self.watch_uuid)
|
||||
data_dir = watch.data_dir
|
||||
watch_data_dir = watch.watch_data_dir
|
||||
|
||||
if not data_dir:
|
||||
logger.warning(f"Cannot save extra watch config {filename}: no data_dir")
|
||||
if not watch_data_dir:
|
||||
logger.warning(f"Cannot save extra watch config {filename}: no watch_data_dir")
|
||||
return
|
||||
|
||||
# Ensure directory exists
|
||||
watch.ensure_data_dir_exists()
|
||||
|
||||
filepath = os.path.join(data_dir, filename)
|
||||
filepath = os.path.join(watch_data_dir, filename)
|
||||
|
||||
try:
|
||||
# If merge is enabled, read existing data first
|
||||
@@ -350,16 +257,8 @@ class difference_detection_processor():
|
||||
except IOError as e:
|
||||
logger.error(f"Failed to write extra watch config {filename}: {e}")
|
||||
|
||||
def get_raw_document_checksum(self):
|
||||
checksum = None
|
||||
|
||||
if self.fetcher.content:
|
||||
checksum = hashlib.md5(self.fetcher.content.encode('utf-8')).hexdigest()
|
||||
|
||||
return checksum
|
||||
|
||||
@abstractmethod
|
||||
def run_changedetection(self, watch, force_reprocess=False):
|
||||
def run_changedetection(self, watch):
|
||||
update_obj = {'last_notification_error': False, 'last_error': False}
|
||||
some_data = 'xxxxx'
|
||||
update_obj["previous_md5"] = hashlib.md5(some_data.encode('utf-8')).hexdigest()
|
||||
|
||||
@@ -42,7 +42,10 @@ def render_form(watch, datastore, request, url_for, render_template, flash, redi
|
||||
# Get error information for the template
|
||||
screenshot_url = watch.get_screenshot()
|
||||
|
||||
is_html_webdriver = watch.fetcher_supports_screenshots
|
||||
system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver'
|
||||
is_html_webdriver = False
|
||||
if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver' or watch.get('fetch_backend', '').startswith('extra_browser_'):
|
||||
is_html_webdriver = True
|
||||
|
||||
password_enabled_and_share_is_off = False
|
||||
if datastore.data['settings']['application'].get('password') or os.getenv("SALTED_PASS", False):
|
||||
|
||||
@@ -414,7 +414,7 @@ def render(watch, datastore, request, url_for, render_template, flash, redirect)
|
||||
|
||||
# Load historical data if available (for charts/visualization)
|
||||
comparison_data = {}
|
||||
comparison_config_path = os.path.join(watch.data_dir, "visual_comparison_data.json")
|
||||
comparison_config_path = os.path.join(watch.watch_data_dir, "visual_comparison_data.json")
|
||||
if os.path.isfile(comparison_config_path):
|
||||
try:
|
||||
with open(comparison_config_path, 'r') as f:
|
||||
|
||||
@@ -90,7 +90,7 @@ def on_config_save(watch, processor_config, datastore):
|
||||
processor_config['auto_track_region'] = False
|
||||
|
||||
# Remove old template file if exists
|
||||
template_path = os.path.join(watch.data_dir, CROPPED_IMAGE_TEMPLATE_FILENAME)
|
||||
template_path = os.path.join(watch.watch_data_dir, CROPPED_IMAGE_TEMPLATE_FILENAME)
|
||||
if os.path.exists(template_path):
|
||||
os.remove(template_path)
|
||||
logger.debug(f"Removed old template file: {template_path}")
|
||||
|
||||
@@ -30,7 +30,7 @@ class perform_site_check(difference_detection_processor):
|
||||
# Override to use PNG format for better image comparison (JPEG compression creates noise)
|
||||
screenshot_format = SCREENSHOT_FORMAT_PNG
|
||||
|
||||
def run_changedetection(self, watch, force_reprocess=False):
|
||||
def run_changedetection(self, watch):
|
||||
"""
|
||||
Perform screenshot comparison using OpenCV subprocess handler.
|
||||
|
||||
|
||||
@@ -100,13 +100,7 @@ class guess_stream_type():
|
||||
if any(s in http_content_header for s in RSS_XML_CONTENT_TYPES):
|
||||
self.is_rss = True
|
||||
elif any(s in http_content_header for s in JSON_CONTENT_TYPES):
|
||||
# JSONP detection: server claims application/json but content is actually JSONP (e.g. cb({...}))
|
||||
# A JSONP response starts with an identifier followed by '(' - not valid JSON
|
||||
if re.match(r'^\w[\w.]*\s*\(', test_content):
|
||||
logger.warning(f"Content-Type header claims JSON but content looks like JSONP (starts with identifier+parenthesis) - treating as plaintext")
|
||||
self.is_plaintext = True
|
||||
else:
|
||||
self.is_json = True
|
||||
self.is_json = True
|
||||
elif 'pdf' in magic_content_header:
|
||||
self.is_pdf = True
|
||||
# magic will call a rss document 'xml'
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
|
||||
from babel.numbers import parse_decimal
|
||||
from changedetectionio.model.Watch import model as BaseWatch
|
||||
from decimal import Decimal, InvalidOperation
|
||||
from typing import Union
|
||||
import re
|
||||
|
||||
@@ -11,8 +10,6 @@ supports_browser_steps = True
|
||||
supports_text_filters_and_triggers = True
|
||||
supports_text_filters_and_triggers_elements = True
|
||||
supports_request_type = True
|
||||
_price_re = re.compile(r"Price:\s*(\d+(?:\.\d+)?)", re.IGNORECASE)
|
||||
|
||||
|
||||
class Restock(dict):
|
||||
|
||||
@@ -34,7 +31,6 @@ class Restock(dict):
|
||||
|
||||
if standardized_value:
|
||||
# Convert to float
|
||||
# @todo locale needs to be the locale of the webpage
|
||||
return float(parse_decimal(standardized_value, locale='en'))
|
||||
|
||||
return None
|
||||
@@ -66,22 +62,15 @@ class Restock(dict):
|
||||
|
||||
super().__setitem__(key, value)
|
||||
|
||||
def get_price_from_history_str(history_str):
|
||||
m = _price_re.search(history_str)
|
||||
if not m:
|
||||
return None
|
||||
|
||||
try:
|
||||
return str(Decimal(m.group(1)))
|
||||
except InvalidOperation:
|
||||
return None
|
||||
|
||||
|
||||
class Watch(BaseWatch):
|
||||
def __init__(self, *arg, **kw):
|
||||
super().__init__(*arg, **kw)
|
||||
self['restock'] = Restock(kw['default']['restock']) if kw.get('default') and kw['default'].get('restock') else Restock()
|
||||
|
||||
self['restock_settings'] = kw['default']['restock_settings'] if kw.get('default',{}).get('restock_settings') else {
|
||||
'follow_price_changes': True,
|
||||
'in_stock_processing' : 'in_stock_only'
|
||||
} #@todo update
|
||||
|
||||
def clear_watch(self):
|
||||
super().clear_watch()
|
||||
@@ -90,27 +79,13 @@ class Watch(BaseWatch):
|
||||
def extra_notification_token_values(self):
|
||||
values = super().extra_notification_token_values()
|
||||
values['restock'] = self.get('restock', {})
|
||||
|
||||
values['restock']['previous_price'] = None
|
||||
if self.history_n >= 2:
|
||||
history = self.history
|
||||
if history and len(history) >=2:
|
||||
"""Unfortunately for now timestamp is stored as string key"""
|
||||
sorted_keys = sorted(list(history), key=lambda x: int(x))
|
||||
sorted_keys.reverse()
|
||||
|
||||
price_str = self.get_history_snapshot(timestamp=sorted_keys[-1])
|
||||
if price_str:
|
||||
values['restock']['previous_price'] = get_price_from_history_str(price_str)
|
||||
return values
|
||||
|
||||
def extra_notification_token_placeholder_info(self):
|
||||
values = super().extra_notification_token_placeholder_info()
|
||||
|
||||
values.append(('restock.price', "Price detected"))
|
||||
values.append(('restock.in_stock', "In stock status"))
|
||||
values.append(('restock.original_price', "Original price at first check"))
|
||||
values.append(('restock.previous_price', "Previous price in history"))
|
||||
|
||||
return values
|
||||
|
||||
|
||||
@@ -1,149 +0,0 @@
|
||||
components:
|
||||
schemas:
|
||||
processor_config_restock_diff:
|
||||
type: object
|
||||
description: Configuration for the restock_diff processor (restock and price tracking)
|
||||
properties:
|
||||
in_stock_processing:
|
||||
type: string
|
||||
enum: [in_stock_only, all_changes, 'off']
|
||||
default: in_stock_only
|
||||
description: |
|
||||
When to trigger on stock changes:
|
||||
- `in_stock_only`: Only trigger on Out Of Stock -> In Stock transitions
|
||||
- `all_changes`: Trigger on any availability change
|
||||
- `off`: Disable stock/availability tracking
|
||||
follow_price_changes:
|
||||
type: boolean
|
||||
default: true
|
||||
description: Monitor and track price changes
|
||||
price_change_min:
|
||||
type: [number, 'null']
|
||||
description: Trigger a notification when the price drops below this value
|
||||
price_change_max:
|
||||
type: [number, 'null']
|
||||
description: Trigger a notification when the price rises above this value
|
||||
price_change_threshold_percent:
|
||||
type: [number, 'null']
|
||||
minimum: 0
|
||||
maximum: 100
|
||||
description: Minimum price change percentage since the original price to trigger a notification
|
||||
|
||||
paths:
|
||||
/watch:
|
||||
post:
|
||||
x-code-samples:
|
||||
- lang: 'curl'
|
||||
label: 'Restock & price tracking'
|
||||
source: |
|
||||
curl -X POST "http://localhost:5000/api/v1/watch" \
|
||||
-H "x-api-key: YOUR_API_KEY" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"url": "https://example.com/product",
|
||||
"processor": "restock_diff",
|
||||
"processor_config_restock_diff": {
|
||||
"in_stock_processing": "in_stock_only",
|
||||
"follow_price_changes": true,
|
||||
"price_change_threshold_percent": 5
|
||||
}
|
||||
}'
|
||||
- lang: 'Python'
|
||||
label: 'Restock & price tracking'
|
||||
source: |
|
||||
import requests
|
||||
|
||||
headers = {
|
||||
'x-api-key': 'YOUR_API_KEY',
|
||||
'Content-Type': 'application/json'
|
||||
}
|
||||
data = {
|
||||
'url': 'https://example.com/product',
|
||||
'processor': 'restock_diff',
|
||||
'processor_config_restock_diff': {
|
||||
'in_stock_processing': 'in_stock_only',
|
||||
'follow_price_changes': True,
|
||||
'price_change_threshold_percent': 5,
|
||||
}
|
||||
}
|
||||
response = requests.post('http://localhost:5000/api/v1/watch',
|
||||
headers=headers, json=data)
|
||||
print(response.json())
|
||||
|
||||
/watch/{uuid}:
|
||||
put:
|
||||
x-code-samples:
|
||||
- lang: 'curl'
|
||||
label: 'Update restock config'
|
||||
source: |
|
||||
curl -X PUT "http://localhost:5000/api/v1/watch/YOUR-UUID" \
|
||||
-H "x-api-key: YOUR_API_KEY" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"processor_config_restock_diff": {
|
||||
"in_stock_processing": "all_changes",
|
||||
"follow_price_changes": true,
|
||||
"price_change_min": 10.00,
|
||||
"price_change_max": 500.00
|
||||
}
|
||||
}'
|
||||
- lang: 'Python'
|
||||
label: 'Update restock config'
|
||||
source: |
|
||||
import requests
|
||||
|
||||
headers = {
|
||||
'x-api-key': 'YOUR_API_KEY',
|
||||
'Content-Type': 'application/json'
|
||||
}
|
||||
uuid = 'YOUR-UUID'
|
||||
data = {
|
||||
'processor_config_restock_diff': {
|
||||
'in_stock_processing': 'all_changes',
|
||||
'follow_price_changes': True,
|
||||
'price_change_min': 10.00,
|
||||
'price_change_max': 500.00,
|
||||
}
|
||||
}
|
||||
response = requests.put(f'http://localhost:5000/api/v1/watch/{uuid}',
|
||||
headers=headers, json=data)
|
||||
print(response.text)
|
||||
|
||||
/tag/{uuid}:
|
||||
put:
|
||||
x-code-samples:
|
||||
- lang: 'curl'
|
||||
label: 'Set restock config on group/tag'
|
||||
source: |
|
||||
curl -X PUT "http://localhost:5000/api/v1/tag/YOUR-TAG-UUID" \
|
||||
-H "x-api-key: YOUR_API_KEY" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"overrides_watch": true,
|
||||
"processor_config_restock_diff": {
|
||||
"in_stock_processing": "in_stock_only",
|
||||
"follow_price_changes": true,
|
||||
"price_change_threshold_percent": 10
|
||||
}
|
||||
}'
|
||||
- lang: 'Python'
|
||||
label: 'Set restock config on group/tag'
|
||||
source: |
|
||||
import requests
|
||||
|
||||
headers = {
|
||||
'x-api-key': 'YOUR_API_KEY',
|
||||
'Content-Type': 'application/json'
|
||||
}
|
||||
tag_uuid = 'YOUR-TAG-UUID'
|
||||
data = {
|
||||
'overrides_watch': True,
|
||||
'processor_config_restock_diff': {
|
||||
'in_stock_processing': 'in_stock_only',
|
||||
'follow_price_changes': True,
|
||||
'price_change_threshold_percent': 10,
|
||||
}
|
||||
}
|
||||
response = requests.put(f'http://localhost:5000/api/v1/tag/{tag_uuid}',
|
||||
headers=headers, json=data)
|
||||
print(response.text)
|
||||
@@ -31,7 +31,7 @@ class RestockSettingsForm(Form):
|
||||
follow_price_changes = BooleanField(_l('Follow price changes'), default=True)
|
||||
|
||||
class processor_settings_form(processor_text_json_diff_form):
|
||||
processor_config_restock_diff = FormField(RestockSettingsForm)
|
||||
restock_settings = FormField(RestockSettingsForm)
|
||||
|
||||
def extra_tab_content(self):
|
||||
return _l('Restock & Price Detection')
|
||||
@@ -48,34 +48,34 @@ class processor_settings_form(processor_text_json_diff_form):
|
||||
|
||||
output += """
|
||||
{% from '_helpers.html' import render_field, render_checkbox_field, render_button %}
|
||||
<script>
|
||||
<script>
|
||||
$(document).ready(function () {
|
||||
toggleOpacity('#processor_config_restock_diff-follow_price_changes', '.price-change-minmax', true);
|
||||
toggleOpacity('#restock_settings-follow_price_changes', '.price-change-minmax', true);
|
||||
});
|
||||
</script>
|
||||
|
||||
<fieldset id="restock-fieldset-price-group">
|
||||
<div class="pure-control-group">
|
||||
<fieldset class="pure-group inline-radio">
|
||||
{{ render_field(form.processor_config_restock_diff.in_stock_processing) }}
|
||||
{{ render_field(form.restock_settings.in_stock_processing) }}
|
||||
</fieldset>
|
||||
<fieldset class="pure-group">
|
||||
{{ render_checkbox_field(form.processor_config_restock_diff.follow_price_changes) }}
|
||||
{{ render_checkbox_field(form.restock_settings.follow_price_changes) }}
|
||||
<span class="pure-form-message-inline">Changes in price should trigger a notification</span>
|
||||
</fieldset>
|
||||
<fieldset class="pure-group price-change-minmax">
|
||||
{{ render_field(form.processor_config_restock_diff.price_change_min, placeholder=watch.get('restock', {}).get('price')) }}
|
||||
<fieldset class="pure-group price-change-minmax">
|
||||
{{ render_field(form.restock_settings.price_change_min, placeholder=watch.get('restock', {}).get('price')) }}
|
||||
<span class="pure-form-message-inline">Minimum amount, Trigger a change/notification when the price drops <i>below</i> this value.</span>
|
||||
</fieldset>
|
||||
<fieldset class="pure-group price-change-minmax">
|
||||
{{ render_field(form.processor_config_restock_diff.price_change_max, placeholder=watch.get('restock', {}).get('price')) }}
|
||||
{{ render_field(form.restock_settings.price_change_max, placeholder=watch.get('restock', {}).get('price')) }}
|
||||
<span class="pure-form-message-inline">Maximum amount, Trigger a change/notification when the price rises <i>above</i> this value.</span>
|
||||
</fieldset>
|
||||
<fieldset class="pure-group price-change-minmax">
|
||||
{{ render_field(form.processor_config_restock_diff.price_change_threshold_percent) }}
|
||||
{{ render_field(form.restock_settings.price_change_threshold_percent) }}
|
||||
<span class="pure-form-message-inline">Price must change more than this % to trigger a change since the first check.</span><br>
|
||||
<span class="pure-form-message-inline">For example, If the product is $1,000 USD originally, <strong>2%</strong> would mean it has to change more than $20 since the first check.</span><br>
|
||||
</fieldset>
|
||||
</fieldset>
|
||||
</div>
|
||||
</fieldset>
|
||||
"""
|
||||
|
||||
@@ -2,7 +2,6 @@ from ..base import difference_detection_processor
|
||||
from ..exceptions import ProcessorException
|
||||
from . import Restock
|
||||
from loguru import logger
|
||||
from changedetectionio.content_fetchers.exceptions import checksumFromPreviousCheckWasTheSame
|
||||
|
||||
import urllib3
|
||||
import time
|
||||
@@ -57,259 +56,6 @@ def _deduplicate_prices(data):
|
||||
return list(unique_data)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# MEMORY MANAGEMENT: Why We Use Multiprocessing (Linux Only)
|
||||
# =============================================================================
|
||||
#
|
||||
# The get_itemprop_availability() function uses 'extruct' to parse HTML metadata
|
||||
# (JSON-LD, microdata, OpenGraph, etc). Extruct internally uses lxml, which wraps
|
||||
# libxml2 - a C library that allocates memory at the C level.
|
||||
#
|
||||
# Memory Leak Problem:
|
||||
# --------------------
|
||||
# 1. lxml's document_fromstring() creates thousands of Python objects backed by
|
||||
# C-level allocations (nodes, attributes, text content)
|
||||
# 2. Python's garbage collector can mark these objects as collectible, but
|
||||
# cannot force the OS to reclaim the actual C-level memory
|
||||
# 3. malloc/free typically doesn't return memory to OS - it just marks it as
|
||||
# "free in the process address space"
|
||||
# 4. With repeated parsing of large HTML (5MB+ pages), memory accumulates even
|
||||
# after Python GC runs
|
||||
#
|
||||
# Why Multiprocessing Fixes This:
|
||||
# --------------------------------
|
||||
# When a subprocess exits, the OS forcibly reclaims ALL memory including C-level
|
||||
# allocations that Python GC couldn't release. This ensures clean memory state
|
||||
# after each extraction.
|
||||
#
|
||||
# Performance Impact:
|
||||
# -------------------
|
||||
# - Memray analysis showed 1.2M document_fromstring allocations per page
|
||||
# - Without subprocess: memory grows by ~50-500MB per parse and lingers
|
||||
# - With subprocess: ~35MB overhead but forces full cleanup after each run
|
||||
# - Trade-off: 35MB resource_tracker vs 500MB+ accumulated leak = much better at scale
|
||||
#
|
||||
# References:
|
||||
# -----------
|
||||
# - lxml memory issues: https://medium.com/devopss-hole/python-lxml-memory-leak-b8d0b1000dc7
|
||||
# - libxml2 caching behavior: https://www.mail-archive.com/lxml@python.org/msg00026.html
|
||||
# - GC limitations with C extensions: https://benbernardblog.com/tracking-down-a-freaky-python-memory-leak-part-2/
|
||||
#
|
||||
# Additional Context:
|
||||
# -------------------
|
||||
# - jsonpath_ng (used to query the parsed data) is pure Python and doesn't leak
|
||||
# - The leak is specifically from lxml's document parsing, not the JSONPath queries
|
||||
# - Linux-only because multiprocessing spawn is well-tested there; other platforms
|
||||
# use direct call as fallback
|
||||
#
|
||||
# Alternative Solution (Future Optimization):
|
||||
# -------------------------------------------
|
||||
# This entire problem could be avoided by using regex to extract just the machine
|
||||
# data blocks (JSON-LD, microdata, OpenGraph tags) BEFORE parsing with lxml:
|
||||
#
|
||||
# 1. Use regex to extract <script type="application/ld+json">...</script> blocks
|
||||
# 2. Use regex to extract <meta property="og:*"> tags
|
||||
# 3. Use regex to find itemprop/itemtype attributes and their containing elements
|
||||
# 4. Parse ONLY those extracted snippets instead of the entire HTML document
|
||||
#
|
||||
# Benefits:
|
||||
# - Avoids parsing 5MB of HTML when we only need a few KB of metadata
|
||||
# - Eliminates the lxml memory leak entirely
|
||||
# - Faster extraction (regex is much faster than DOM parsing)
|
||||
# - No subprocess overhead needed
|
||||
#
|
||||
# Trade-offs:
|
||||
# - Regex for HTML is brittle (comments, CDATA, edge cases)
|
||||
# - Microdata extraction would be complex (need to track element boundaries)
|
||||
# - Would need extensive testing to ensure we don't miss valid data
|
||||
# - extruct is battle-tested; regex solution would need similar maturity
|
||||
#
|
||||
# For now, the subprocess approach is safer and leverages existing extruct code.
|
||||
# =============================================================================
|
||||
|
||||
|
||||
def _extract_itemprop_availability_worker(pipe_conn):
|
||||
"""
|
||||
Subprocess worker for itemprop extraction (Linux memory management).
|
||||
|
||||
Uses spawn multiprocessing to isolate extruct/lxml memory allocations.
|
||||
When the subprocess exits, the OS reclaims ALL memory including lxml's
|
||||
C-level allocations that Python's GC cannot release.
|
||||
|
||||
Args:
|
||||
pipe_conn: Pipe connection to receive HTML and send result
|
||||
"""
|
||||
import json
|
||||
import gc
|
||||
|
||||
html_content = None
|
||||
result_data = None
|
||||
|
||||
try:
|
||||
# Receive HTML as raw bytes (no pickle)
|
||||
html_bytes = pipe_conn.recv_bytes()
|
||||
html_content = html_bytes.decode('utf-8')
|
||||
|
||||
# Explicitly delete html_bytes to free memory
|
||||
del html_bytes
|
||||
gc.collect()
|
||||
|
||||
# Perform extraction in subprocess (uses extruct/lxml)
|
||||
result_data = get_itemprop_availability(html_content)
|
||||
|
||||
# Convert Restock object to dict for JSON serialization
|
||||
result = {
|
||||
'success': True,
|
||||
'data': dict(result_data) if result_data else {}
|
||||
}
|
||||
pipe_conn.send_bytes(json.dumps(result).encode('utf-8'))
|
||||
|
||||
# Clean up before exit
|
||||
del result_data, html_content, result
|
||||
gc.collect()
|
||||
|
||||
except MoreThanOnePriceFound:
|
||||
# Serialize the specific exception type
|
||||
result = {
|
||||
'success': False,
|
||||
'exception_type': 'MoreThanOnePriceFound'
|
||||
}
|
||||
pipe_conn.send_bytes(json.dumps(result).encode('utf-8'))
|
||||
|
||||
except Exception as e:
|
||||
# Serialize other exceptions
|
||||
result = {
|
||||
'success': False,
|
||||
'exception_type': type(e).__name__,
|
||||
'exception_message': str(e)
|
||||
}
|
||||
pipe_conn.send_bytes(json.dumps(result).encode('utf-8'))
|
||||
|
||||
finally:
|
||||
# Final cleanup before subprocess exits
|
||||
# Variables may already be deleted in try block, so use try/except
|
||||
try:
|
||||
del html_content
|
||||
except (NameError, UnboundLocalError):
|
||||
pass
|
||||
try:
|
||||
del result_data
|
||||
except (NameError, UnboundLocalError):
|
||||
pass
|
||||
gc.collect()
|
||||
pipe_conn.close()
|
||||
|
||||
|
||||
def extract_itemprop_availability_safe(html_content) -> Restock:
|
||||
"""
|
||||
Extract itemprop availability with hybrid approach for memory efficiency.
|
||||
|
||||
Strategy (fastest to slowest, least to most memory):
|
||||
1. Try pure Python extraction (JSON-LD, OpenGraph, microdata) - covers 80%+ of cases
|
||||
2. Fall back to extruct with subprocess isolation on Linux for complex cases
|
||||
|
||||
Args:
|
||||
html_content: HTML string to parse
|
||||
|
||||
Returns:
|
||||
Restock: Extracted availability data
|
||||
|
||||
Raises:
|
||||
MoreThanOnePriceFound: When multiple prices detected
|
||||
Other exceptions: From extruct/parsing
|
||||
"""
|
||||
import platform
|
||||
|
||||
# Step 1: Try pure Python extraction first (fast, no lxml, no memory leak)
|
||||
try:
|
||||
from .pure_python_extractor import extract_metadata_pure_python, query_price_availability
|
||||
|
||||
logger.trace("Attempting pure Python metadata extraction (no lxml)")
|
||||
extracted_data = extract_metadata_pure_python(html_content)
|
||||
price_data = query_price_availability(extracted_data)
|
||||
|
||||
# If we got price AND availability, we're done!
|
||||
if price_data.get('price') and price_data.get('availability'):
|
||||
result = Restock(price_data)
|
||||
logger.debug(f"Pure Python extraction successful: {dict(result)}")
|
||||
return result
|
||||
|
||||
# If we got some data but not everything, still try extruct for completeness
|
||||
if price_data.get('price') or price_data.get('availability'):
|
||||
logger.debug(f"Pure Python extraction partial: {price_data}, will try extruct for completeness")
|
||||
|
||||
except Exception as e:
|
||||
logger.debug(f"Pure Python extraction failed: {e}, falling back to extruct")
|
||||
|
||||
# Step 2: Fall back to extruct (uses lxml, needs subprocess on Linux)
|
||||
logger.trace("Falling back to extruct (lxml-based) with subprocess isolation")
|
||||
|
||||
# Only use subprocess isolation on Linux
|
||||
# Other platforms may have issues with spawn or don't need the aggressive memory management
|
||||
if platform.system() == 'Linux':
|
||||
import multiprocessing
|
||||
import json
|
||||
import gc
|
||||
|
||||
try:
|
||||
ctx = multiprocessing.get_context('spawn')
|
||||
parent_conn, child_conn = ctx.Pipe()
|
||||
p = ctx.Process(target=_extract_itemprop_availability_worker, args=(child_conn,))
|
||||
p.start()
|
||||
|
||||
# Send HTML as raw bytes (no pickle)
|
||||
html_bytes = html_content.encode('utf-8')
|
||||
parent_conn.send_bytes(html_bytes)
|
||||
|
||||
# Explicitly delete html_bytes copy immediately after sending
|
||||
del html_bytes
|
||||
gc.collect()
|
||||
|
||||
# Receive result as JSON
|
||||
result_bytes = parent_conn.recv_bytes()
|
||||
result = json.loads(result_bytes.decode('utf-8'))
|
||||
|
||||
# Wait for subprocess to complete
|
||||
p.join()
|
||||
|
||||
# Close pipes
|
||||
parent_conn.close()
|
||||
child_conn.close()
|
||||
|
||||
# Clean up all subprocess-related objects
|
||||
del p, parent_conn, child_conn, result_bytes
|
||||
gc.collect()
|
||||
|
||||
# Handle result or re-raise exception
|
||||
if result['success']:
|
||||
# Reconstruct Restock object from dict
|
||||
restock_obj = Restock(result['data'])
|
||||
# Clean up result dict
|
||||
del result
|
||||
gc.collect()
|
||||
return restock_obj
|
||||
else:
|
||||
# Re-raise the exception that occurred in subprocess
|
||||
exception_type = result['exception_type']
|
||||
exception_msg = result.get('exception_message', '')
|
||||
del result
|
||||
gc.collect()
|
||||
|
||||
if exception_type == 'MoreThanOnePriceFound':
|
||||
raise MoreThanOnePriceFound()
|
||||
else:
|
||||
raise Exception(f"{exception_type}: {exception_msg}")
|
||||
|
||||
except Exception as e:
|
||||
# If multiprocessing itself fails, log and fall back to direct call
|
||||
logger.warning(f"Subprocess extraction failed: {e}, falling back to direct call")
|
||||
gc.collect()
|
||||
return get_itemprop_availability(html_content)
|
||||
else:
|
||||
# Non-Linux: direct call (no subprocess overhead needed)
|
||||
return get_itemprop_availability(html_content)
|
||||
|
||||
|
||||
# should return Restock()
|
||||
# add casting?
|
||||
def get_itemprop_availability(html_content) -> Restock:
|
||||
@@ -404,82 +150,61 @@ class perform_site_check(difference_detection_processor):
|
||||
screenshot = None
|
||||
xpath_data = None
|
||||
|
||||
def run_changedetection(self, watch, force_reprocess=False):
|
||||
def run_changedetection(self, watch):
|
||||
import hashlib
|
||||
|
||||
if not watch:
|
||||
raise Exception("Watch no longer exists.")
|
||||
|
||||
current_raw_document_checksum = self.get_raw_document_checksum()
|
||||
# Skip processing only if BOTH conditions are true:
|
||||
# 1. HTML content unchanged (checksum matches last saved checksum)
|
||||
# 2. Watch configuration was not edited (including trigger_text, filters, etc.)
|
||||
# The was_edited flag handles all watch configuration changes, so we don't need
|
||||
# separate checks for trigger_text or other processing rules.
|
||||
if (not force_reprocess and
|
||||
not watch.was_edited and
|
||||
self.last_raw_content_checksum and
|
||||
self.last_raw_content_checksum == current_raw_document_checksum):
|
||||
raise checksumFromPreviousCheckWasTheSame()
|
||||
|
||||
# Unset any existing notification error
|
||||
update_obj = {'last_notification_error': False, 'last_error': False, 'restock': Restock()}
|
||||
|
||||
self.screenshot = self.fetcher.screenshot
|
||||
self.xpath_data = self.fetcher.xpath_data
|
||||
|
||||
# Track the content type (readonly field, doesn't trigger was_edited)
|
||||
update_obj['content-type'] = self.fetcher.headers.get('Content-Type', '') # Use hyphen (matches OpenAPI spec)
|
||||
# Track the content type
|
||||
update_obj['content_type'] = self.fetcher.headers.get('Content-Type', '')
|
||||
update_obj["last_check_status"] = self.fetcher.get_last_status_code()
|
||||
|
||||
# Save the raw content checksum to file (processor implementation detail, not watch config)
|
||||
self.update_last_raw_content_checksum(current_raw_document_checksum)
|
||||
|
||||
# Only try to process restock information (like scraping for keywords) if the page was actually rendered correctly.
|
||||
# Otherwise it will assume "in stock" because nothing suggesting the opposite was found
|
||||
#useless
|
||||
# from ...html_tools import html_to_text
|
||||
# text = html_to_text(self.fetcher.content)
|
||||
# logger.debug(f"Length of text after conversion: {len(text)}")
|
||||
# if not len(text):
|
||||
# from ...content_fetchers.exceptions import ReplyWithContentButNoText
|
||||
# raise ReplyWithContentButNoText(url=watch.link,
|
||||
# status_code=self.fetcher.get_last_status_code(),
|
||||
# screenshot=self.fetcher.screenshot,
|
||||
# html_content=self.fetcher.content,
|
||||
# xpath_data=self.fetcher.xpath_data
|
||||
# )
|
||||
from ...html_tools import html_to_text
|
||||
text = html_to_text(self.fetcher.content)
|
||||
logger.debug(f"Length of text after conversion: {len(text)}")
|
||||
if not len(text):
|
||||
from ...content_fetchers.exceptions import ReplyWithContentButNoText
|
||||
raise ReplyWithContentButNoText(url=watch.link,
|
||||
status_code=self.fetcher.get_last_status_code(),
|
||||
screenshot=self.fetcher.screenshot,
|
||||
html_content=self.fetcher.content,
|
||||
xpath_data=self.fetcher.xpath_data
|
||||
)
|
||||
|
||||
# Which restock settings to compare against?
|
||||
# Settings are stored in restock_diff.json (migrated from watch.json by update_30).
|
||||
_extra_config = self.get_extra_watch_config('restock_diff.json')
|
||||
restock_settings = _extra_config.get('restock_diff') or {
|
||||
'follow_price_changes': True,
|
||||
'in_stock_processing': 'in_stock_only',
|
||||
}
|
||||
restock_settings = watch.get('restock_settings', {})
|
||||
|
||||
# See if any tags have 'activate for individual watches in this tag/group?' enabled and use the first we find
|
||||
for tag_uuid in watch.get('tags'):
|
||||
tag = self.datastore.data['settings']['application']['tags'].get(tag_uuid, {})
|
||||
if tag.get('overrides_watch'):
|
||||
restock_settings = tag.get('processor_config_restock_diff') or {}
|
||||
restock_settings = tag.get('restock_settings', {})
|
||||
logger.info(f"Watch {watch.get('uuid')} - Tag '{tag.get('title')}' selected for restock settings override")
|
||||
break
|
||||
|
||||
|
||||
itemprop_availability = {}
|
||||
multiple_prices_found = False
|
||||
|
||||
# Try built-in extraction first, this will scan metadata in the HTML
|
||||
# On Linux, this runs in a subprocess to prevent lxml/extruct memory leaks
|
||||
try:
|
||||
itemprop_availability = extract_itemprop_availability_safe(self.fetcher.content)
|
||||
itemprop_availability = get_itemprop_availability(self.fetcher.content)
|
||||
except MoreThanOnePriceFound as e:
|
||||
# Don't raise immediately - let plugins try to handle this case
|
||||
# Plugins might be able to determine which price is correct
|
||||
logger.warning(f"Built-in detection found multiple prices on {watch.get('url')}, will try plugin override")
|
||||
multiple_prices_found = True
|
||||
itemprop_availability = {}
|
||||
# Add the real data
|
||||
raise ProcessorException(message="Cannot run, more than one price detected, this plugin is only for product pages with ONE product, try the content-change detection mode.",
|
||||
url=watch.get('url'),
|
||||
status_code=self.fetcher.get_last_status_code(),
|
||||
screenshot=self.fetcher.screenshot,
|
||||
xpath_data=self.fetcher.xpath_data
|
||||
)
|
||||
|
||||
# If built-in extraction didn't get both price AND availability, try plugin override
|
||||
# Only check plugin if this watch is using a fetcher that might provide better data
|
||||
@@ -491,21 +216,9 @@ class perform_site_check(difference_detection_processor):
|
||||
from changedetectionio.pluggy_interface import get_itemprop_availability_from_plugin
|
||||
fetcher_name = watch.get('fetch_backend', 'html_requests')
|
||||
|
||||
# Resolve 'system' to the actual fetcher being used
|
||||
# This allows plugins to work even when watch uses "system settings default"
|
||||
if fetcher_name == 'system':
|
||||
# Get the actual fetcher that was used (from self.fetcher)
|
||||
# Fetcher class name gives us the actual backend (e.g., 'html_requests', 'html_webdriver')
|
||||
actual_fetcher = type(self.fetcher).__name__
|
||||
if 'html_requests' in actual_fetcher.lower():
|
||||
fetcher_name = 'html_requests'
|
||||
elif 'webdriver' in actual_fetcher.lower() or 'playwright' in actual_fetcher.lower():
|
||||
fetcher_name = 'html_webdriver'
|
||||
logger.debug(f"Resolved 'system' fetcher to actual fetcher: {fetcher_name}")
|
||||
|
||||
# Try plugin override - plugins can decide if they support this fetcher
|
||||
if fetcher_name:
|
||||
logger.debug(f"Calling extra plugins for getting item price/availability (fetcher: {fetcher_name})")
|
||||
# Only try plugin override if not using system default (which might be anything)
|
||||
if fetcher_name and fetcher_name != 'system':
|
||||
logger.debug("Calling extra plugins for getting item price/availability")
|
||||
plugin_availability = get_itemprop_availability_from_plugin(self.fetcher.content, fetcher_name, self.fetcher, watch.link)
|
||||
|
||||
if plugin_availability:
|
||||
@@ -520,16 +233,6 @@ class perform_site_check(difference_detection_processor):
|
||||
if not plugin_availability:
|
||||
logger.debug("No item price/availability from plugins")
|
||||
|
||||
# If we had multiple prices and plugins also failed, NOW raise the exception
|
||||
if multiple_prices_found and not itemprop_availability.get('price'):
|
||||
raise ProcessorException(
|
||||
message="Cannot run, more than one price detected, this plugin is only for product pages with ONE product, try the content-change detection mode.",
|
||||
url=watch.get('url'),
|
||||
status_code=self.fetcher.get_last_status_code(),
|
||||
screenshot=self.fetcher.screenshot,
|
||||
xpath_data=self.fetcher.xpath_data
|
||||
)
|
||||
|
||||
# Something valid in get_itemprop_availability() by scraping metadata ?
|
||||
if itemprop_availability.get('price') or itemprop_availability.get('availability'):
|
||||
# Store for other usage
|
||||
|
||||
@@ -1,289 +0,0 @@
|
||||
"""
|
||||
Pure Python metadata extractor - no lxml, no memory leaks.
|
||||
|
||||
This module provides a fast, memory-efficient alternative to extruct for common
|
||||
e-commerce metadata extraction. It handles:
|
||||
- JSON-LD (covers 80%+ of modern sites)
|
||||
- OpenGraph meta tags
|
||||
- Basic microdata attributes
|
||||
|
||||
Uses Python's built-in html.parser instead of lxml/libxml2, avoiding C-level
|
||||
memory allocation issues. For edge cases, the main processor can fall back to
|
||||
extruct (with subprocess isolation on Linux).
|
||||
"""
|
||||
|
||||
from html.parser import HTMLParser
|
||||
import json
|
||||
import re
|
||||
from loguru import logger
|
||||
|
||||
|
||||
class JSONLDExtractor(HTMLParser):
|
||||
"""
|
||||
Extract JSON-LD structured data from HTML.
|
||||
|
||||
Finds all <script type="application/ld+json"> tags and parses their content.
|
||||
Handles multiple JSON-LD blocks on the same page.
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.in_jsonld = False
|
||||
self.data = [] # List of all parsed JSON-LD objects
|
||||
self.current_script = []
|
||||
|
||||
def handle_starttag(self, tag, attrs):
|
||||
if tag == 'script':
|
||||
# Check if this is a JSON-LD script tag
|
||||
for attr, value in attrs:
|
||||
if attr == 'type' and value == 'application/ld+json':
|
||||
self.in_jsonld = True
|
||||
self.current_script = []
|
||||
break
|
||||
|
||||
def handle_data(self, data):
|
||||
if self.in_jsonld:
|
||||
self.current_script.append(data)
|
||||
|
||||
def handle_endtag(self, tag):
|
||||
if tag == 'script' and self.in_jsonld:
|
||||
# Parse the accumulated script content
|
||||
script_content = ''.join(self.current_script)
|
||||
if script_content.strip():
|
||||
try:
|
||||
# Parse JSON (handles both objects and arrays)
|
||||
parsed = json.loads(script_content)
|
||||
if isinstance(parsed, list):
|
||||
self.data.extend(parsed)
|
||||
else:
|
||||
self.data.append(parsed)
|
||||
except json.JSONDecodeError as e:
|
||||
logger.debug(f"Failed to parse JSON-LD: {e}")
|
||||
pass
|
||||
|
||||
self.in_jsonld = False
|
||||
self.current_script = []
|
||||
|
||||
|
||||
class OpenGraphExtractor(HTMLParser):
|
||||
"""
|
||||
Extract OpenGraph meta tags from HTML.
|
||||
|
||||
Finds <meta property="og:*"> tags commonly used for social media sharing.
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.og_data = {}
|
||||
|
||||
def handle_starttag(self, tag, attrs):
|
||||
if tag == 'meta':
|
||||
attrs_dict = dict(attrs)
|
||||
prop = attrs_dict.get('property', '')
|
||||
|
||||
# Extract OpenGraph properties
|
||||
if prop.startswith('og:'):
|
||||
content = attrs_dict.get('content', '')
|
||||
if content:
|
||||
self.og_data[prop] = content
|
||||
|
||||
|
||||
class MicrodataExtractor(HTMLParser):
|
||||
"""
|
||||
Extract basic microdata attributes from HTML.
|
||||
|
||||
Finds elements with itemprop attributes. This is a simplified extractor
|
||||
that doesn't handle nested itemscope/itemtype hierarchies - for complex
|
||||
cases, use extruct as fallback.
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.microdata = {}
|
||||
self.current_itemprop = None
|
||||
|
||||
def handle_starttag(self, tag, attrs):
|
||||
attrs_dict = dict(attrs)
|
||||
|
||||
if 'itemprop' in attrs_dict:
|
||||
itemprop = attrs_dict['itemprop']
|
||||
|
||||
# Price/currency/availability can be in content/href attributes
|
||||
if itemprop == 'price':
|
||||
if 'content' in attrs_dict:
|
||||
self.microdata['price'] = attrs_dict['content']
|
||||
else:
|
||||
self.current_itemprop = 'price'
|
||||
|
||||
elif itemprop == 'priceCurrency':
|
||||
if 'content' in attrs_dict:
|
||||
self.microdata['currency'] = attrs_dict['content']
|
||||
else:
|
||||
self.current_itemprop = 'priceCurrency'
|
||||
|
||||
elif itemprop == 'availability':
|
||||
# Can be in href (link) or content (meta)
|
||||
if 'href' in attrs_dict:
|
||||
self.microdata['availability'] = attrs_dict['href']
|
||||
elif 'content' in attrs_dict:
|
||||
self.microdata['availability'] = attrs_dict['content']
|
||||
else:
|
||||
self.current_itemprop = 'availability'
|
||||
|
||||
def handle_data(self, data):
|
||||
# Capture text content for itemprop elements
|
||||
if self.current_itemprop == 'price':
|
||||
# Try to extract numeric price from text
|
||||
try:
|
||||
price_text = re.sub(r'[^\d.]', '', data.strip())
|
||||
if price_text:
|
||||
self.microdata['price'] = float(price_text)
|
||||
except ValueError:
|
||||
pass
|
||||
elif self.current_itemprop == 'priceCurrency':
|
||||
currency = data.strip()
|
||||
if currency:
|
||||
self.microdata['currency'] = currency
|
||||
elif self.current_itemprop == 'availability':
|
||||
availability = data.strip()
|
||||
if availability:
|
||||
self.microdata['availability'] = availability
|
||||
|
||||
def handle_endtag(self, tag):
|
||||
# Reset current itemprop after closing tag
|
||||
self.current_itemprop = None
|
||||
|
||||
|
||||
def extract_metadata_pure_python(html_content):
|
||||
"""
|
||||
Extract structured metadata from HTML using pure Python parsers.
|
||||
|
||||
Returns a dict with three keys:
|
||||
- 'json-ld': List of parsed JSON-LD objects
|
||||
- 'opengraph': Dict of OpenGraph properties
|
||||
- 'microdata': Dict of microdata properties
|
||||
|
||||
Args:
|
||||
html_content: HTML string to parse
|
||||
|
||||
Returns:
|
||||
dict: Extracted metadata in three formats
|
||||
"""
|
||||
result = {
|
||||
'json-ld': [],
|
||||
'opengraph': {},
|
||||
'microdata': {}
|
||||
}
|
||||
|
||||
# Extract JSON-LD
|
||||
try:
|
||||
jsonld_extractor = JSONLDExtractor()
|
||||
jsonld_extractor.feed(html_content)
|
||||
result['json-ld'] = jsonld_extractor.data
|
||||
logger.trace(f"Pure Python: Found {len(jsonld_extractor.data)} JSON-LD blocks")
|
||||
except Exception as e:
|
||||
logger.debug(f"JSON-LD extraction failed: {e}")
|
||||
|
||||
# Extract OpenGraph
|
||||
try:
|
||||
og_extractor = OpenGraphExtractor()
|
||||
og_extractor.feed(html_content)
|
||||
result['opengraph'] = og_extractor.og_data
|
||||
if result['opengraph']:
|
||||
logger.trace(f"Pure Python: Found {len(og_extractor.og_data)} OpenGraph tags")
|
||||
except Exception as e:
|
||||
logger.debug(f"OpenGraph extraction failed: {e}")
|
||||
|
||||
# Extract Microdata
|
||||
try:
|
||||
microdata_extractor = MicrodataExtractor()
|
||||
microdata_extractor.feed(html_content)
|
||||
result['microdata'] = microdata_extractor.microdata
|
||||
if result['microdata']:
|
||||
logger.trace(f"Pure Python: Found microdata: {result['microdata']}")
|
||||
except Exception as e:
|
||||
logger.debug(f"Microdata extraction failed: {e}")
|
||||
|
||||
return result
|
||||
|
||||
|
||||
def query_price_availability(extracted_data):
|
||||
"""
|
||||
Query extracted metadata for price and availability information.
|
||||
|
||||
Uses jsonpath_ng to query JSON-LD data (same approach as extruct).
|
||||
Falls back to OpenGraph and microdata if JSON-LD doesn't have the data.
|
||||
|
||||
Args:
|
||||
extracted_data: Dict from extract_metadata_pure_python()
|
||||
|
||||
Returns:
|
||||
dict: {'price': float, 'currency': str, 'availability': str}
|
||||
"""
|
||||
from jsonpath_ng import parse
|
||||
|
||||
result = {}
|
||||
|
||||
# 1. Try JSON-LD first (most reliable and common)
|
||||
for data in extracted_data.get('json-ld', []):
|
||||
try:
|
||||
# Use jsonpath to find price/availability anywhere in the structure
|
||||
price_parse = parse('$..(price|Price)')
|
||||
availability_parse = parse('$..(availability|Availability)')
|
||||
currency_parse = parse('$..(priceCurrency|currency|priceCurrency)')
|
||||
|
||||
price_results = [m.value for m in price_parse.find(data)]
|
||||
if price_results and not result.get('price'):
|
||||
# Handle various price formats
|
||||
price_val = price_results[0]
|
||||
if isinstance(price_val, (int, float)):
|
||||
result['price'] = float(price_val)
|
||||
elif isinstance(price_val, str):
|
||||
# Extract numeric value from string
|
||||
try:
|
||||
result['price'] = float(re.sub(r'[^\d.]', '', price_val))
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
avail_results = [m.value for m in availability_parse.find(data)]
|
||||
if avail_results and not result.get('availability'):
|
||||
result['availability'] = str(avail_results[0])
|
||||
|
||||
curr_results = [m.value for m in currency_parse.find(data)]
|
||||
if curr_results and not result.get('currency'):
|
||||
result['currency'] = str(curr_results[0])
|
||||
|
||||
# If we found price, this JSON-LD block is good
|
||||
if result.get('price'):
|
||||
logger.debug(f"Pure Python: Found price data in JSON-LD: {result}")
|
||||
break
|
||||
|
||||
except Exception as e:
|
||||
logger.debug(f"Error querying JSON-LD: {e}")
|
||||
continue
|
||||
|
||||
# 2. Try OpenGraph if JSON-LD didn't provide everything
|
||||
og_data = extracted_data.get('opengraph', {})
|
||||
if not result.get('price') and 'og:price:amount' in og_data:
|
||||
try:
|
||||
result['price'] = float(og_data['og:price:amount'])
|
||||
except ValueError:
|
||||
pass
|
||||
if not result.get('currency') and 'og:price:currency' in og_data:
|
||||
result['currency'] = og_data['og:price:currency']
|
||||
if not result.get('availability') and 'og:availability' in og_data:
|
||||
result['availability'] = og_data['og:availability']
|
||||
|
||||
# 3. Use microdata as last resort
|
||||
microdata = extracted_data.get('microdata', {})
|
||||
if not result.get('price') and 'price' in microdata:
|
||||
result['price'] = microdata['price']
|
||||
if not result.get('currency') and 'currency' in microdata:
|
||||
result['currency'] = microdata['currency']
|
||||
if not result.get('availability') and 'availability' in microdata:
|
||||
result['availability'] = microdata['availability']
|
||||
|
||||
# result['price'] could be float or str here, depending on the website, for example it might contain "1,00" commas, etc.
|
||||
# using something like babel you need to know the locale of the website and even then it can be problematic
|
||||
# we dont really do anything with the price data so far.. so just accept it the way it comes.
|
||||
return result
|
||||
@@ -17,8 +17,7 @@ def _task(watch, update_handler):
|
||||
|
||||
try:
|
||||
# The slow process (we run 2 of these in parallel)
|
||||
# Always force reprocess for preview - we want to show the filtered content regardless of checksums
|
||||
changed_detected, update_obj, text_after_filter = update_handler.run_changedetection(watch=watch, force_reprocess=True)
|
||||
changed_detected, update_obj, text_after_filter = update_handler.run_changedetection(watch=watch)
|
||||
except FilterNotFoundInResponse as e:
|
||||
text_after_filter = f"Filter not found in HTML: {str(e)}"
|
||||
except ReplyWithContentButNoText as e:
|
||||
@@ -36,7 +35,7 @@ def _task(watch, update_handler):
|
||||
|
||||
|
||||
def prepare_filter_prevew(datastore, watch_uuid, form_data):
|
||||
'''Used by @app.route("/edit/<uuid_str:uuid>/preview-rendered", methods=['POST'])'''
|
||||
'''Used by @app.route("/edit/<string:uuid>/preview-rendered", methods=['POST'])'''
|
||||
from changedetectionio import forms, html_tools
|
||||
from changedetectionio.model.Watch import model as watch_model
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
@@ -56,7 +55,7 @@ def prepare_filter_prevew(datastore, watch_uuid, form_data):
|
||||
|
||||
tmp_watch = deepcopy(datastore.data['watching'].get(watch_uuid))
|
||||
|
||||
if tmp_watch and tmp_watch.history and os.path.isdir(tmp_watch.data_dir):
|
||||
if tmp_watch and tmp_watch.history and os.path.isdir(tmp_watch.watch_data_dir):
|
||||
# Splice in the temporary stuff from the form
|
||||
form = forms.processor_text_json_diff_form(formdata=form_data if request.method == 'POST' else None,
|
||||
data=form_data
|
||||
@@ -69,7 +68,7 @@ def prepare_filter_prevew(datastore, watch_uuid, form_data):
|
||||
blank_watch_no_filters['url'] = tmp_watch.get('url')
|
||||
|
||||
latest_filename = next(reversed(tmp_watch.history))
|
||||
html_fname = os.path.join(tmp_watch.data_dir, f"{latest_filename}.html.br")
|
||||
html_fname = os.path.join(tmp_watch.watch_data_dir, f"{latest_filename}.html.br")
|
||||
with open(html_fname, 'rb') as f:
|
||||
decompressed_data = brotli.decompress(f.read()).decode('utf-8') if html_fname.endswith('.br') else f.read().decode('utf-8')
|
||||
|
||||
|
||||
@@ -154,7 +154,11 @@ def render(watch, datastore, request, url_for, render_template, flash, redirect,
|
||||
|
||||
screenshot_url = watch.get_screenshot()
|
||||
|
||||
is_html_webdriver = watch.fetcher_supports_screenshots
|
||||
system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver'
|
||||
|
||||
is_html_webdriver = False
|
||||
if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver' or watch.get('fetch_backend', '').startswith('extra_browser_'):
|
||||
is_html_webdriver = True
|
||||
|
||||
password_enabled_and_share_is_off = False
|
||||
if datastore.data['settings']['application'].get('password') or os.getenv("SALTED_PASS", False):
|
||||
|
||||
@@ -7,7 +7,6 @@ import re
|
||||
import urllib3
|
||||
|
||||
from changedetectionio.conditions import execute_ruleset_against_all_plugins
|
||||
from changedetectionio.content_fetchers.exceptions import checksumFromPreviousCheckWasTheSame
|
||||
from ..base import difference_detection_processor
|
||||
from changedetectionio.html_tools import PERL_STYLE_REGEX, cdata_in_document_to_text, TRANSLATE_WHITESPACE_TABLE
|
||||
from changedetectionio import html_tools, content_fetchers
|
||||
@@ -347,7 +346,6 @@ class ContentProcessor:
|
||||
def extract_text_from_html(self, html_content, stream_content_type):
|
||||
"""Convert HTML to plain text."""
|
||||
do_anchor = self.datastore.data["settings"]["application"].get("render_anchor_tag_content", False)
|
||||
|
||||
return html_tools.html_to_text(
|
||||
html_content=html_content,
|
||||
render_anchor_tag_content=do_anchor,
|
||||
@@ -370,24 +368,12 @@ class ChecksumCalculator:
|
||||
# (set_proxy_from_list)
|
||||
class perform_site_check(difference_detection_processor):
|
||||
|
||||
def run_changedetection(self, watch, force_reprocess=False):
|
||||
def run_changedetection(self, watch):
|
||||
changed_detected = False
|
||||
|
||||
if not watch:
|
||||
raise Exception("Watch no longer exists.")
|
||||
|
||||
current_raw_document_checksum = self.get_raw_document_checksum()
|
||||
# Skip processing only if BOTH conditions are true:
|
||||
# 1. HTML content unchanged (checksum matches last saved checksum)
|
||||
# 2. Watch configuration was not edited (including trigger_text, filters, etc.)
|
||||
# The was_edited flag handles all watch configuration changes, so we don't need
|
||||
# separate checks for trigger_text or other processing rules.
|
||||
if (not force_reprocess and
|
||||
not watch.was_edited and
|
||||
self.last_raw_content_checksum and
|
||||
self.last_raw_content_checksum == current_raw_document_checksum):
|
||||
raise checksumFromPreviousCheckWasTheSame()
|
||||
|
||||
# Initialize components
|
||||
filter_config = FilterConfig(watch, self.datastore)
|
||||
content_processor = ContentProcessor(self.fetcher, watch, filter_config, self.datastore)
|
||||
@@ -405,11 +391,9 @@ class perform_site_check(difference_detection_processor):
|
||||
self.screenshot = self.fetcher.screenshot
|
||||
self.xpath_data = self.fetcher.xpath_data
|
||||
|
||||
# Track the content type (readonly field, doesn't trigger was_edited)
|
||||
update_obj['content-type'] = ctype_header # Use hyphen (matches OpenAPI spec and watch_base default)
|
||||
|
||||
# Save the raw content checksum to file (processor implementation detail, not watch config)
|
||||
self.update_last_raw_content_checksum(current_raw_document_checksum)
|
||||
# Track the content type and checksum before filters
|
||||
update_obj['content_type'] = ctype_header
|
||||
update_obj['previous_md5_before_filters'] = hashlib.md5(self.fetcher.content.encode('utf-8')).hexdigest()
|
||||
|
||||
# === CONTENT PREPROCESSING ===
|
||||
# Avoid creating unnecessary intermediate string copies by reassigning only when needed
|
||||
|
||||
@@ -29,11 +29,9 @@ def register_watch_operation_handlers(socketio, datastore):
|
||||
# Perform the operation
|
||||
if op == 'pause':
|
||||
watch.toggle_pause()
|
||||
watch.commit()
|
||||
logger.info(f"Socket.IO: Toggled pause for watch {uuid}")
|
||||
elif op == 'mute':
|
||||
watch.toggle_mute()
|
||||
watch.commit()
|
||||
logger.info(f"Socket.IO: Toggled mute for watch {uuid}")
|
||||
elif op == 'recheck':
|
||||
# Import here to avoid circular imports
|
||||
|
||||
@@ -199,31 +199,8 @@ def handle_watch_update(socketio, **kwargs):
|
||||
logger.error(f"Socket.IO error in handle_watch_update: {str(e)}")
|
||||
|
||||
|
||||
def _suppress_werkzeug_ws_abrupt_disconnect_noise():
|
||||
"""Patch BaseWSGIServer.log to suppress the AssertionError traceback that fires when
|
||||
a browser closes a WebSocket connection mid-handshake (e.g. closing a tab).
|
||||
The exception is caught inside run_wsgi and routed to self.server.log() — it never
|
||||
propagates out, so wrapping run_wsgi doesn't help. Patching the log method is the
|
||||
only reliable intercept point. The error is cosmetic: Socket.IO already handles the
|
||||
disconnect correctly via its own disconnect handler and timeout logic."""
|
||||
try:
|
||||
from werkzeug.serving import BaseWSGIServer
|
||||
_original_log = BaseWSGIServer.log
|
||||
|
||||
def _filtered_log(self, type, message, *args):
|
||||
if type == 'error' and 'write() before start_response' in message:
|
||||
return
|
||||
_original_log(self, type, message, *args)
|
||||
|
||||
BaseWSGIServer.log = _filtered_log
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
def init_socketio(app, datastore):
|
||||
"""Initialize SocketIO with the main Flask app"""
|
||||
_suppress_werkzeug_ws_abrupt_disconnect_noise()
|
||||
|
||||
import platform
|
||||
import sys
|
||||
|
||||
@@ -368,4 +345,4 @@ def init_socketio(app, datastore):
|
||||
|
||||
logger.info("Socket.IO initialized and attached to main Flask app")
|
||||
logger.info(f"Socket.IO: Registered event handlers: {socketio.handlers if hasattr(socketio, 'handlers') else 'No handlers found'}")
|
||||
return socketio
|
||||
return socketio
|
||||
@@ -44,12 +44,12 @@ data_sanity_test () {
|
||||
cd ..
|
||||
TMPDIR=$(mktemp -d)
|
||||
PORT_N=$((5000 + RANDOM % (6501 - 5000)))
|
||||
ALLOW_IANA_RESTRICTED_ADDRESSES=true ./changedetection.py -p $PORT_N -d $TMPDIR -u "https://localhost?test-url-is-sanity=1" &
|
||||
./changedetection.py -p $PORT_N -d $TMPDIR -u "https://localhost?test-url-is-sanity=1" &
|
||||
PID=$!
|
||||
sleep 5
|
||||
kill $PID
|
||||
sleep 2
|
||||
ALLOW_IANA_RESTRICTED_ADDRESSES=true ./changedetection.py -p $PORT_N -d $TMPDIR &
|
||||
./changedetection.py -p $PORT_N -d $TMPDIR &
|
||||
PID=$!
|
||||
sleep 5
|
||||
# On a restart the URL should still be there
|
||||
|
||||
@@ -17,6 +17,8 @@ $(document).ready(function () {
|
||||
set_scale();
|
||||
});
|
||||
// Should always be disabled
|
||||
$('#browser_steps-0-operation option[value="Goto site"]').prop("selected", "selected");
|
||||
$('#browser_steps-0-operation').attr('disabled', 'disabled');
|
||||
|
||||
$('#browsersteps-click-start').click(function () {
|
||||
$("#browsersteps-click-start").fadeOut();
|
||||
@@ -43,6 +45,12 @@ $(document).ready(function () {
|
||||
browsersteps_session_id = false;
|
||||
apply_buttons_disabled = false;
|
||||
ctx.clearRect(0, 0, c.width, c.height);
|
||||
set_first_gotosite_disabled();
|
||||
}
|
||||
|
||||
function set_first_gotosite_disabled() {
|
||||
$('#browser_steps >li:first-child select').val('Goto site').attr('disabled', 'disabled');
|
||||
$('#browser_steps >li:first-child').css('opacity', '0.5');
|
||||
}
|
||||
|
||||
// Show seconds remaining until the browser interface needs to restart the session
|
||||
@@ -235,54 +243,14 @@ $(document).ready(function () {
|
||||
ctx.fill();
|
||||
}
|
||||
|
||||
// Reusable AJAX function for browser step operations
|
||||
function executeBrowserStep(url, data = {}) {
|
||||
$('#browser-steps-ui .loader .spinner').fadeIn();
|
||||
apply_buttons_disabled = true;
|
||||
$('ul#browser_steps li .control .apply').css('opacity', 0.5);
|
||||
$("#browsersteps-img").css('opacity', 0.65);
|
||||
|
||||
return $.ajax({
|
||||
method: "POST",
|
||||
url: url,
|
||||
data: data,
|
||||
statusCode: {
|
||||
400: function () {
|
||||
alert("There was a problem processing the request, please reload the page.");
|
||||
$("#loading-status-text").hide();
|
||||
$('#browser-steps-ui .loader .spinner').fadeOut();
|
||||
},
|
||||
401: function (data) {
|
||||
alert(data.responseText);
|
||||
$("#loading-status-text").hide();
|
||||
$('#browser-steps-ui .loader .spinner').fadeOut();
|
||||
}
|
||||
}
|
||||
}).done(function (data) {
|
||||
xpath_data = data.xpath_data;
|
||||
$('#browsersteps-img').attr('src', data.screenshot);
|
||||
$('#browser-steps-ui .loader .spinner').fadeOut();
|
||||
apply_buttons_disabled = false;
|
||||
$("#browsersteps-img").css('opacity', 1);
|
||||
$('ul#browser_steps li .control .apply').css('opacity', 1);
|
||||
$("#loading-status-text").hide();
|
||||
}).fail(function (data) {
|
||||
console.log(data);
|
||||
if (data.responseText && data.responseText.includes("Browser session expired")) {
|
||||
disable_browsersteps_ui();
|
||||
}
|
||||
apply_buttons_disabled = false;
|
||||
$("#loading-status-text").hide();
|
||||
$('ul#browser_steps li .control .apply').css('opacity', 1);
|
||||
$("#browsersteps-img").css('opacity', 1);
|
||||
});
|
||||
}
|
||||
|
||||
function start() {
|
||||
console.log("Starting browser-steps UI");
|
||||
browsersteps_session_id = false;
|
||||
// @todo This setting of the first one should be done at the datalayer but wtforms doesnt wanna play nice
|
||||
$('#browser_steps >li:first-child').removeClass('empty');
|
||||
set_first_gotosite_disabled();
|
||||
$('#browser-steps-ui .loader .spinner').show();
|
||||
// Request a new session
|
||||
$('.clear,.remove', $('#browser_steps >li:first-child')).hide();
|
||||
$.ajax({
|
||||
type: "GET",
|
||||
url: browser_steps_start_url,
|
||||
@@ -299,12 +267,11 @@ $(document).ready(function () {
|
||||
}).done(function (data) {
|
||||
$("#loading-status-text").fadeIn();
|
||||
browsersteps_session_id = data.browsersteps_session_id;
|
||||
// This should trigger 'Goto site'
|
||||
console.log("Got startup response, requesting Goto-Site (first) step fake click");
|
||||
$('#browser_steps >li:first-child .apply').click();
|
||||
browser_interface_seconds_remaining = 500;
|
||||
// Request goto_site operation
|
||||
executeBrowserStep(
|
||||
browser_steps_sync_url + "&browsersteps_session_id=" + browsersteps_session_id + "&goto_website_url_first_step=true"
|
||||
);
|
||||
|
||||
set_first_gotosite_disabled();
|
||||
}).fail(function (data) {
|
||||
console.log(data);
|
||||
alert('There was an error communicating with the server.');
|
||||
@@ -313,6 +280,7 @@ $(document).ready(function () {
|
||||
}
|
||||
|
||||
function disable_browsersteps_ui() {
|
||||
set_first_gotosite_disabled();
|
||||
$("#browser-steps-ui").css('opacity', '0.3');
|
||||
$('#browsersteps-selector-canvas').off("mousemove mousedown click");
|
||||
}
|
||||
@@ -360,13 +328,16 @@ $(document).ready(function () {
|
||||
// Add the extra buttons to the steps
|
||||
$('ul#browser_steps li').each(function (i) {
|
||||
var s = '<div class="control">' + '<a data-step-index=' + i + ' class="pure-button button-secondary button-green button-xsmall apply" >Apply</a> ';
|
||||
s += `<a data-step-index="${i}" class="pure-button button-secondary button-xsmall clear" >Clear</a> ` +
|
||||
`<a data-step-index="${i}" class="pure-button button-secondary button-red button-xsmall remove" >Remove</a>`;
|
||||
if (i > 0) {
|
||||
// The first step never gets these (Goto-site)
|
||||
s += `<a data-step-index="${i}" class="pure-button button-secondary button-xsmall clear" >Clear</a> ` +
|
||||
`<a data-step-index="${i}" class="pure-button button-secondary button-red button-xsmall remove" >Remove</a>`;
|
||||
|
||||
// if a screenshot is available
|
||||
if (browser_steps_available_screenshots.includes(i.toString())) {
|
||||
var d = (browser_steps_last_error_step === i+1) ? 'before' : 'after';
|
||||
s += ` <a data-step-index="${i}" class="pure-button button-secondary button-xsmall show-screenshot" title="Show screenshot from last run" data-type="${d}">Pic</a> `;
|
||||
// if a screenshot is available
|
||||
if (browser_steps_available_screenshots.includes(i.toString())) {
|
||||
var d = (browser_steps_last_error_step === i+1) ? 'before' : 'after';
|
||||
s += ` <a data-step-index="${i}" class="pure-button button-secondary button-xsmall show-screenshot" title="Show screenshot from last run" data-type="${d}">Pic</a> `;
|
||||
}
|
||||
}
|
||||
s += '</div>';
|
||||
$(this).append(s)
|
||||
@@ -405,35 +376,80 @@ $(document).ready(function () {
|
||||
});
|
||||
|
||||
$('ul#browser_steps li .control .apply').click(function (event) {
|
||||
// sequential requests @todo refactor
|
||||
if (apply_buttons_disabled) {
|
||||
return;
|
||||
}
|
||||
|
||||
var current_data = $(event.currentTarget).closest('li');
|
||||
$('#browser-steps-ui .loader .spinner').fadeIn();
|
||||
apply_buttons_disabled = true;
|
||||
$('ul#browser_steps li .control .apply').css('opacity', 0.5);
|
||||
$("#browsersteps-img").css('opacity', 0.65);
|
||||
|
||||
var is_last_step = 0;
|
||||
var step_n = $(event.currentTarget).data('step-index');
|
||||
|
||||
// Determine if this is the last configured step
|
||||
var is_last_step = 0;
|
||||
// On the last step, we should also be getting data ready for the visual selector
|
||||
$('ul#browser_steps li select').each(function (i) {
|
||||
if ($(this).val() !== 'Choose one') {
|
||||
is_last_step += 1;
|
||||
}
|
||||
});
|
||||
is_last_step = (is_last_step == (step_n + 1));
|
||||
|
||||
if (is_last_step == (step_n + 1)) {
|
||||
is_last_step = true;
|
||||
} else {
|
||||
is_last_step = false;
|
||||
}
|
||||
|
||||
console.log("Requesting step via POST " + $("select[id$='operation']", current_data).first().val());
|
||||
|
||||
// Execute the browser step
|
||||
executeBrowserStep(
|
||||
browser_steps_sync_url + "&browsersteps_session_id=" + browsersteps_session_id,
|
||||
{
|
||||
// POST the currently clicked step form widget back and await response, redraw
|
||||
$.ajax({
|
||||
method: "POST",
|
||||
url: browser_steps_sync_url + "&browsersteps_session_id=" + browsersteps_session_id,
|
||||
data: {
|
||||
'operation': $("select[id$='operation']", current_data).first().val(),
|
||||
'selector': $("input[id$='selector']", current_data).first().val(),
|
||||
'optional_value': $("input[id$='optional_value']", current_data).first().val(),
|
||||
'step_n': step_n,
|
||||
'is_last_step': is_last_step
|
||||
},
|
||||
statusCode: {
|
||||
400: function () {
|
||||
// More than likely the CSRF token was lost when the server restarted
|
||||
alert("There was a problem processing the request, please reload the page.");
|
||||
$("#loading-status-text").hide();
|
||||
$('#browser-steps-ui .loader .spinner').fadeOut();
|
||||
},
|
||||
401: function (data) {
|
||||
// More than likely the CSRF token was lost when the server restarted
|
||||
alert(data.responseText);
|
||||
$("#loading-status-text").hide();
|
||||
$('#browser-steps-ui .loader .spinner').fadeOut();
|
||||
}
|
||||
}
|
||||
);
|
||||
}).done(function (data) {
|
||||
// it should return the new state (selectors available and screenshot)
|
||||
xpath_data = data.xpath_data;
|
||||
$('#browsersteps-img').attr('src', data.screenshot);
|
||||
$('#browser-steps-ui .loader .spinner').fadeOut();
|
||||
apply_buttons_disabled = false;
|
||||
$("#browsersteps-img").css('opacity', 1);
|
||||
$('ul#browser_steps li .control .apply').css('opacity', 1);
|
||||
$("#loading-status-text").hide();
|
||||
set_first_gotosite_disabled();
|
||||
}).fail(function (data) {
|
||||
console.log(data);
|
||||
if (data.responseText.includes("Browser session expired")) {
|
||||
disable_browsersteps_ui();
|
||||
}
|
||||
apply_buttons_disabled = false;
|
||||
$("#loading-status-text").hide();
|
||||
$('ul#browser_steps li .control .apply').css('opacity', 1);
|
||||
$("#browsersteps-img").css('opacity', 1);
|
||||
});
|
||||
|
||||
});
|
||||
|
||||
$('ul#browser_steps li .control .show-screenshot').click(function (element) {
|
||||
|
||||
@@ -184,8 +184,7 @@ $(document).ready(function() {
|
||||
}
|
||||
// If it's a button in a form, submit the form
|
||||
else if ($element.is('button')) {
|
||||
// Use requestSubmit() to include the button's name/value in the form data
|
||||
$element.closest('form')[0].requestSubmit($element[0]);
|
||||
$element.closest('form').submit();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
@@ -116,14 +116,6 @@ $(document).ready(function () {
|
||||
$('#realtime-conn-error').show();
|
||||
});
|
||||
|
||||
// Tell the server we're leaving cleanly so it can release the connection
|
||||
// immediately rather than waiting for a timeout.
|
||||
// Note: this only fires for voluntary closes (tab/window close, navigation away).
|
||||
// Hard kills, crashes and network drops will still timeout normally on the server.
|
||||
window.addEventListener('beforeunload', function () {
|
||||
socket.disconnect();
|
||||
});
|
||||
|
||||
socket.on('queue_size', function (data) {
|
||||
console.log(`${data.event_timestamp} - Queue size update: ${data.q_length}`);
|
||||
if(queueSizePagerInfoText) {
|
||||
|
||||
@@ -102,9 +102,7 @@
|
||||
}
|
||||
|
||||
// Navigate to search results (always redirect to watchlist home)
|
||||
// Use base_path if available (for sub-path deployments like /enlighten-richerx)
|
||||
const basePath = typeof base_path !== 'undefined' ? base_path : '';
|
||||
window.location.href = basePath + '/?' + params.toString();
|
||||
window.location.href = '/?' + params.toString();
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
@@ -1 +1 @@
|
||||
#diff-form{background:rgba(0,0,0,.05);padding:1em;border-radius:10px;margin-bottom:1em;color:#fff;font-size:.9rem;text-align:center}#diff-form label.from-to-label{width:4rem;text-decoration:none;padding:.5rem}#diff-form label.from-to-label#change-from{color:#b30000;background:#fadad7}#diff-form label.from-to-label#change-to{background:#eaf2c2;color:#406619}#diff-form #diff-style>span{display:inline-block;padding:.3em}#diff-form #diff-style>span label{font-weight:normal}#diff-form *{vertical-align:middle}body.difference-page section.content{padding-top:40px}#diff-ui{background:var(--color-background);padding:1rem;border-radius:5px}@media(min-width: 767px){#diff-ui{min-width:50%}}#diff-ui #text{font-size:11px}#diff-ui pre{white-space:break-spaces;overflow-wrap:anywhere}#diff-ui h1{display:inline;font-size:100%}#diff-ui #result{white-space:pre-wrap;word-break:break-word;overflow-wrap:break-word}#diff-ui .source{position:absolute;right:1%;top:.2em}@-moz-document url-prefix(){#diff-ui body{height:99%}}#diff-ui td#diff-col div{text-align:justify;white-space:pre-wrap}#diff-ui .ignored{background-color:#ccc;opacity:.7}#diff-ui .triggered{background-color:#1b98f8}#diff-ui .ignored.triggered{background-color:red}#diff-ui .tab-pane-inner#screenshot{text-align:center}#diff-ui .tab-pane-inner#screenshot img{max-width:99%}#diff-ui .pure-form button.reset-margin{margin:0px}#diff-ui .diff-fieldset{display:flex;align-items:center;gap:4px;flex-wrap:wrap}#diff-ui ul#highlightSnippetActions{list-style-type:none;display:flex;align-items:center;justify-content:center;gap:1.5rem;flex-wrap:wrap;padding:0;margin:0}#diff-ui ul#highlightSnippetActions li{display:flex;flex-direction:column;align-items:center;text-align:center;padding:.5rem;gap:.3rem}#diff-ui ul#highlightSnippetActions li button,#diff-ui ul#highlightSnippetActions li a{white-space:nowrap}#diff-ui ul#highlightSnippetActions span{font-size:.8rem;color:var(--color-text-input-description)}#diff-ui #cell-diff-jump-visualiser{display:flex;flex-direction:row;gap:1px;background:var(--color-background);border-radius:3px;overflow-x:hidden;position:sticky;top:0;z-index:10;padding-top:1rem;padding-bottom:1rem;justify-content:center}#diff-ui #cell-diff-jump-visualiser>div{flex:1;min-width:1px;max-width:10px;height:10px;background:var(--color-background-button-cancel);opacity:.3;border-radius:1px;transition:opacity .2s;position:relative}#diff-ui #cell-diff-jump-visualiser>div.deletion{background:#b30000;opacity:1}#diff-ui #cell-diff-jump-visualiser>div.insertion{background:#406619;opacity:1}#diff-ui #cell-diff-jump-visualiser>div.note{background:#406619;opacity:1}#diff-ui #cell-diff-jump-visualiser>div.mixed{background:linear-gradient(to right, #b30000 50%, #406619 50%);opacity:1}#diff-ui #cell-diff-jump-visualiser>div.current-position::after{content:"";position:absolute;bottom:-6px;left:50%;transform:translateX(-50%);width:0;height:0;border-left:4px solid rgba(0,0,0,0);border-right:4px solid rgba(0,0,0,0);border-bottom:4px solid var(--color-text)}#diff-ui #cell-diff-jump-visualiser>div:hover{opacity:.8;cursor:pointer}#text-diff-heading-area .snapshot-age{padding:4px;margin:.5rem 0;background-color:var(--color-background-snapshot-age);border-radius:3px;font-weight:bold;margin-bottom:4px}#text-diff-heading-area .snapshot-age.error{background-color:var(--color-error-background-snapshot-age);color:var(--color-error-text-snapshot-age)}#text-diff-heading-area .snapshot-age>*{padding-right:1rem}
|
||||
#diff-form{background:rgba(0,0,0,.05);padding:1em;border-radius:10px;margin-bottom:1em;color:#fff;font-size:.9rem;text-align:center}#diff-form label.from-to-label{width:4rem;text-decoration:none;padding:.5rem}#diff-form label.from-to-label#change-from{color:#b30000;background:#fadad7}#diff-form label.from-to-label#change-to{background:#eaf2c2;color:#406619}#diff-form #diff-style>span{display:inline-block;padding:.3em}#diff-form #diff-style>span label{font-weight:normal}#diff-form *{vertical-align:middle}body.difference-page section.content{padding-top:40px}#diff-ui{background:var(--color-background);padding:1rem;border-radius:5px}@media(min-width: 767px){#diff-ui{min-width:50%}}#diff-ui #text{font-size:11px}#diff-ui pre{white-space:break-spaces}#diff-ui h1{display:inline;font-size:100%}#diff-ui #result{white-space:pre-wrap;word-break:break-word;overflow-wrap:break-word}#diff-ui .source{position:absolute;right:1%;top:.2em}@-moz-document url-prefix(){#diff-ui body{height:99%}}#diff-ui td#diff-col div{text-align:justify;white-space:pre-wrap}#diff-ui .ignored{background-color:#ccc;opacity:.7}#diff-ui .triggered{background-color:#1b98f8}#diff-ui .ignored.triggered{background-color:red}#diff-ui .tab-pane-inner#screenshot{text-align:center}#diff-ui .tab-pane-inner#screenshot img{max-width:99%}#diff-ui .pure-form button.reset-margin{margin:0px}#diff-ui .diff-fieldset{display:flex;align-items:center;gap:4px;flex-wrap:wrap}#diff-ui ul#highlightSnippetActions{list-style-type:none;display:flex;align-items:center;justify-content:center;gap:1.5rem;flex-wrap:wrap;padding:0;margin:0}#diff-ui ul#highlightSnippetActions li{display:flex;flex-direction:column;align-items:center;text-align:center;padding:.5rem;gap:.3rem}#diff-ui ul#highlightSnippetActions li button,#diff-ui ul#highlightSnippetActions li a{white-space:nowrap}#diff-ui ul#highlightSnippetActions span{font-size:.8rem;color:var(--color-text-input-description)}#diff-ui #cell-diff-jump-visualiser{display:flex;flex-direction:row;gap:1px;background:var(--color-background);border-radius:3px;overflow-x:hidden;position:sticky;top:0;z-index:10;padding-top:1rem;padding-bottom:1rem;justify-content:center}#diff-ui #cell-diff-jump-visualiser>div{flex:1;min-width:1px;max-width:10px;height:10px;background:var(--color-background-button-cancel);opacity:.3;border-radius:1px;transition:opacity .2s;position:relative}#diff-ui #cell-diff-jump-visualiser>div.deletion{background:#b30000;opacity:1}#diff-ui #cell-diff-jump-visualiser>div.insertion{background:#406619;opacity:1}#diff-ui #cell-diff-jump-visualiser>div.note{background:#406619;opacity:1}#diff-ui #cell-diff-jump-visualiser>div.mixed{background:linear-gradient(to right, #b30000 50%, #406619 50%);opacity:1}#diff-ui #cell-diff-jump-visualiser>div.current-position::after{content:"";position:absolute;bottom:-6px;left:50%;transform:translateX(-50%);width:0;height:0;border-left:4px solid rgba(0,0,0,0);border-right:4px solid rgba(0,0,0,0);border-bottom:4px solid var(--color-text)}#diff-ui #cell-diff-jump-visualiser>div:hover{opacity:.8;cursor:pointer}#text-diff-heading-area .snapshot-age{padding:4px;margin:.5rem 0;background-color:var(--color-background-snapshot-age);border-radius:3px;font-weight:bold;margin-bottom:4px}#text-diff-heading-area .snapshot-age.error{background-color:var(--color-error-background-snapshot-age);color:var(--color-error-text-snapshot-age)}#text-diff-heading-area .snapshot-age>*{padding-right:1rem}
|
||||
|
||||
@@ -62,7 +62,6 @@ body.difference-page {
|
||||
|
||||
pre {
|
||||
white-space: break-spaces;
|
||||
overflow-wrap: anywhere;
|
||||
}
|
||||
|
||||
|
||||
|
||||
File diff suppressed because one or more lines are too long
+155
-244
@@ -22,8 +22,6 @@ import uuid as uuid_builder
|
||||
from loguru import logger
|
||||
from blinker import signal
|
||||
|
||||
from ..model.Tags import TagsDict
|
||||
|
||||
# Try to import orjson for faster JSON serialization
|
||||
try:
|
||||
import orjson
|
||||
@@ -35,8 +33,9 @@ except ImportError:
|
||||
from ..processors import get_custom_watch_obj_for_processor
|
||||
|
||||
# Import the base class and helpers
|
||||
from .file_saving_datastore import FileSavingDataStore, load_all_watches, load_all_tags, save_json_atomic
|
||||
from .file_saving_datastore import FileSavingDataStore, load_all_watches, save_watch_atomic, save_json_atomic
|
||||
from .updates import DatastoreUpdatesMixin
|
||||
from .legacy_loader import has_legacy_datastore
|
||||
|
||||
# Because the server will run as a daemon and wont know the URL for notification links when firing off a notification
|
||||
BASE_URL_NOT_SET_TEXT = '("Base URL" not set - see settings - notifications)'
|
||||
@@ -57,7 +56,9 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
|
||||
# Should only be active for docker
|
||||
# logging.basicConfig(filename='/dev/stdout', level=logging.INFO)
|
||||
self.datastore_path = datastore_path
|
||||
self.needs_write = False
|
||||
self.start_time = time.time()
|
||||
self.stop_thread = False
|
||||
self.save_version_copy_json_db(version_tag)
|
||||
self.reload_state(datastore_path=datastore_path, include_default_watches=include_default_watches, version_tag=version_tag)
|
||||
|
||||
@@ -79,7 +80,7 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
|
||||
logger.info(f"Backing up changedetection.json due to new version to '{db_path_version_backup}'.")
|
||||
copyfile(db_path, db_path_version_backup)
|
||||
|
||||
def _load_settings(self, filename="changedetection.json"):
|
||||
def _load_settings(self):
|
||||
"""
|
||||
Load settings from storage.
|
||||
|
||||
@@ -88,7 +89,7 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
|
||||
Returns:
|
||||
dict: Settings data loaded from storage
|
||||
"""
|
||||
changedetection_json = os.path.join(self.datastore_path, filename)
|
||||
changedetection_json = os.path.join(self.datastore_path, "changedetection.json")
|
||||
|
||||
logger.info(f"Loading settings from {changedetection_json}")
|
||||
|
||||
@@ -123,28 +124,11 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
|
||||
if 'application' in settings_data['settings']:
|
||||
self.__data['settings']['application'].update(settings_data['settings']['application'])
|
||||
|
||||
# Use our Tags dict with cleanup helpers etc
|
||||
# @todo Same for Watches
|
||||
existing_tags = settings_data.get('settings', {}).get('application', {}).get('tags') or {}
|
||||
self.__data['settings']['application']['tags'] = TagsDict(existing_tags, datastore_path=self.datastore_path)
|
||||
|
||||
# More or less for the old format which had this data in the one url-watches.json
|
||||
# cant hurt to leave it here,
|
||||
if 'watching' in settings_data:
|
||||
self.__data['watching'].update(settings_data['watching'])
|
||||
|
||||
def _rehydrate_tags(self):
|
||||
"""Rehydrate tag entities from stored data into Tag objects with restock_diff processor."""
|
||||
from ..model import Tag
|
||||
|
||||
"""Rehydrate tag entities from stored data."""
|
||||
for uuid, tag in self.__data['settings']['application']['tags'].items():
|
||||
# Force processor to restock_diff for override functionality (technical debt)
|
||||
tag['processor'] = 'restock_diff'
|
||||
|
||||
self.__data['settings']['application']['tags'][uuid] = Tag.model(
|
||||
datastore_path=self.datastore_path,
|
||||
__datastore=self.__data,
|
||||
default=tag
|
||||
self.__data['settings']['application']['tags'][uuid] = self.rehydrate_entity(
|
||||
uuid, tag, processor_override='restock_diff'
|
||||
)
|
||||
logger.info(f"Tag: {uuid} {tag['title']}")
|
||||
|
||||
@@ -157,34 +141,25 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
|
||||
logger.info(f"Rehydrating {watch_count} watches...")
|
||||
watching_rehydrated = {}
|
||||
for uuid, watch_dict in self.__data.get('watching', {}).items():
|
||||
if isinstance(watch_dict, dict):
|
||||
watching_rehydrated[uuid] = self.rehydrate_entity(uuid, watch_dict)
|
||||
else:
|
||||
logger.error(f"Watch UUID {uuid} already rehydrated")
|
||||
|
||||
watching_rehydrated[uuid] = self.rehydrate_entity(uuid, watch_dict)
|
||||
self.__data['watching'] = watching_rehydrated
|
||||
logger.success(f"Rehydrated {watch_count} watches into Watch objects")
|
||||
|
||||
|
||||
def _load_state(self, main_settings_filename="changedetection.json"):
|
||||
def _load_state(self):
|
||||
"""
|
||||
Load complete datastore state from storage.
|
||||
|
||||
Orchestrates loading of settings, watches, and tags using polymorphic methods.
|
||||
Orchestrates loading of settings and watches using polymorphic methods.
|
||||
"""
|
||||
# Load settings
|
||||
settings_data = self._load_settings(filename=main_settings_filename)
|
||||
settings_data = self._load_settings()
|
||||
self._apply_settings(settings_data)
|
||||
|
||||
# Load watches, scan them from the disk
|
||||
# Load watches (polymorphic - parent class method)
|
||||
self._load_watches()
|
||||
self._rehydrate_watches()
|
||||
|
||||
# Load tags from individual tag.json files
|
||||
# These will override any tags in settings (migration path)
|
||||
self._load_tags()
|
||||
|
||||
# Rehydrate any remaining tags from settings (legacy/fallback)
|
||||
# Rehydrate tags
|
||||
self._rehydrate_tags()
|
||||
|
||||
def reload_state(self, datastore_path, include_default_watches, version_tag):
|
||||
@@ -203,7 +178,7 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
|
||||
self.datastore_path = datastore_path
|
||||
|
||||
# Initialize data structure
|
||||
self.__data = App.model(datastore_path=datastore_path)
|
||||
self.__data = App.model()
|
||||
self.json_store_path = os.path.join(self.datastore_path, "changedetection.json")
|
||||
|
||||
# Base definition for all watchers (deepcopy part of #569)
|
||||
@@ -216,75 +191,88 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
|
||||
|
||||
# Check if datastore already exists
|
||||
changedetection_json = os.path.join(self.datastore_path, "changedetection.json")
|
||||
changedetection_json_old_schema = os.path.join(self.datastore_path, "url-watches.json")
|
||||
|
||||
if os.path.exists(changedetection_json):
|
||||
# Run schema updates if needed
|
||||
# Pass current schema version from loaded datastore (defaults to 0 if not set)
|
||||
# Load existing datastore (changedetection.json + watch.json files)
|
||||
logger.info("Loading existing datastore")
|
||||
self._load_state()
|
||||
current_schema = self.data['settings']['application'].get('schema_version', 0)
|
||||
self.run_updates(current_schema_version=current_schema)
|
||||
try:
|
||||
self._load_state()
|
||||
except Exception as e:
|
||||
logger.critical(f"Failed to load datastore: {e}")
|
||||
raise
|
||||
|
||||
# Legacy datastore detected - trigger migration, even works if the schema is much before the migration step.
|
||||
elif os.path.exists(changedetection_json_old_schema):
|
||||
|
||||
logger.critical(f"Legacy datastore detected at {changedetection_json_old_schema}, loading and running updates")
|
||||
self._load_state(main_settings_filename="url-watches.json")
|
||||
# update 26 will load the whole old config from disk to __data
|
||||
# Run schema updates if needed
|
||||
# Pass current schema version from loaded datastore (defaults to 0 if not set)
|
||||
current_schema = self.__data['settings']['application'].get('schema_version', 0)
|
||||
self.run_updates(current_schema_version=current_schema)
|
||||
# Probably tags were also shifted to disk and many other changes, so best to reload here.
|
||||
self._load_state()
|
||||
|
||||
else:
|
||||
# No datastore yet - check if this is a fresh install or legacy migration
|
||||
self.init_fresh_install(include_default_watches=include_default_watches,
|
||||
version_tag=version_tag)
|
||||
# Maybe they copied a bunch of watch subdirs across too
|
||||
self._load_state()
|
||||
# Generate app_guid FIRST (required for all operations)
|
||||
if "pytest" in sys.modules or "PYTEST_CURRENT_TEST" in os.environ:
|
||||
self.__data['app_guid'] = "test-" + str(uuid_builder.uuid4())
|
||||
else:
|
||||
self.__data['app_guid'] = str(uuid_builder.uuid4())
|
||||
|
||||
def init_fresh_install(self, include_default_watches, version_tag):
|
||||
# Generate app_guid FIRST (required for all operations)
|
||||
if "pytest" in sys.modules or "PYTEST_CURRENT_TEST" in os.environ:
|
||||
self.__data['app_guid'] = "test-" + str(uuid_builder.uuid4())
|
||||
else:
|
||||
self.__data['app_guid'] = str(uuid_builder.uuid4())
|
||||
# Generate RSS access token
|
||||
self.__data['settings']['application']['rss_access_token'] = secrets.token_hex(16)
|
||||
|
||||
# Generate RSS access token
|
||||
self.__data['settings']['application']['rss_access_token'] = secrets.token_hex(16)
|
||||
# Generate API access token
|
||||
self.__data['settings']['application']['api_access_token'] = secrets.token_hex(16)
|
||||
|
||||
# Generate API access token
|
||||
self.__data['settings']['application']['api_access_token'] = secrets.token_hex(16)
|
||||
logger.warning(f"No datastore found, creating new datastore at {self.datastore_path}")
|
||||
# Check if legacy datastore exists (url-watches.json)
|
||||
if has_legacy_datastore(self.datastore_path):
|
||||
# Legacy datastore detected - trigger migration
|
||||
logger.critical(f"Legacy datastore detected at {self.datastore_path}/url-watches.json")
|
||||
logger.critical("Migration will be triggered via update_26")
|
||||
|
||||
# Set schema version to latest (no updates needed)
|
||||
latest_update_available = self.get_updates_available().pop()
|
||||
logger.info(f"Marking fresh install to schema version {latest_update_available}")
|
||||
self.__data['settings']['application']['schema_version'] = latest_update_available
|
||||
# Load the legacy datastore
|
||||
from .legacy_loader import load_legacy_format
|
||||
legacy_path = os.path.join(self.datastore_path, "url-watches.json")
|
||||
legacy_data = load_legacy_format(legacy_path)
|
||||
|
||||
# Add default watches if requested
|
||||
if include_default_watches:
|
||||
self.add_watch(
|
||||
url='https://news.ycombinator.com/',
|
||||
tag='Tech news',
|
||||
extras={'fetch_backend': 'html_requests'}
|
||||
)
|
||||
self.add_watch(
|
||||
url='https://changedetection.io/CHANGELOG.txt',
|
||||
tag='changedetection.io',
|
||||
extras={'fetch_backend': 'html_requests'}
|
||||
)
|
||||
if not legacy_data:
|
||||
raise Exception("Failed to load legacy datastore from url-watches.json")
|
||||
|
||||
# Create changedetection.json immediately
|
||||
try:
|
||||
self._save_settings()
|
||||
logger.info("Created changedetection.json for new datastore")
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to create initial changedetection.json: {e}")
|
||||
# Store the loaded data
|
||||
self.__data = legacy_data
|
||||
|
||||
# CRITICAL: Rehydrate watches from dicts into Watch objects
|
||||
# This ensures watches have their methods available during migration
|
||||
self._rehydrate_watches()
|
||||
|
||||
# update_26 will save watches to individual files and create changedetection.json
|
||||
# Next startup will load from new format normally
|
||||
self.run_updates()
|
||||
|
||||
|
||||
else:
|
||||
# Fresh install - create new datastore
|
||||
logger.warning(f"No datastore found, creating new datastore at {self.datastore_path}")
|
||||
|
||||
# Set schema version to latest (no updates needed)
|
||||
updates_available = self.get_updates_available()
|
||||
self.__data['settings']['application']['schema_version'] = updates_available.pop() if updates_available else 26
|
||||
|
||||
# Add default watches if requested
|
||||
if include_default_watches:
|
||||
self.add_watch(
|
||||
url='https://news.ycombinator.com/',
|
||||
tag='Tech news',
|
||||
extras={'fetch_backend': 'html_requests'}
|
||||
)
|
||||
self.add_watch(
|
||||
url='https://changedetection.io/CHANGELOG.txt',
|
||||
tag='changedetection.io',
|
||||
extras={'fetch_backend': 'html_requests'}
|
||||
)
|
||||
|
||||
# Create changedetection.json immediately
|
||||
try:
|
||||
self._save_settings()
|
||||
logger.info("Created changedetection.json for new datastore")
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to create initial changedetection.json: {e}")
|
||||
|
||||
# Set version tag
|
||||
self.__data['version_tag'] = version_tag
|
||||
@@ -298,19 +286,19 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
|
||||
self.__data['app_guid'] = "test-" + str(uuid_builder.uuid4())
|
||||
else:
|
||||
self.__data['app_guid'] = str(uuid_builder.uuid4())
|
||||
self.commit()
|
||||
self.mark_settings_dirty()
|
||||
|
||||
# Ensure RSS access token exists
|
||||
if not self.__data['settings']['application'].get('rss_access_token'):
|
||||
secret = secrets.token_hex(16)
|
||||
self.__data['settings']['application']['rss_access_token'] = secret
|
||||
self.commit()
|
||||
self.mark_settings_dirty()
|
||||
|
||||
# Ensure API access token exists
|
||||
if not self.__data['settings']['application'].get('api_access_token'):
|
||||
secret = secrets.token_hex(16)
|
||||
self.__data['settings']['application']['api_access_token'] = secret
|
||||
self.commit()
|
||||
self.mark_settings_dirty()
|
||||
|
||||
# Handle password reset lockfile
|
||||
password_reset_lockfile = os.path.join(self.datastore_path, "removepassword.lock")
|
||||
@@ -318,6 +306,9 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
|
||||
self.remove_password()
|
||||
unlink(password_reset_lockfile)
|
||||
|
||||
# Start the background save thread
|
||||
self.start_save_thread()
|
||||
|
||||
def rehydrate_entity(self, uuid, entity, processor_override=None):
|
||||
"""Set the dict back to the dict Watch object"""
|
||||
entity['uuid'] = uuid
|
||||
@@ -350,25 +341,13 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
|
||||
"""
|
||||
Build settings data structure for saving.
|
||||
|
||||
Tags behavior depends on schema version:
|
||||
- Before update_28 (schema < 28): Tags saved in settings for migration
|
||||
- After update_28 (schema >= 28): Tags excluded from settings (in individual files)
|
||||
|
||||
Returns:
|
||||
dict: Settings data ready for serialization
|
||||
"""
|
||||
import copy
|
||||
|
||||
# Deep copy settings to avoid modifying the original
|
||||
settings_copy = copy.deepcopy(self.__data['settings'])
|
||||
|
||||
# Is saved as {uuid}/tag.json
|
||||
settings_copy['application']['tags'] = {}
|
||||
|
||||
return {
|
||||
'note': 'Settings file - watches are in {uuid}/watch.json, tags are in {uuid}/tag.json',
|
||||
'app_guid': self.__data.get('app_guid'),
|
||||
'settings': settings_copy,
|
||||
'note': 'Settings file - watches are stored in individual {uuid}/watch.json files',
|
||||
'app_guid': self.__data['app_guid'],
|
||||
'settings': self.__data['settings'],
|
||||
'build_sha': self.__data.get('build_sha'),
|
||||
'version_tag': self.__data.get('version_tag')
|
||||
}
|
||||
@@ -386,7 +365,7 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
|
||||
"""
|
||||
settings_data = self._build_settings_data()
|
||||
changedetection_json = os.path.join(self.datastore_path, "changedetection.json")
|
||||
save_json_atomic(changedetection_json, settings_data, label="settings")
|
||||
save_json_atomic(changedetection_json, settings_data, label="settings", max_size_mb=10)
|
||||
|
||||
def _load_watches(self):
|
||||
"""
|
||||
@@ -396,45 +375,22 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
|
||||
Implementation of abstract method from FileSavingDataStore.
|
||||
Delegates to helper function and stores results in internal data structure.
|
||||
"""
|
||||
|
||||
# Store loaded data
|
||||
# @note this will also work for the old legacy format because self.__data['watching'] should already have them loaded by this point.
|
||||
self.__data['watching'].update(load_all_watches(
|
||||
watching, watch_hashes = load_all_watches(
|
||||
self.datastore_path,
|
||||
self.rehydrate_entity
|
||||
))
|
||||
logger.debug(f"Loaded {len(self.__data['watching'])} watches")
|
||||
|
||||
def _load_tags(self):
|
||||
"""
|
||||
Load all tags from storage.
|
||||
|
||||
File backend implementation: reads individual tag.json files.
|
||||
Tags loaded from files override any tags in settings (migration path).
|
||||
"""
|
||||
from ..model import Tag
|
||||
|
||||
def rehydrate_tag(uuid, entity_dict):
|
||||
"""Rehydrate tag as Tag object with forced restock_diff processor."""
|
||||
entity_dict['uuid'] = uuid
|
||||
entity_dict['processor'] = 'restock_diff' # Force processor for override functionality
|
||||
|
||||
return Tag.model(
|
||||
datastore_path=self.datastore_path,
|
||||
__datastore=self.__data,
|
||||
default=entity_dict
|
||||
)
|
||||
|
||||
tags = load_all_tags(
|
||||
self.datastore_path,
|
||||
rehydrate_tag
|
||||
self.rehydrate_entity,
|
||||
self._compute_hash
|
||||
)
|
||||
|
||||
# Override settings tags with loaded tags
|
||||
# This ensures tag.json files take precedence over settings
|
||||
if tags:
|
||||
self.__data['settings']['application']['tags'].update(tags)
|
||||
logger.info(f"Loaded {len(tags)} tags from individual tag.json files")
|
||||
# Store loaded data
|
||||
self.__data['watching'] = watching
|
||||
self._watch_hashes = watch_hashes
|
||||
|
||||
# Verify all watches have hashes
|
||||
missing_hashes = [uuid for uuid in watching.keys() if uuid not in watch_hashes]
|
||||
if missing_hashes:
|
||||
logger.error(f"WARNING: {len(missing_hashes)} watches missing hashes after load: {missing_hashes[:5]}")
|
||||
else:
|
||||
logger.debug(f"All {len(watching)} watches have valid hashes")
|
||||
|
||||
def _delete_watch(self, uuid):
|
||||
"""
|
||||
@@ -458,7 +414,7 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
|
||||
def set_last_viewed(self, uuid, timestamp):
|
||||
logger.debug(f"Setting watch UUID: {uuid} last viewed to {int(timestamp)}")
|
||||
self.data['watching'][uuid].update({'last_viewed': int(timestamp)})
|
||||
self.data['watching'][uuid].commit()
|
||||
self.mark_watch_dirty(uuid)
|
||||
|
||||
watch_check_update = signal('watch_check_update')
|
||||
if watch_check_update:
|
||||
@@ -466,79 +422,7 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
|
||||
|
||||
def remove_password(self):
|
||||
self.__data['settings']['application']['password'] = False
|
||||
self.commit()
|
||||
|
||||
def clear_all_last_checksums(self):
|
||||
"""
|
||||
Delete all last-checksum.txt files to force reprocessing of all watches.
|
||||
|
||||
This should be called when global settings change, since watches inherit
|
||||
configuration and need to reprocess even if their individual watch dict
|
||||
hasn't been modified.
|
||||
|
||||
Note: We delete the checksum file rather than setting was_edited=True because:
|
||||
- was_edited is not persisted across restarts
|
||||
- File deletion ensures reprocessing works across app restarts
|
||||
"""
|
||||
deleted_count = 0
|
||||
for uuid in self.__data['watching'].keys():
|
||||
watch = self.__data['watching'][uuid]
|
||||
if watch.data_dir:
|
||||
checksum_file = os.path.join(watch.data_dir, 'last-checksum.txt')
|
||||
if os.path.isfile(checksum_file):
|
||||
try:
|
||||
os.remove(checksum_file)
|
||||
deleted_count += 1
|
||||
logger.debug(f"Cleared checksum for watch {uuid}")
|
||||
except OSError as e:
|
||||
logger.warning(f"Failed to delete checksum file for {uuid}: {e}")
|
||||
|
||||
logger.info(f"Cleared {deleted_count} checksum files to force reprocessing")
|
||||
return deleted_count
|
||||
|
||||
def clear_checksums_for_tag(self, tag_uuid):
|
||||
"""
|
||||
Delete last-checksum.txt files for all watches using a specific tag.
|
||||
|
||||
This should be called when a tag configuration is edited, since watches
|
||||
inherit tag settings and need to reprocess.
|
||||
|
||||
Args:
|
||||
tag_uuid: UUID of the tag that was modified
|
||||
|
||||
Returns:
|
||||
int: Number of checksum files deleted
|
||||
"""
|
||||
deleted_count = 0
|
||||
for uuid, watch in self.__data['watching'].items():
|
||||
if watch.get('tags') and tag_uuid in watch['tags']:
|
||||
if watch.data_dir:
|
||||
checksum_file = os.path.join(watch.data_dir, 'last-checksum.txt')
|
||||
if os.path.isfile(checksum_file):
|
||||
try:
|
||||
os.remove(checksum_file)
|
||||
deleted_count += 1
|
||||
logger.debug(f"Cleared checksum for watch {uuid} (tag {tag_uuid})")
|
||||
except OSError as e:
|
||||
logger.warning(f"Failed to delete checksum file for {uuid}: {e}")
|
||||
|
||||
logger.info(f"Cleared {deleted_count} checksum files for tag {tag_uuid}")
|
||||
return deleted_count
|
||||
|
||||
def commit(self):
|
||||
"""
|
||||
Save settings immediately to disk using atomic write.
|
||||
|
||||
Uses atomic write pattern (temp file + rename) for crash safety.
|
||||
|
||||
Fire-and-forget: Logs errors but does not raise exceptions.
|
||||
Settings data remains in memory even if save fails, so next commit will retry.
|
||||
"""
|
||||
try:
|
||||
self._save_settings()
|
||||
logger.debug("Committed settings")
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to commit settings: {e}")
|
||||
self.mark_settings_dirty()
|
||||
|
||||
def update_watch(self, uuid, update_obj):
|
||||
|
||||
@@ -557,8 +441,7 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
|
||||
|
||||
self.__data['watching'][uuid].update(update_obj)
|
||||
|
||||
# Immediate save
|
||||
self.__data['watching'][uuid].commit()
|
||||
self.mark_watch_dirty(uuid)
|
||||
|
||||
@property
|
||||
def threshold_seconds(self):
|
||||
@@ -619,6 +502,10 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to delete watch {watch_uuid} from storage: {e}")
|
||||
|
||||
# Clean up tracking data
|
||||
self._watch_hashes.pop(watch_uuid, None)
|
||||
self._dirty_watches.discard(watch_uuid)
|
||||
|
||||
# Send delete signal
|
||||
watch_delete_signal = signal('watch_deleted')
|
||||
if watch_delete_signal:
|
||||
@@ -640,11 +527,17 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
|
||||
# Remove from watching dict
|
||||
del self.data['watching'][uuid]
|
||||
|
||||
# Clean up tracking data
|
||||
self._watch_hashes.pop(uuid, None)
|
||||
self._dirty_watches.discard(uuid)
|
||||
|
||||
# Send delete signal
|
||||
watch_delete_signal = signal('watch_deleted')
|
||||
if watch_delete_signal:
|
||||
watch_delete_signal.send(watch_uuid=uuid)
|
||||
|
||||
self.needs_write_urgent = True
|
||||
|
||||
# Clone a watch by UUID
|
||||
def clone(self, uuid):
|
||||
url = self.data['watching'][uuid].get('url')
|
||||
@@ -669,7 +562,7 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
|
||||
# Remove a watchs data but keep the entry (URL etc)
|
||||
def clear_watch_history(self, uuid):
|
||||
self.__data['watching'][uuid].clear_watch()
|
||||
self.__data['watching'][uuid].commit()
|
||||
self.needs_write_urgent = True
|
||||
|
||||
def add_watch(self, url, tag='', extras=None, tag_uuids=None, save_immediately=True):
|
||||
|
||||
@@ -728,11 +621,8 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
|
||||
return False
|
||||
|
||||
if not is_safe_valid_url(url):
|
||||
from flask import has_request_context
|
||||
if has_request_context():
|
||||
flash(gettext('Watch protocol is not permitted or invalid URL format'), 'error')
|
||||
else:
|
||||
logger.error(f"add_watch: URL '{url}' is not permitted or invalid, skipping.")
|
||||
flash(gettext('Watch protocol is not permitted or invalid URL format'), 'error')
|
||||
|
||||
return None
|
||||
|
||||
# Check PAGE_WATCH_LIMIT if set
|
||||
@@ -785,9 +675,16 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
|
||||
self.__data['watching'][new_uuid] = new_watch
|
||||
|
||||
if save_immediately:
|
||||
# Save immediately using commit
|
||||
new_watch.commit()
|
||||
logger.debug(f"Saved new watch {new_uuid}")
|
||||
# Save immediately using polymorphic method
|
||||
try:
|
||||
self.save_watch(new_uuid, force=True)
|
||||
logger.debug(f"Saved new watch {new_uuid}")
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to save new watch {new_uuid}: {e}")
|
||||
# Mark dirty for retry
|
||||
self.mark_watch_dirty(new_uuid)
|
||||
else:
|
||||
self.mark_watch_dirty(new_uuid)
|
||||
|
||||
logger.debug(f"Added '{url}'")
|
||||
|
||||
@@ -822,6 +719,25 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
|
||||
|
||||
# Old sync_to_json and save_datastore methods removed - now handled by FileSavingDataStore parent class
|
||||
|
||||
# Go through the datastore path and remove any snapshots that are not mentioned in the index
|
||||
# This usually is not used, but can be handy.
|
||||
def remove_unused_snapshots(self):
|
||||
logger.info("Removing snapshots from datastore that are not in the index..")
|
||||
|
||||
index = []
|
||||
for uuid in self.data['watching']:
|
||||
for id in self.data['watching'][uuid].history:
|
||||
index.append(self.data['watching'][uuid].history[str(id)])
|
||||
|
||||
import pathlib
|
||||
|
||||
# Only in the sub-directories
|
||||
for uuid in self.data['watching']:
|
||||
for item in pathlib.Path(self.datastore_path).rglob(uuid + "/*.txt"):
|
||||
if not str(item) in index:
|
||||
logger.info(f"Removing {item}")
|
||||
unlink(item)
|
||||
|
||||
@property
|
||||
def proxy_list(self):
|
||||
proxy_list = {}
|
||||
@@ -913,7 +829,7 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
|
||||
if watch:
|
||||
|
||||
# In /datastore/xyz-xyz/headers.txt
|
||||
filepath = os.path.join(watch.data_dir, 'headers.txt')
|
||||
filepath = os.path.join(watch.watch_data_dir, 'headers.txt')
|
||||
try:
|
||||
if os.path.isfile(filepath):
|
||||
headers.update(parse_headers_from_text_file(filepath))
|
||||
@@ -960,21 +876,16 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
|
||||
# So we use the same model as a Watch
|
||||
with self.lock:
|
||||
from ..model import Tag
|
||||
new_tag = Tag.model(
|
||||
datastore_path=self.datastore_path,
|
||||
__datastore=self.__data,
|
||||
default={
|
||||
'title': title.strip(),
|
||||
'date_created': int(time.time())
|
||||
}
|
||||
)
|
||||
new_tag = Tag.model(datastore_path=self.datastore_path, default={
|
||||
'title': title.strip(),
|
||||
'date_created': int(time.time())
|
||||
})
|
||||
|
||||
new_uuid = new_tag.get('uuid')
|
||||
|
||||
self.__data['settings']['application']['tags'][new_uuid] = new_tag
|
||||
|
||||
# Save tag to its own tag.json file instead of settings
|
||||
new_tag.commit()
|
||||
self.mark_settings_dirty()
|
||||
return new_uuid
|
||||
|
||||
def get_all_tags_for_watch(self, uuid):
|
||||
@@ -1091,7 +1002,7 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
|
||||
notification_urls.append(notification_url)
|
||||
self.__data['settings']['application']['notification_urls'] = notification_urls
|
||||
|
||||
self.commit()
|
||||
self.mark_settings_dirty()
|
||||
return notification_url
|
||||
|
||||
# Schema update methods moved to store/updates.py (DatastoreUpdatesMixin)
|
||||
|
||||
@@ -81,3 +81,20 @@ class DataStore(ABC):
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def force_save_all(self):
|
||||
"""
|
||||
Force immediate synchronous save of all data to storage.
|
||||
|
||||
This is the abstract method for forcing a complete save.
|
||||
Different backends implement this differently:
|
||||
- File backend: Mark all watches/settings dirty, then save
|
||||
- Redis backend: SAVE command or pipeline flush
|
||||
- SQL backend: COMMIT transaction
|
||||
|
||||
Used by:
|
||||
- Backup creation (ensure everything is saved before backup)
|
||||
- Shutdown (ensure all changes are persisted)
|
||||
- Manual save operations
|
||||
"""
|
||||
pass
|
||||
|
||||
@@ -1,17 +1,22 @@
|
||||
"""
|
||||
File-based datastore with individual watch persistence and immediate commits.
|
||||
File-based datastore with individual watch persistence and dirty tracking.
|
||||
|
||||
This module provides the FileSavingDataStore abstract class that implements:
|
||||
- Individual watch.json file persistence
|
||||
- Immediate commit-based persistence (watch.commit(), datastore.commit())
|
||||
- Hash-based change detection (only save what changed)
|
||||
- Periodic audit scan (catches unmarked changes)
|
||||
- Background save thread with batched parallel saves
|
||||
- Atomic file writes safe for NFS/NAS
|
||||
"""
|
||||
|
||||
import glob
|
||||
import hashlib
|
||||
import json
|
||||
import os
|
||||
import tempfile
|
||||
import time
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
from threading import Thread
|
||||
from loguru import logger
|
||||
|
||||
from .base import DataStore
|
||||
@@ -29,6 +34,19 @@ except ImportError:
|
||||
# Set to True for mission-critical deployments requiring crash consistency
|
||||
FORCE_FSYNC_DATA_IS_CRITICAL = bool(strtobool(os.getenv('FORCE_FSYNC_DATA_IS_CRITICAL', 'False')))
|
||||
|
||||
# Save interval configuration: How often the background thread saves dirty items
|
||||
# Default 10 seconds - increase for less frequent saves, decrease for more frequent
|
||||
DATASTORE_SCAN_DIRTY_SAVE_INTERVAL_SECONDS = int(os.getenv('DATASTORE_SCAN_DIRTY_SAVE_INTERVAL_SECONDS', '10'))
|
||||
|
||||
# Rolling audit configuration: Scans a fraction of watches each cycle
|
||||
# Default: Run audit every 10s, split into 5 shards
|
||||
# Full audit completes every 50s (10s × 5 shards)
|
||||
# With 56k watches: 56k / 5 = ~11k watches per cycle (~60ms vs 316ms for all)
|
||||
# Handles dynamic watch count - recalculates shard boundaries each cycle
|
||||
DATASTORE_AUDIT_INTERVAL_SECONDS = int(os.getenv('DATASTORE_AUDIT_INTERVAL_SECONDS', '10'))
|
||||
DATASTORE_AUDIT_SHARDS = int(os.getenv('DATASTORE_AUDIT_SHARDS', '5'))
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Helper Functions for Atomic File Operations
|
||||
# ============================================================================
|
||||
@@ -43,9 +61,6 @@ def save_json_atomic(file_path, data_dict, label="file", max_size_mb=10):
|
||||
- Size validation
|
||||
- Proper error handling
|
||||
|
||||
Thread safety: Caller must hold datastore.lock to prevent concurrent modifications.
|
||||
Multi-process safety: Not supported - run only one app instance per datastore.
|
||||
|
||||
Args:
|
||||
file_path: Full path to target JSON file
|
||||
data_dict: Dictionary to serialize
|
||||
@@ -175,37 +190,23 @@ def save_json_atomic(file_path, data_dict, label="file", max_size_mb=10):
|
||||
raise e
|
||||
|
||||
|
||||
def save_entity_atomic(entity_dir, uuid, entity_dict, filename, entity_type, max_size_mb):
|
||||
"""
|
||||
Save an entity (watch/tag) to disk using atomic write pattern.
|
||||
|
||||
Generic function for saving any watch_base subclass (Watch, Tag, etc.).
|
||||
|
||||
Args:
|
||||
entity_dir: Directory for this entity (e.g., /datastore/{uuid})
|
||||
uuid: Entity UUID (for logging)
|
||||
entity_dict: Dictionary representation of the entity
|
||||
filename: JSON filename (e.g., 'watch.json', 'tag.json')
|
||||
entity_type: Type label for logging (e.g., 'watch', 'tag')
|
||||
max_size_mb: Maximum allowed file size in MB
|
||||
|
||||
Raises:
|
||||
ValueError: If serialized data exceeds max_size_mb
|
||||
OSError: If disk is full (ENOSPC) or other I/O error
|
||||
"""
|
||||
entity_json = os.path.join(entity_dir, filename)
|
||||
save_json_atomic(entity_json, entity_dict, label=f"{entity_type} {uuid}", max_size_mb=max_size_mb)
|
||||
|
||||
|
||||
def save_watch_atomic(watch_dir, uuid, watch_dict):
|
||||
"""
|
||||
Save a watch to disk using atomic write pattern.
|
||||
|
||||
Convenience wrapper around save_entity_atomic for watches.
|
||||
Kept for backwards compatibility.
|
||||
"""
|
||||
save_entity_atomic(watch_dir, uuid, watch_dict, "watch.json", "watch", max_size_mb=10)
|
||||
Convenience wrapper around save_json_atomic for watches.
|
||||
|
||||
Args:
|
||||
watch_dir: Directory for this watch (e.g., /datastore/{uuid})
|
||||
uuid: Watch UUID (for logging)
|
||||
watch_dict: Dictionary representation of the watch
|
||||
|
||||
Raises:
|
||||
ValueError: If serialized data exceeds 10MB (indicates bug or corruption)
|
||||
OSError: If disk is full (ENOSPC) or other I/O error
|
||||
"""
|
||||
watch_json = os.path.join(watch_dir, "watch.json")
|
||||
save_json_atomic(watch_json, watch_dict, label=f"watch {uuid}", max_size_mb=10)
|
||||
|
||||
|
||||
def load_watch_from_file(watch_json, uuid, rehydrate_entity_func):
|
||||
@@ -218,7 +219,8 @@ def load_watch_from_file(watch_json, uuid, rehydrate_entity_func):
|
||||
rehydrate_entity_func: Function to convert dict to Watch object
|
||||
|
||||
Returns:
|
||||
Watch object or None if failed
|
||||
Tuple of (Watch object, raw_data_dict) or (None, None) if failed
|
||||
The raw_data_dict is needed to compute the hash before rehydration
|
||||
"""
|
||||
try:
|
||||
# Check file size before reading
|
||||
@@ -231,7 +233,7 @@ def load_watch_from_file(watch_json, uuid, rehydrate_entity_func):
|
||||
f"File: {watch_json}. This indicates a bug or data corruption. "
|
||||
f"Watch will be skipped."
|
||||
)
|
||||
return None
|
||||
return None, None
|
||||
|
||||
if HAS_ORJSON:
|
||||
with open(watch_json, 'rb') as f:
|
||||
@@ -240,9 +242,15 @@ def load_watch_from_file(watch_json, uuid, rehydrate_entity_func):
|
||||
with open(watch_json, 'r', encoding='utf-8') as f:
|
||||
watch_data = json.load(f)
|
||||
|
||||
# Rehydrate and return watch object
|
||||
if watch_data.get('time_schedule_limit'):
|
||||
del watch_data['time_schedule_limit']
|
||||
if watch_data.get('time_between_check'):
|
||||
del watch_data['time_between_check']
|
||||
|
||||
# Return both the raw data and the rehydrated watch
|
||||
# Raw data is needed to compute hash before rehydration changes anything
|
||||
watch_obj = rehydrate_entity_func(uuid, watch_data)
|
||||
return watch_obj
|
||||
return watch_obj, watch_data
|
||||
|
||||
except json.JSONDecodeError as e:
|
||||
logger.critical(
|
||||
@@ -250,7 +258,7 @@ def load_watch_from_file(watch_json, uuid, rehydrate_entity_func):
|
||||
f"File: {watch_json}. Error: {e}. "
|
||||
f"Watch will be skipped and may need manual recovery from backup."
|
||||
)
|
||||
return None
|
||||
return None, None
|
||||
except ValueError as e:
|
||||
# orjson raises ValueError for invalid JSON
|
||||
if "invalid json" in str(e).lower() or HAS_ORJSON:
|
||||
@@ -259,18 +267,18 @@ def load_watch_from_file(watch_json, uuid, rehydrate_entity_func):
|
||||
f"File: {watch_json}. Error: {e}. "
|
||||
f"Watch will be skipped and may need manual recovery from backup."
|
||||
)
|
||||
return None
|
||||
return None, None
|
||||
# Re-raise if it's not a JSON parsing error
|
||||
raise
|
||||
except FileNotFoundError:
|
||||
logger.error(f"Watch file not found: {watch_json} for watch {uuid}")
|
||||
return None
|
||||
return None, None
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to load watch {uuid} from {watch_json}: {e}")
|
||||
return None
|
||||
return None, None
|
||||
|
||||
|
||||
def load_all_watches(datastore_path, rehydrate_entity_func):
|
||||
def load_all_watches(datastore_path, rehydrate_entity_func, compute_hash_func):
|
||||
"""
|
||||
Load all watches from individual watch.json files.
|
||||
|
||||
@@ -281,17 +289,21 @@ def load_all_watches(datastore_path, rehydrate_entity_func):
|
||||
Args:
|
||||
datastore_path: Path to the datastore directory
|
||||
rehydrate_entity_func: Function to convert dict to Watch object
|
||||
compute_hash_func: Function to compute hash from raw watch dict
|
||||
|
||||
Returns:
|
||||
Dictionary of uuid -> Watch object
|
||||
Tuple of (watching_dict, hashes_dict)
|
||||
- watching_dict: uuid -> Watch object
|
||||
- hashes_dict: uuid -> hash string (computed from raw data)
|
||||
"""
|
||||
start_time = time.time()
|
||||
logger.info("Loading watches from individual watch.json files...")
|
||||
|
||||
watching = {}
|
||||
watch_hashes = {}
|
||||
|
||||
if not os.path.exists(datastore_path):
|
||||
return watching
|
||||
return watching, watch_hashes
|
||||
|
||||
# Find all watch.json files using glob (faster than manual directory traversal)
|
||||
glob_start = time.time()
|
||||
@@ -307,9 +319,12 @@ def load_all_watches(datastore_path, rehydrate_entity_func):
|
||||
for watch_json in watch_files:
|
||||
# Extract UUID from path: /datastore/{uuid}/watch.json
|
||||
uuid_dir = os.path.basename(os.path.dirname(watch_json))
|
||||
watch = load_watch_from_file(watch_json, uuid_dir, rehydrate_entity_func)
|
||||
if watch:
|
||||
watch, raw_data = load_watch_from_file(watch_json, uuid_dir, rehydrate_entity_func)
|
||||
if watch and raw_data:
|
||||
watching[uuid_dir] = watch
|
||||
# Compute hash from rehydrated Watch object (as dict) to match how we compute on save
|
||||
# This ensures hash matches what audit will compute from dict(watch)
|
||||
watch_hashes[uuid_dir] = compute_hash_func(dict(watch))
|
||||
loaded += 1
|
||||
|
||||
if loaded % 100 == 0:
|
||||
@@ -329,123 +344,7 @@ def load_all_watches(datastore_path, rehydrate_entity_func):
|
||||
else:
|
||||
logger.info(f"Loaded {loaded} watches from disk in {elapsed:.2f}s ({loaded/elapsed:.0f} watches/sec)")
|
||||
|
||||
return watching
|
||||
|
||||
|
||||
def load_tag_from_file(tag_json, uuid, rehydrate_entity_func):
|
||||
"""
|
||||
Load a tag from its JSON file.
|
||||
|
||||
Args:
|
||||
tag_json: Path to the tag.json file
|
||||
uuid: Tag UUID
|
||||
rehydrate_entity_func: Function to convert dict to Tag object
|
||||
|
||||
Returns:
|
||||
Tag object or None if failed
|
||||
"""
|
||||
try:
|
||||
# Check file size before reading
|
||||
file_size = os.path.getsize(tag_json)
|
||||
MAX_TAG_SIZE = 1 * 1024 * 1024 # 1MB
|
||||
if file_size > MAX_TAG_SIZE:
|
||||
logger.critical(
|
||||
f"CORRUPTED TAG DATA: Tag {uuid} file is unexpectedly large: "
|
||||
f"{file_size / 1024 / 1024:.2f}MB (max: {MAX_TAG_SIZE / 1024 / 1024}MB). "
|
||||
f"File: {tag_json}. This indicates a bug or data corruption. "
|
||||
f"Tag will be skipped."
|
||||
)
|
||||
return None
|
||||
|
||||
if HAS_ORJSON:
|
||||
with open(tag_json, 'rb') as f:
|
||||
tag_data = orjson.loads(f.read())
|
||||
else:
|
||||
with open(tag_json, 'r', encoding='utf-8') as f:
|
||||
tag_data = json.load(f)
|
||||
|
||||
tag_data['processor'] = 'restock_diff'
|
||||
# Rehydrate tag (convert dict to Tag object)
|
||||
# processor_override is set inside the rehydration function
|
||||
tag_obj = rehydrate_entity_func(uuid, tag_data)
|
||||
return tag_obj
|
||||
|
||||
except json.JSONDecodeError as e:
|
||||
logger.critical(
|
||||
f"CORRUPTED TAG DATA: Failed to parse JSON for tag {uuid}. "
|
||||
f"File: {tag_json}. Error: {e}. "
|
||||
f"Tag will be skipped and may need manual recovery from backup."
|
||||
)
|
||||
return None
|
||||
except ValueError as e:
|
||||
# orjson raises ValueError for invalid JSON
|
||||
if "invalid json" in str(e).lower() or HAS_ORJSON:
|
||||
logger.critical(
|
||||
f"CORRUPTED TAG DATA: Failed to parse JSON for tag {uuid}. "
|
||||
f"File: {tag_json}. Error: {e}. "
|
||||
f"Tag will be skipped and may need manual recovery from backup."
|
||||
)
|
||||
return None
|
||||
# Re-raise if it's not a JSON parsing error
|
||||
raise
|
||||
except FileNotFoundError:
|
||||
logger.debug(f"Tag file not found: {tag_json} for tag {uuid}")
|
||||
return None
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to load tag {uuid} from {tag_json}: {e}")
|
||||
return None
|
||||
|
||||
|
||||
def load_all_tags(datastore_path, rehydrate_entity_func):
|
||||
"""
|
||||
Load all tags from individual tag.json files.
|
||||
|
||||
Tags are stored separately from settings in {uuid}/tag.json files.
|
||||
|
||||
Args:
|
||||
datastore_path: Path to the datastore directory
|
||||
rehydrate_entity_func: Function to convert dict to Tag object
|
||||
|
||||
Returns:
|
||||
Dictionary of uuid -> Tag object
|
||||
"""
|
||||
logger.info("Loading tags from individual tag.json files...")
|
||||
|
||||
tags = {}
|
||||
|
||||
if not os.path.exists(datastore_path):
|
||||
return tags
|
||||
|
||||
# Find all tag.json files using glob
|
||||
tag_files = glob.glob(os.path.join(datastore_path, "*", "tag.json"))
|
||||
|
||||
total = len(tag_files)
|
||||
if total == 0:
|
||||
logger.debug("No tag.json files found")
|
||||
return tags
|
||||
|
||||
logger.debug(f"Found {total} tag.json files")
|
||||
|
||||
loaded = 0
|
||||
failed = 0
|
||||
|
||||
for tag_json in tag_files:
|
||||
# Extract UUID from path: /datastore/{uuid}/tag.json
|
||||
uuid_dir = os.path.basename(os.path.dirname(tag_json))
|
||||
tag = load_tag_from_file(tag_json, uuid_dir, rehydrate_entity_func)
|
||||
if tag:
|
||||
tags[uuid_dir] = tag
|
||||
loaded += 1
|
||||
else:
|
||||
# load_tag_from_file already logged the specific error
|
||||
failed += 1
|
||||
|
||||
if failed > 0:
|
||||
logger.warning(f"Loaded {loaded} tags, {failed} tags FAILED to load")
|
||||
else:
|
||||
logger.info(f"Loaded {loaded} tags from disk")
|
||||
|
||||
return tags
|
||||
return watching, watch_hashes
|
||||
|
||||
|
||||
# ============================================================================
|
||||
@@ -454,20 +353,151 @@ def load_all_tags(datastore_path, rehydrate_entity_func):
|
||||
|
||||
class FileSavingDataStore(DataStore):
|
||||
"""
|
||||
Abstract datastore that provides file persistence with immediate commits.
|
||||
Abstract datastore that provides file persistence with change tracking.
|
||||
|
||||
Features:
|
||||
- Individual watch.json files (one per watch)
|
||||
- Immediate persistence via watch.commit() and datastore.commit()
|
||||
- Atomic file writes for crash safety
|
||||
- Dirty tracking: Only saves items that have changed
|
||||
- Hash-based change detection: Prevents unnecessary writes
|
||||
- Background save thread: Non-blocking persistence
|
||||
- Two-tier urgency: Standard (60s) and urgent (immediate) saves
|
||||
|
||||
Subclasses must implement:
|
||||
- rehydrate_entity(): Convert dict to Watch object
|
||||
- Access to internal __data structure for watch management
|
||||
"""
|
||||
|
||||
needs_write = False
|
||||
needs_write_urgent = False
|
||||
stop_thread = False
|
||||
|
||||
# Change tracking
|
||||
_dirty_watches = set() # Watch UUIDs that need saving
|
||||
_dirty_settings = False # Settings changed
|
||||
_watch_hashes = {} # UUID -> SHA256 hash for change detection
|
||||
|
||||
# Health monitoring
|
||||
_last_save_time = 0 # Timestamp of last successful save
|
||||
_last_audit_time = 0 # Timestamp of last audit scan
|
||||
_save_cycle_count = 0 # Number of save cycles completed
|
||||
_total_saves = 0 # Total watches saved (lifetime)
|
||||
_save_errors = 0 # Total save errors (lifetime)
|
||||
_audit_count = 0 # Number of audit scans completed
|
||||
_audit_found_changes = 0 # Total unmarked changes found by audits
|
||||
_audit_shard_index = 0 # Current shard being audited (rolling audit)
|
||||
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.save_data_thread = None
|
||||
self._last_save_time = time.time()
|
||||
self._last_audit_time = time.time()
|
||||
|
||||
|
||||
def mark_watch_dirty(self, uuid):
|
||||
"""
|
||||
Mark a watch as needing save.
|
||||
|
||||
Args:
|
||||
uuid: Watch UUID
|
||||
"""
|
||||
with self.lock:
|
||||
self._dirty_watches.add(uuid)
|
||||
dirty_count = len(self._dirty_watches)
|
||||
|
||||
# Backpressure detection - warn if dirty set grows too large
|
||||
if dirty_count > 1000:
|
||||
logger.critical(
|
||||
f"BACKPRESSURE WARNING: {dirty_count} watches pending save! "
|
||||
f"Save thread may not be keeping up with write rate. "
|
||||
f"This could indicate disk I/O bottleneck or save thread failure."
|
||||
)
|
||||
elif dirty_count > 500:
|
||||
logger.warning(
|
||||
f"Dirty watch count high: {dirty_count} watches pending save. "
|
||||
f"Monitoring for potential backpressure."
|
||||
)
|
||||
|
||||
self.needs_write = True
|
||||
|
||||
def mark_settings_dirty(self):
|
||||
"""Mark settings as needing save."""
|
||||
with self.lock:
|
||||
self._dirty_settings = True
|
||||
self.needs_write = True
|
||||
|
||||
def _compute_hash(self, watch_dict):
|
||||
"""
|
||||
Compute SHA256 hash of watch for change detection.
|
||||
|
||||
Args:
|
||||
watch_dict: Dictionary representation of watch
|
||||
|
||||
Returns:
|
||||
Hex string of SHA256 hash
|
||||
"""
|
||||
# Use orjson for deterministic serialization if available
|
||||
if HAS_ORJSON:
|
||||
json_bytes = orjson.dumps(watch_dict, option=orjson.OPT_SORT_KEYS)
|
||||
else:
|
||||
json_str = json.dumps(watch_dict, sort_keys=True, ensure_ascii=False)
|
||||
json_bytes = json_str.encode('utf-8')
|
||||
|
||||
return hashlib.sha256(json_bytes).hexdigest()
|
||||
|
||||
def save_watch(self, uuid, force=False, watch_dict=None, current_hash=None):
|
||||
"""
|
||||
Save a single watch if it has changed (polymorphic method).
|
||||
|
||||
Args:
|
||||
uuid: Watch UUID
|
||||
force: If True, skip hash check and save anyway
|
||||
watch_dict: Pre-computed watch dictionary (optimization)
|
||||
current_hash: Pre-computed hash (optimization)
|
||||
|
||||
Returns:
|
||||
True if saved, False if skipped (unchanged)
|
||||
"""
|
||||
if not self._watch_exists(uuid):
|
||||
logger.warning(f"Cannot save watch {uuid} - does not exist")
|
||||
return False
|
||||
|
||||
# Get watch dict if not provided
|
||||
if watch_dict is None:
|
||||
watch_dict = self._get_watch_dict(uuid)
|
||||
|
||||
# Compute hash if not provided
|
||||
if current_hash is None:
|
||||
current_hash = self._compute_hash(watch_dict)
|
||||
|
||||
# Skip save if unchanged (unless forced)
|
||||
if not force and current_hash == self._watch_hashes.get(uuid):
|
||||
return False
|
||||
|
||||
try:
|
||||
self._save_watch(uuid, watch_dict)
|
||||
self._watch_hashes[uuid] = current_hash
|
||||
logger.debug(f"Saved watch {uuid}")
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to save watch {uuid}: {e}")
|
||||
raise
|
||||
|
||||
def _save_watch(self, uuid, watch_dict):
|
||||
"""
|
||||
Save a single watch to storage (polymorphic).
|
||||
|
||||
Backend-specific implementation. Subclasses override for different storage:
|
||||
- File backend: Writes to {uuid}/watch.json
|
||||
- Redis backend: SET watch:{uuid}
|
||||
- SQL backend: UPDATE watches WHERE uuid=?
|
||||
|
||||
Args:
|
||||
uuid: Watch UUID
|
||||
watch_dict: Dictionary representation of watch
|
||||
"""
|
||||
# Default file implementation
|
||||
watch_dir = os.path.join(self.datastore_path, uuid)
|
||||
save_watch_atomic(watch_dir, uuid, watch_dict)
|
||||
|
||||
def _save_settings(self):
|
||||
"""
|
||||
@@ -480,7 +510,6 @@ class FileSavingDataStore(DataStore):
|
||||
"""
|
||||
raise NotImplementedError("Subclass must implement _save_settings")
|
||||
|
||||
|
||||
def _load_watches(self):
|
||||
"""
|
||||
Load all watches from storage (polymorphic).
|
||||
@@ -506,4 +535,364 @@ class FileSavingDataStore(DataStore):
|
||||
"""
|
||||
raise NotImplementedError("Subclass must implement _delete_watch")
|
||||
|
||||
def _save_dirty_items(self):
|
||||
"""
|
||||
Save dirty watches and settings.
|
||||
|
||||
This is the core optimization: instead of saving the entire datastore,
|
||||
we only save watches that were marked dirty and settings if changed.
|
||||
"""
|
||||
start_time = time.time()
|
||||
|
||||
# Capture dirty sets under lock
|
||||
with self.lock:
|
||||
dirty_watches = list(self._dirty_watches)
|
||||
dirty_settings = self._dirty_settings
|
||||
self._dirty_watches.clear()
|
||||
self._dirty_settings = False
|
||||
|
||||
if not dirty_watches and not dirty_settings:
|
||||
return
|
||||
|
||||
logger.trace(f"Saving {len(dirty_watches)} dirty watches, settings_dirty={dirty_settings}")
|
||||
|
||||
# Save each dirty watch using the polymorphic save method
|
||||
saved_count = 0
|
||||
error_count = 0
|
||||
skipped_unchanged = 0
|
||||
|
||||
# Process in batches of 50, using thread pool for parallel saves
|
||||
BATCH_SIZE = 50
|
||||
MAX_WORKERS = 20 # Number of parallel save threads
|
||||
|
||||
def save_single_watch(uuid):
|
||||
"""Helper function for thread pool execution."""
|
||||
try:
|
||||
# Check if watch still exists (might have been deleted)
|
||||
if not self._watch_exists(uuid):
|
||||
# Watch was deleted, remove hash
|
||||
self._watch_hashes.pop(uuid, None)
|
||||
return {'status': 'deleted', 'uuid': uuid}
|
||||
|
||||
# Pre-check hash to avoid unnecessary save_watch() calls
|
||||
watch_dict = self._get_watch_dict(uuid)
|
||||
current_hash = self._compute_hash(watch_dict)
|
||||
|
||||
if current_hash == self._watch_hashes.get(uuid):
|
||||
# Watch hasn't actually changed, skip
|
||||
return {'status': 'unchanged', 'uuid': uuid}
|
||||
|
||||
# Pass pre-computed values to avoid redundant serialization/hashing
|
||||
if self.save_watch(uuid, force=True, watch_dict=watch_dict, current_hash=current_hash):
|
||||
return {'status': 'saved', 'uuid': uuid}
|
||||
else:
|
||||
return {'status': 'skipped', 'uuid': uuid}
|
||||
except Exception as e:
|
||||
logger.error(f"Error saving watch {uuid}: {e}")
|
||||
return {'status': 'error', 'uuid': uuid, 'error': e}
|
||||
|
||||
# Process dirty watches in batches
|
||||
for batch_start in range(0, len(dirty_watches), BATCH_SIZE):
|
||||
batch = dirty_watches[batch_start:batch_start + BATCH_SIZE]
|
||||
batch_num = (batch_start // BATCH_SIZE) + 1
|
||||
total_batches = (len(dirty_watches) + BATCH_SIZE - 1) // BATCH_SIZE
|
||||
|
||||
if len(dirty_watches) > BATCH_SIZE:
|
||||
logger.trace(f"Save batch {batch_num}/{total_batches} ({len(batch)} watches)")
|
||||
|
||||
# Use thread pool to save watches in parallel
|
||||
with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
|
||||
# Submit all save tasks
|
||||
future_to_uuid = {executor.submit(save_single_watch, uuid): uuid for uuid in batch}
|
||||
|
||||
# Collect results as they complete
|
||||
for future in as_completed(future_to_uuid):
|
||||
result = future.result()
|
||||
status = result['status']
|
||||
|
||||
if status == 'saved':
|
||||
saved_count += 1
|
||||
elif status == 'unchanged':
|
||||
skipped_unchanged += 1
|
||||
elif status == 'error':
|
||||
error_count += 1
|
||||
# Re-mark for retry
|
||||
with self.lock:
|
||||
self._dirty_watches.add(result['uuid'])
|
||||
# 'deleted' and 'skipped' don't need special handling
|
||||
|
||||
# Save settings if changed
|
||||
if dirty_settings:
|
||||
try:
|
||||
self._save_settings()
|
||||
logger.debug("Saved settings")
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to save settings: {e}")
|
||||
error_count += 1
|
||||
with self.lock:
|
||||
self._dirty_settings = True
|
||||
|
||||
# Update metrics
|
||||
elapsed = time.time() - start_time
|
||||
self._save_cycle_count += 1
|
||||
self._total_saves += saved_count
|
||||
self._save_errors += error_count
|
||||
self._last_save_time = time.time()
|
||||
|
||||
# Log performance metrics
|
||||
if saved_count > 0:
|
||||
avg_time_per_watch = (elapsed / saved_count) * 1000 # milliseconds
|
||||
skipped_msg = f", {skipped_unchanged} unchanged" if skipped_unchanged > 0 else ""
|
||||
parallel_msg = f" [parallel: {MAX_WORKERS} workers]" if saved_count > 1 else ""
|
||||
logger.info(
|
||||
f"Successfully saved {saved_count} watches in {elapsed:.2f}s "
|
||||
f"(avg {avg_time_per_watch:.1f}ms per watch{skipped_msg}){parallel_msg}. "
|
||||
f"Total: {self._total_saves} saves, {self._save_errors} errors (lifetime)"
|
||||
)
|
||||
elif skipped_unchanged > 0:
|
||||
logger.debug(f"Save cycle: {skipped_unchanged} watches verified unchanged (hash match), nothing saved")
|
||||
|
||||
if error_count > 0:
|
||||
logger.error(f"Save cycle completed with {error_count} errors")
|
||||
|
||||
self.needs_write = False
|
||||
self.needs_write_urgent = False
|
||||
|
||||
def _watch_exists(self, uuid):
|
||||
"""
|
||||
Check if watch exists. Subclass must implement.
|
||||
|
||||
Args:
|
||||
uuid: Watch UUID
|
||||
|
||||
Returns:
|
||||
bool
|
||||
"""
|
||||
raise NotImplementedError("Subclass must implement _watch_exists")
|
||||
|
||||
def _get_watch_dict(self, uuid):
|
||||
"""
|
||||
Get watch as dictionary. Subclass must implement.
|
||||
|
||||
Args:
|
||||
uuid: Watch UUID
|
||||
|
||||
Returns:
|
||||
Dictionary representation of watch
|
||||
"""
|
||||
raise NotImplementedError("Subclass must implement _get_watch_dict")
|
||||
|
||||
def _audit_all_watches(self):
|
||||
"""
|
||||
Rolling audit: Scans a fraction of watches to detect unmarked changes.
|
||||
|
||||
Instead of scanning ALL watches at once, this scans 1/N shards per cycle.
|
||||
The shard rotates each cycle, completing a full audit every N cycles.
|
||||
|
||||
Handles dynamic watch count - recalculates shard boundaries each cycle,
|
||||
so newly added watches will be audited in subsequent cycles.
|
||||
|
||||
Benefits:
|
||||
- Lower CPU per cycle (56k / 5 = ~11k watches vs all 56k)
|
||||
- More frequent audits overall (every 50s vs every 10s)
|
||||
- Spreads load evenly across time
|
||||
"""
|
||||
audit_start = time.time()
|
||||
|
||||
# Get list of all watch UUIDs (read-only, no lock needed)
|
||||
try:
|
||||
all_uuids = list(self.data['watching'].keys())
|
||||
except (KeyError, AttributeError, RuntimeError):
|
||||
# Data structure not ready or being modified
|
||||
return
|
||||
|
||||
if not all_uuids:
|
||||
return
|
||||
|
||||
total_watches = len(all_uuids)
|
||||
|
||||
# Calculate this cycle's shard boundaries
|
||||
# Example: 56,278 watches / 5 shards = 11,255 watches per shard
|
||||
# Shard 0: [0:11255], Shard 1: [11255:22510], etc.
|
||||
shard_size = (total_watches + DATASTORE_AUDIT_SHARDS - 1) // DATASTORE_AUDIT_SHARDS
|
||||
start_idx = self._audit_shard_index * shard_size
|
||||
end_idx = min(start_idx + shard_size, total_watches)
|
||||
|
||||
# Handle wrap-around (shouldn't happen normally, but defensive)
|
||||
if start_idx >= total_watches:
|
||||
self._audit_shard_index = 0
|
||||
start_idx = 0
|
||||
end_idx = min(shard_size, total_watches)
|
||||
|
||||
# Audit only this shard's watches
|
||||
shard_uuids = all_uuids[start_idx:end_idx]
|
||||
|
||||
changes_found = 0
|
||||
errors = 0
|
||||
|
||||
for uuid in shard_uuids:
|
||||
try:
|
||||
# Get current watch dict and compute hash
|
||||
watch_dict = self._get_watch_dict(uuid)
|
||||
current_hash = self._compute_hash(watch_dict)
|
||||
stored_hash = self._watch_hashes.get(uuid)
|
||||
|
||||
# If hash changed and not already marked dirty, mark it
|
||||
if current_hash != stored_hash:
|
||||
with self.lock:
|
||||
if uuid not in self._dirty_watches:
|
||||
self._dirty_watches.add(uuid)
|
||||
changes_found += 1
|
||||
logger.warning(
|
||||
f"Audit detected unmarked change in watch {uuid[:8]}... current {current_hash:8} stored hash {stored_hash[:8]}"
|
||||
f"(hash changed but not marked dirty)"
|
||||
)
|
||||
self.needs_write = True
|
||||
except Exception as e:
|
||||
errors += 1
|
||||
logger.trace(f"Audit error for watch {uuid[:8]}...: {e}")
|
||||
|
||||
audit_elapsed = (time.time() - audit_start) * 1000 # milliseconds
|
||||
|
||||
# Advance to next shard (wrap around after last shard)
|
||||
self._audit_shard_index = (self._audit_shard_index + 1) % DATASTORE_AUDIT_SHARDS
|
||||
|
||||
# Update metrics
|
||||
self._audit_count += 1
|
||||
self._audit_found_changes += changes_found
|
||||
self._last_audit_time = time.time()
|
||||
|
||||
if changes_found > 0:
|
||||
logger.warning(
|
||||
f"Audit shard {self._audit_shard_index}/{DATASTORE_AUDIT_SHARDS} found {changes_found} "
|
||||
f"unmarked changes in {len(shard_uuids)}/{total_watches} watches ({audit_elapsed:.1f}ms)"
|
||||
)
|
||||
else:
|
||||
logger.trace(
|
||||
f"Audit shard {self._audit_shard_index}/{DATASTORE_AUDIT_SHARDS}: "
|
||||
f"{len(shard_uuids)}/{total_watches} watches checked, 0 changes ({audit_elapsed:.1f}ms)"
|
||||
)
|
||||
|
||||
def save_datastore(self):
|
||||
"""
|
||||
Background thread that periodically saves dirty items and audits watches.
|
||||
|
||||
Runs two independent cycles:
|
||||
1. Save dirty items every DATASTORE_SCAN_DIRTY_SAVE_INTERVAL_SECONDS (default 10s)
|
||||
2. Rolling audit: every DATASTORE_AUDIT_INTERVAL_SECONDS (default 10s)
|
||||
- Scans 1/DATASTORE_AUDIT_SHARDS watches per cycle (default 1/5)
|
||||
- Full audit completes every 50s (10s × 5 shards)
|
||||
- Automatically handles new/deleted watches
|
||||
|
||||
Uses 0.5s sleep intervals for responsiveness to urgent saves.
|
||||
"""
|
||||
while True:
|
||||
if self.stop_thread:
|
||||
# Graceful shutdown: flush any remaining dirty items before stopping
|
||||
if self.needs_write or self._dirty_watches or self._dirty_settings:
|
||||
logger.warning("Datastore save thread stopping - flushing remaining dirty items...")
|
||||
try:
|
||||
self._save_dirty_items()
|
||||
logger.info("Graceful shutdown complete - all data saved")
|
||||
except Exception as e:
|
||||
logger.critical(f"FAILED to save dirty items during shutdown: {e}")
|
||||
else:
|
||||
logger.info("Datastore save thread stopping - no dirty items")
|
||||
return
|
||||
|
||||
# Check if it's time to run audit scan (every N seconds)
|
||||
if time.time() - self._last_audit_time >= DATASTORE_AUDIT_INTERVAL_SECONDS:
|
||||
try:
|
||||
self._audit_all_watches()
|
||||
except Exception as e:
|
||||
logger.error(f"Error in audit cycle: {e}")
|
||||
|
||||
# Save dirty items if needed
|
||||
if self.needs_write or self.needs_write_urgent:
|
||||
try:
|
||||
self._save_dirty_items()
|
||||
except Exception as e:
|
||||
logger.error(f"Error in save cycle: {e}")
|
||||
|
||||
# Timer with early break for urgent saves
|
||||
# Each iteration is 0.5 seconds, so iterations = DATASTORE_SCAN_DIRTY_SAVE_INTERVAL_SECONDS * 2
|
||||
for i in range(DATASTORE_SCAN_DIRTY_SAVE_INTERVAL_SECONDS * 2):
|
||||
time.sleep(0.5)
|
||||
if self.stop_thread or self.needs_write_urgent:
|
||||
break
|
||||
|
||||
def start_save_thread(self):
|
||||
"""Start the background save thread."""
|
||||
if not self.save_data_thread or not self.save_data_thread.is_alive():
|
||||
self.save_data_thread = Thread(target=self.save_datastore, daemon=True, name="DatastoreSaver")
|
||||
self.save_data_thread.start()
|
||||
logger.info("Datastore save thread started")
|
||||
|
||||
def force_save_all(self):
|
||||
"""
|
||||
Force immediate synchronous save of all changes to storage.
|
||||
|
||||
File backend implementation of the abstract force_save_all() method.
|
||||
Marks all watches and settings as dirty, then saves immediately.
|
||||
|
||||
Used by:
|
||||
- Backup creation (ensure everything is saved before backup)
|
||||
- Shutdown (ensure all changes are persisted)
|
||||
- Manual save operations
|
||||
"""
|
||||
logger.info("Force saving all data to storage...")
|
||||
|
||||
# Mark everything as dirty to ensure complete save
|
||||
for uuid in self.data['watching'].keys():
|
||||
self.mark_watch_dirty(uuid)
|
||||
self.mark_settings_dirty()
|
||||
|
||||
# Save immediately (synchronous)
|
||||
self._save_dirty_items()
|
||||
|
||||
logger.success("All data saved to storage")
|
||||
|
||||
def get_health_status(self):
|
||||
"""
|
||||
Get datastore health status for monitoring.
|
||||
|
||||
Returns:
|
||||
dict with health metrics and status
|
||||
"""
|
||||
now = time.time()
|
||||
time_since_last_save = now - self._last_save_time
|
||||
|
||||
with self.lock:
|
||||
dirty_count = len(self._dirty_watches)
|
||||
|
||||
is_thread_alive = self.save_data_thread and self.save_data_thread.is_alive()
|
||||
|
||||
# Determine health status
|
||||
if not is_thread_alive:
|
||||
status = "CRITICAL"
|
||||
message = "Save thread is DEAD"
|
||||
elif time_since_last_save > 300: # 5 minutes
|
||||
status = "WARNING"
|
||||
message = f"No save activity for {time_since_last_save:.0f}s"
|
||||
elif dirty_count > 1000:
|
||||
status = "WARNING"
|
||||
message = f"High backpressure: {dirty_count} watches pending"
|
||||
elif self._save_errors > 0 and (self._save_errors / max(self._total_saves, 1)) > 0.01:
|
||||
status = "WARNING"
|
||||
message = f"High error rate: {self._save_errors} errors"
|
||||
else:
|
||||
status = "HEALTHY"
|
||||
message = "Operating normally"
|
||||
|
||||
return {
|
||||
"status": status,
|
||||
"message": message,
|
||||
"thread_alive": is_thread_alive,
|
||||
"dirty_watches": dirty_count,
|
||||
"dirty_settings": self._dirty_settings,
|
||||
"last_save_seconds_ago": int(time_since_last_save),
|
||||
"save_cycles": self._save_cycle_count,
|
||||
"total_saves": self._total_saves,
|
||||
"total_errors": self._save_errors,
|
||||
"error_rate_percent": round((self._save_errors / max(self._total_saves, 1)) * 100, 2)
|
||||
}
|
||||
|
||||
@@ -0,0 +1,66 @@
|
||||
"""
|
||||
Legacy format loader for url-watches.json.
|
||||
|
||||
Provides functions to detect and load from the legacy monolithic JSON format.
|
||||
Used during migration (update_26) to transition to individual watch.json files.
|
||||
"""
|
||||
|
||||
import os
|
||||
import json
|
||||
from loguru import logger
|
||||
|
||||
# Try to import orjson for faster JSON serialization
|
||||
try:
|
||||
import orjson
|
||||
HAS_ORJSON = True
|
||||
except ImportError:
|
||||
HAS_ORJSON = False
|
||||
|
||||
|
||||
def has_legacy_datastore(datastore_path):
|
||||
"""
|
||||
Check if a legacy url-watches.json file exists.
|
||||
|
||||
This is used by update_26 to determine if migration is needed.
|
||||
|
||||
Args:
|
||||
datastore_path: Path to datastore directory
|
||||
|
||||
Returns:
|
||||
bool: True if url-watches.json exists
|
||||
"""
|
||||
url_watches_json = os.path.join(datastore_path, "url-watches.json")
|
||||
return os.path.exists(url_watches_json)
|
||||
|
||||
|
||||
def load_legacy_format(json_store_path):
|
||||
"""
|
||||
Load datastore from legacy url-watches.json format.
|
||||
|
||||
Args:
|
||||
json_store_path: Full path to url-watches.json file
|
||||
|
||||
Returns:
|
||||
dict: Loaded datastore data with 'watching', 'settings', etc.
|
||||
None: If file doesn't exist or loading failed
|
||||
"""
|
||||
logger.info(f"Loading from legacy format: {json_store_path}")
|
||||
|
||||
if not os.path.isfile(json_store_path):
|
||||
logger.warning(f"Legacy file not found: {json_store_path}")
|
||||
return None
|
||||
|
||||
try:
|
||||
if HAS_ORJSON:
|
||||
with open(json_store_path, 'rb') as f:
|
||||
data = orjson.loads(f.read())
|
||||
else:
|
||||
with open(json_store_path, 'r', encoding='utf-8') as f:
|
||||
data = json.load(f)
|
||||
|
||||
logger.info(f"Loaded {len(data.get('watching', {}))} watches from legacy format")
|
||||
return data
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to load legacy format: {e}")
|
||||
return None
|
||||
@@ -16,18 +16,12 @@ import time
|
||||
from loguru import logger
|
||||
from copy import deepcopy
|
||||
|
||||
|
||||
# Try to import orjson for faster JSON serialization
|
||||
try:
|
||||
import orjson
|
||||
HAS_ORJSON = True
|
||||
except ImportError:
|
||||
HAS_ORJSON = False
|
||||
|
||||
from ..html_tools import TRANSLATE_WHITESPACE_TABLE
|
||||
from ..processors.restock_diff import Restock
|
||||
from ..blueprint.rss import RSS_CONTENT_FORMAT_DEFAULT
|
||||
from ..model import USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH
|
||||
from .file_saving_datastore import save_watch_atomic
|
||||
|
||||
|
||||
def create_backup_tarball(datastore_path, update_number):
|
||||
"""
|
||||
@@ -35,7 +29,6 @@ def create_backup_tarball(datastore_path, update_number):
|
||||
|
||||
Includes:
|
||||
- All {uuid}/watch.json files
|
||||
- All {uuid}/tag.json files
|
||||
- changedetection.json (settings, if it exists)
|
||||
- url-watches.json (legacy format, if it exists)
|
||||
- Directory structure preserved
|
||||
@@ -51,7 +44,7 @@ def create_backup_tarball(datastore_path, update_number):
|
||||
To restore from a backup:
|
||||
cd /path/to/datastore
|
||||
tar -xzf before-update-N-timestamp.tar.gz
|
||||
This will restore all watch.json and tag.json files and settings to their pre-update state.
|
||||
This will restore all watch.json files and settings to their pre-update state.
|
||||
"""
|
||||
timestamp = int(time.time())
|
||||
backup_filename = f"before-update-{update_number}-{timestamp}.tar.gz"
|
||||
@@ -73,10 +66,9 @@ def create_backup_tarball(datastore_path, update_number):
|
||||
tar.add(url_watches_json, arcname="url-watches.json")
|
||||
logger.debug("Added url-watches.json to backup")
|
||||
|
||||
# Backup all watch/tag directories with their JSON files
|
||||
# Backup all watch directories with their watch.json files
|
||||
# This preserves the UUID directory structure
|
||||
watch_count = 0
|
||||
tag_count = 0
|
||||
for entry in os.listdir(datastore_path):
|
||||
entry_path = os.path.join(datastore_path, entry)
|
||||
|
||||
@@ -88,22 +80,17 @@ def create_backup_tarball(datastore_path, update_number):
|
||||
if entry.startswith('.') or entry.startswith('before-update-'):
|
||||
continue
|
||||
|
||||
# Backup watch.json if exists
|
||||
# Check if this directory has a watch.json (indicates it's a watch UUID directory)
|
||||
watch_json = os.path.join(entry_path, "watch.json")
|
||||
if os.path.isfile(watch_json):
|
||||
# Add the watch.json file preserving directory structure
|
||||
tar.add(watch_json, arcname=f"{entry}/watch.json")
|
||||
watch_count += 1
|
||||
|
||||
if watch_count % 100 == 0:
|
||||
logger.debug(f"Backed up {watch_count} watch.json files...")
|
||||
|
||||
# Backup tag.json if exists
|
||||
tag_json = os.path.join(entry_path, "tag.json")
|
||||
if os.path.isfile(tag_json):
|
||||
tar.add(tag_json, arcname=f"{entry}/tag.json")
|
||||
tag_count += 1
|
||||
|
||||
logger.success(f"Backup created: {backup_filename} ({watch_count} watches from disk, {tag_count} tags from disk)")
|
||||
logger.success(f"Backup created: {backup_filename} ({watch_count} watches)")
|
||||
return backup_path
|
||||
|
||||
except Exception as e:
|
||||
@@ -143,7 +130,6 @@ class DatastoreUpdatesMixin:
|
||||
return updates_available
|
||||
|
||||
def run_updates(self, current_schema_version=None):
|
||||
import sys
|
||||
"""
|
||||
Run all pending schema updates sequentially.
|
||||
|
||||
@@ -161,29 +147,12 @@ class DatastoreUpdatesMixin:
|
||||
2. For each update > current schema version:
|
||||
- Create backup of datastore
|
||||
- Run update method
|
||||
- Update schema version and commit settings
|
||||
- Commit all watches and tags
|
||||
- Update schema version
|
||||
- Mark settings and watches dirty
|
||||
3. If any update fails, stop processing
|
||||
4. All changes saved via individual .commit() calls
|
||||
4. Save all changes immediately
|
||||
"""
|
||||
updates_available = self.get_updates_available()
|
||||
if self.data.get('watching'):
|
||||
test_watch = self.data['watching'].get(next(iter(self.data.get('watching', {}))))
|
||||
from ..model.Watch import model
|
||||
|
||||
if not isinstance(test_watch, model):
|
||||
import sys
|
||||
logger.critical("Cannot run updates! Watch structure must be re-hydrated back to a Watch model object!")
|
||||
sys.exit(1)
|
||||
|
||||
if self.data['settings']['application'].get('tags',{}):
|
||||
test_tag = self.data['settings']['application'].get('tags',{}).get(next(iter(self.data['settings']['application'].get('tags',{}))))
|
||||
from ..model.Tag import model as tag_model
|
||||
|
||||
if not isinstance(test_tag, tag_model):
|
||||
import sys
|
||||
logger.critical("Cannot run updates! Watch tag/group structure must be re-hydrated back to a Tag model object!")
|
||||
sys.exit(1)
|
||||
|
||||
# Determine current schema version
|
||||
if current_schema_version is None:
|
||||
@@ -199,7 +168,7 @@ class DatastoreUpdatesMixin:
|
||||
latest_update = updates_available[-1] if updates_available else 0
|
||||
logger.info(f"No schema version found and no watches exist - assuming fresh install, setting schema_version to {latest_update}")
|
||||
self.data['settings']['application']['schema_version'] = latest_update
|
||||
self.commit()
|
||||
self.mark_settings_dirty()
|
||||
return # No updates needed for fresh install
|
||||
else:
|
||||
# Has watches but no schema version - likely old datastore, run all updates
|
||||
@@ -225,15 +194,31 @@ class DatastoreUpdatesMixin:
|
||||
try:
|
||||
update_method = getattr(self, f"update_{update_n}")()
|
||||
except Exception as e:
|
||||
logger.critical(f"Error while trying update_{update_n}")
|
||||
logger.exception(e)
|
||||
sys.exit(1)
|
||||
logger.error(f"Error while trying update_{update_n}")
|
||||
logger.error(e)
|
||||
# Don't run any more updates
|
||||
return
|
||||
else:
|
||||
# Bump the version
|
||||
# Bump the version, important
|
||||
self.data['settings']['application']['schema_version'] = update_n
|
||||
self.commit()
|
||||
self.mark_settings_dirty()
|
||||
|
||||
logger.success(f"Update {update_n} completed")
|
||||
# CRITICAL: Mark all watches as dirty so changes are persisted
|
||||
# Most updates modify watches, and in the new individual watch.json structure,
|
||||
# we need to ensure those changes are saved
|
||||
logger.info(f"Marking all {len(self.data['watching'])} watches as dirty after update_{update_n} (so that it saves them to disk)")
|
||||
for uuid in self.data['watching'].keys():
|
||||
self.mark_watch_dirty(uuid)
|
||||
|
||||
# Save changes immediately after each update (more resilient than batching)
|
||||
logger.critical(f"Saving all changes after update_{update_n}")
|
||||
try:
|
||||
self._save_dirty_items()
|
||||
logger.success(f"Update {update_n} changes saved successfully")
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to save update_{update_n} changes: {e}")
|
||||
# Don't raise - update already ran, but changes might not be persisted
|
||||
# The update will try to run again on next startup
|
||||
|
||||
# Track which updates ran
|
||||
updates_ran.append(update_n)
|
||||
@@ -483,14 +468,6 @@ class DatastoreUpdatesMixin:
|
||||
del self.data['watching'][uuid]['extract_title_as_title']
|
||||
|
||||
if self.data['settings']['application'].get('extract_title_as_title'):
|
||||
# Ensure 'ui' key exists (defensive for edge cases where base_config merge didn't happen)
|
||||
if 'ui' not in self.data['settings']['application']:
|
||||
self.data['settings']['application']['ui'] = {
|
||||
'use_page_title_in_list': True,
|
||||
'open_diff_in_new_tab': True,
|
||||
'socket_io_enabled': True,
|
||||
'favicons_enabled': True
|
||||
}
|
||||
self.data['settings']['application']['ui']['use_page_title_in_list'] = self.data['settings']['application'].get('extract_title_as_title')
|
||||
|
||||
def update_21(self):
|
||||
@@ -578,6 +555,27 @@ class DatastoreUpdatesMixin:
|
||||
logger.critical("COPY-based migration: url-watches.json will remain intact for rollback")
|
||||
logger.critical("=" * 80)
|
||||
|
||||
# Check if already migrated
|
||||
changedetection_json = os.path.join(self.datastore_path, "changedetection.json")
|
||||
if os.path.exists(changedetection_json):
|
||||
logger.info("Migration already completed (changedetection.json exists), skipping")
|
||||
return
|
||||
|
||||
# Check if we need to load legacy data
|
||||
from .legacy_loader import has_legacy_datastore, load_legacy_format
|
||||
|
||||
if not has_legacy_datastore(self.datastore_path):
|
||||
logger.info("No legacy datastore found, nothing to migrate")
|
||||
return
|
||||
|
||||
# Load legacy data from url-watches.json
|
||||
logger.critical("Loading legacy datastore from url-watches.json...")
|
||||
legacy_path = os.path.join(self.datastore_path, "url-watches.json")
|
||||
legacy_data = load_legacy_format(legacy_path)
|
||||
|
||||
if not legacy_data:
|
||||
raise Exception("Failed to load legacy datastore from url-watches.json")
|
||||
|
||||
# Populate settings from legacy data
|
||||
logger.info("Populating settings from legacy data...")
|
||||
watch_count = len(self.data['watching'])
|
||||
@@ -589,7 +587,9 @@ class DatastoreUpdatesMixin:
|
||||
saved_count = 0
|
||||
for uuid, watch in self.data['watching'].items():
|
||||
try:
|
||||
watch.commit()
|
||||
watch_dict = dict(watch)
|
||||
watch_dir = os.path.join(self.datastore_path, uuid)
|
||||
save_watch_atomic(watch_dir, uuid, watch_dict)
|
||||
saved_count += 1
|
||||
|
||||
if saved_count % 100 == 0:
|
||||
@@ -635,20 +635,36 @@ class DatastoreUpdatesMixin:
|
||||
|
||||
# Phase 4: Verify settings file exists
|
||||
logger.critical("Phase 4/4: Verifying changedetection.json exists...")
|
||||
changedetection_json_new_schema=os.path.join(self.datastore_path, "changedetection.json")
|
||||
if not os.path.isfile(changedetection_json_new_schema):
|
||||
import sys
|
||||
logger.critical("Migration failed, changedetection.json not found after update ran!")
|
||||
sys.exit(1)
|
||||
|
||||
if not os.path.isfile(changedetection_json):
|
||||
raise Exception(
|
||||
"Migration failed: changedetection.json not found after save. "
|
||||
"url-watches.json remains intact, safe to retry."
|
||||
)
|
||||
|
||||
logger.critical("Phase 4 complete: Verified changedetection.json exists")
|
||||
|
||||
# Success! Now reload from new format
|
||||
logger.critical("Reloading datastore from new format...")
|
||||
# write it to disk, it will be saved without ['watching'] in the JSON db because we find it from disk glob
|
||||
self._save_settings()
|
||||
self._load_state() # Includes load_watches
|
||||
logger.success("Datastore reloaded from new format successfully")
|
||||
|
||||
|
||||
# Verify all watches have hashes after migration
|
||||
missing_hashes = [uuid for uuid in self.data['watching'].keys() if uuid not in self._watch_hashes]
|
||||
if missing_hashes:
|
||||
logger.error(f"WARNING: {len(missing_hashes)} watches missing hashes after migration: {missing_hashes[:5]}")
|
||||
else:
|
||||
logger.success(f"All {len(self.data['watching'])} watches have valid hashes after migration")
|
||||
|
||||
# Set schema version to latest available update
|
||||
# This prevents re-running updates and re-marking all watches as dirty
|
||||
updates_available = self.get_updates_available()
|
||||
latest_schema = updates_available[-1] if updates_available else 26
|
||||
self.data['settings']['application']['schema_version'] = latest_schema
|
||||
self.mark_settings_dirty()
|
||||
logger.info(f"Set schema_version to {latest_schema} (migration complete, all watches already saved)")
|
||||
|
||||
logger.critical("=" * 80)
|
||||
logger.critical("MIGRATION COMPLETED SUCCESSFULLY!")
|
||||
logger.critical("=" * 80)
|
||||
@@ -667,111 +683,4 @@ class DatastoreUpdatesMixin:
|
||||
logger.info("")
|
||||
|
||||
def update_26(self):
|
||||
self.migrate_legacy_db_format()
|
||||
|
||||
# Re-run tag to JSON migration
|
||||
def update_29(self):
|
||||
|
||||
"""
|
||||
Migrate tags to individual tag.json files.
|
||||
|
||||
Tags are currently saved only in changedetection.json (settings).
|
||||
This migration ALSO saves them to individual {uuid}/tag.json files,
|
||||
similar to how watches are stored (dual storage).
|
||||
|
||||
Benefits:
|
||||
- Allows atomic tag updates without rewriting entire settings
|
||||
- Enables independent tag versioning/backup
|
||||
- Maintains backwards compatibility (tags stay in settings too)
|
||||
"""
|
||||
logger.critical("=" * 80)
|
||||
logger.critical("Running migration: Individual tag persistence (update_28)")
|
||||
logger.critical("Creating individual tag.json files")
|
||||
logger.critical("=" * 80)
|
||||
|
||||
tags = self.data['settings']['application'].get('tags', {})
|
||||
tag_count = len(tags)
|
||||
|
||||
if tag_count == 0:
|
||||
logger.info("No tags found, skipping migration")
|
||||
return
|
||||
|
||||
logger.info(f"Migrating {tag_count} tags to individual tag.json files...")
|
||||
|
||||
saved_count = 0
|
||||
failed_count = 0
|
||||
|
||||
for uuid, tag_data in tags.items():
|
||||
if os.path.isfile(os.path.join(self.datastore_path, uuid, "tag.json")):
|
||||
logger.debug(f"Tag {uuid} tag.json exists, skipping")
|
||||
continue
|
||||
try:
|
||||
tag_data.commit()
|
||||
saved_count += 1
|
||||
if saved_count % 10 == 0:
|
||||
logger.info(f" Progress: {saved_count}/{tag_count} tags migrated...")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to save tag {uuid} ({tag_data.get('title', 'unknown')}): {e}")
|
||||
failed_count += 1
|
||||
|
||||
if failed_count > 0:
|
||||
logger.warning(f"Migration complete: {saved_count} tags saved, {failed_count} tags FAILED")
|
||||
else:
|
||||
logger.success(f"Migration complete: {saved_count} tags saved to individual tag.json files")
|
||||
|
||||
# Tags remain in settings for backwards compatibility AND easy access
|
||||
# On next load, _load_tags() will read from tag.json files and merge with settings
|
||||
logger.info("Tags saved to both settings AND individual tag.json files")
|
||||
logger.info("Future tag edits will update both locations (dual storage)")
|
||||
logger.critical("=" * 80)
|
||||
|
||||
# write it to disk, it will be saved without ['tags'] in the JSON db because we find it from disk glob
|
||||
# (left this out by accident in previous update, added tags={} in the changedetection.json save_to_disk)
|
||||
self._save_settings()
|
||||
|
||||
def update_30(self):
|
||||
"""Migrate restock_settings out of watch.json into restock_diff.json processor config file.
|
||||
|
||||
Previously, restock_diff processor settings (in_stock_processing, follow_price_changes, etc.)
|
||||
were stored directly in the watch dict (watch.json). They now belong in a separate per-watch
|
||||
processor config file (restock_diff.json) consistent with the processor_config_* API system.
|
||||
|
||||
For tags: restock_settings key is renamed to processor_config_restock_diff in the tag dict,
|
||||
matching what the API writes when updating a tag.
|
||||
|
||||
Safe to re-run: skips watches that already have a restock_diff.json, skips tags that already
|
||||
have processor_config_restock_diff set.
|
||||
"""
|
||||
import json
|
||||
|
||||
# --- Watches ---
|
||||
for uuid, watch in self.data['watching'].items():
|
||||
if watch.get('processor') != 'restock_diff':
|
||||
continue
|
||||
restock_settings = watch.get('restock_settings')
|
||||
if not restock_settings:
|
||||
continue
|
||||
|
||||
data_dir = watch.data_dir
|
||||
if data_dir:
|
||||
watch.ensure_data_dir_exists()
|
||||
filepath = os.path.join(data_dir, 'restock_diff.json')
|
||||
if not os.path.isfile(filepath):
|
||||
with open(filepath, 'w', encoding='utf-8') as f:
|
||||
json.dump({'restock_diff': restock_settings}, f, indent=2)
|
||||
logger.info(f"update_30: migrated restock_settings → {filepath}")
|
||||
|
||||
del self.data['watching'][uuid]['restock_settings']
|
||||
watch.commit()
|
||||
|
||||
# --- Tags ---
|
||||
for tag_uuid, tag in self.data['settings']['application']['tags'].items():
|
||||
restock_settings = tag.get('restock_settings')
|
||||
if not restock_settings or tag.get('processor_config_restock_diff'):
|
||||
continue
|
||||
tag['processor_config_restock_diff'] = restock_settings
|
||||
del tag['restock_settings']
|
||||
tag.commit()
|
||||
logger.info(f"update_30: migrated tag {tag_uuid} restock_settings → processor_config_restock_diff")
|
||||
|
||||
self.migrate_legacy_db_format()
|
||||
@@ -44,26 +44,13 @@
|
||||
<td><code>{{ '{{preview_url}}' }}</code></td>
|
||||
<td>{{ _('The URL of the preview page generated by changedetection.io.') }}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>{{ '{{change_datetime}}' }}</code></td>
|
||||
<td>{{ _('Date/time of the change, accepts format=, change_datetime(format=\'%A\')\', default is \'%Y-%m-%d %H:%M:%S %Z\'') }}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>{{ '{{diff_url}}' }}</code></td>
|
||||
<td>{{ _('The URL of the diff output for the watch.') }}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>{{ '{{diff_url}}' }}</code></td>
|
||||
<td>{{ _('The URL of the diff output for the watch.') }}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>{{ '{{diff}}' }}</code></td>
|
||||
<td>{{ _('The diff output - only changes, additions, and removals') }}<br>
|
||||
<small>
|
||||
{{ _('All diff variants accept') }} <code>lines=</code>, <code>context=</code>, <code>word_diff=</code>, <code>ignore_junk=</code> {{ _('args, e.g.') }}
|
||||
<code>{{ '{{diff(lines=10)}}' }}</code>, <code>{{ '{{diff_added(lines=5, context=2)}}' }}</code>
|
||||
</small>
|
||||
</td>
|
||||
<td>{{ _('The diff output - only changes, additions, and removals') }}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>{{ '{{diff_clean}}' }}</code></td>
|
||||
@@ -98,14 +85,6 @@
|
||||
<td><code>{{ '{{diff_patch}}' }}</code></td>
|
||||
<td>{{ _('The diff output - patch in unified format') }}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>{{ '{{diff_changed_from}}' }}</code></td>
|
||||
<td>{{ _('Only the changed fragments from the previous version — e.g. the old price. Multiple changes joined by newline.') }}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>{{ '{{diff_changed_to}}' }}</code></td>
|
||||
<td>{{ _('Only the changed fragments from the new version — e.g. the new price. Multiple changes joined by newline.') }}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>{{ '{{current_snapshot}}' }}</code></td>
|
||||
<td>{{ _('The current snapshot text contents value, useful when combined with JSON or CSS filters') }}
|
||||
|
||||
@@ -45,10 +45,6 @@
|
||||
<script src="{{url_for('static_content', group='js', filename='socket.io.min.js')}}"></script>
|
||||
<script src="{{url_for('static_content', group='js', filename='realtime.js')}}" defer></script>
|
||||
{% endif %}
|
||||
{%- set _html_head_extras = get_html_head_extras() -%}
|
||||
{%- if _html_head_extras %}
|
||||
{{ _html_head_extras | safe }}
|
||||
{%- endif %}
|
||||
</head>
|
||||
|
||||
<body class="{{extra_classes}}">
|
||||
|
||||
@@ -10,7 +10,6 @@
|
||||
<li>{{ _('Trigger text is processed from the result-text that comes out of any CSS/JSON Filters for this monitor') }}</li>
|
||||
<li>{{ _('Each line is processed separately (think of each line as "OR")') }}</li>
|
||||
<li>{{ _('Note: Wrap in forward slash / to use regex example:') }} <code>/foo\d/</code></li>
|
||||
<li>{{ _('You can also use')}} <a href="#conditions">{{ _('conditions')}}</a> - {{ _('"Page text" - with Contains, Starts With, Not Contains and many more' ) }} <code>/foo\d/</code></li>
|
||||
</ul>
|
||||
</span>
|
||||
</div>
|
||||
|
||||
@@ -2,7 +2,6 @@
|
||||
import psutil
|
||||
import time
|
||||
from threading import Thread
|
||||
import multiprocessing
|
||||
|
||||
import pytest
|
||||
import arrow
|
||||
@@ -14,10 +13,6 @@ import sys
|
||||
# When test server is slow/unresponsive, workers fail fast instead of holding UUIDs for 45s
|
||||
# This prevents exponential priority growth from repeated deferrals (priority × 10 each defer)
|
||||
os.environ['DEFAULT_SETTINGS_REQUESTS_TIMEOUT'] = '5'
|
||||
# Test server runs on localhost (127.0.0.1) which is a private IP.
|
||||
# Allow it globally so all existing tests keep working; test_ssrf_protection
|
||||
# uses monkeypatch to temporarily override this for its own assertions.
|
||||
os.environ['ALLOW_IANA_RESTRICTED_ADDRESSES'] = 'true'
|
||||
|
||||
from changedetectionio.flask_app import init_app_secret, changedetection_app
|
||||
from changedetectionio.tests.util import live_server_setup, new_live_server_setup
|
||||
@@ -192,34 +187,6 @@ def cleanup(datastore_path):
|
||||
if os.path.isfile(f):
|
||||
os.unlink(f)
|
||||
|
||||
def pytest_configure(config):
|
||||
"""Configure pytest environment before tests run.
|
||||
|
||||
CRITICAL: Set multiprocessing start method to 'fork' for Python 3.14+ compatibility.
|
||||
|
||||
Python 3.14 changed the default start method from 'fork' to 'forkserver' on Linux.
|
||||
The forkserver method requires all objects to be picklable, but pytest-flask's
|
||||
LiveServer uses nested functions that can't be pickled.
|
||||
|
||||
Setting 'fork' explicitly:
|
||||
- Maintains compatibility with Python 3.10-3.13 (where 'fork' was already default)
|
||||
- Fixes Python 3.14 pickling errors
|
||||
- Only affects Unix-like systems (Windows uses 'spawn' regardless)
|
||||
|
||||
See: https://github.com/python/cpython/issues/126831
|
||||
See: https://docs.python.org/3/whatsnew/3.14.html
|
||||
"""
|
||||
# Only set if not already set (respects existing configuration)
|
||||
if multiprocessing.get_start_method(allow_none=True) is None:
|
||||
try:
|
||||
# 'fork' is available on Unix-like systems (Linux, macOS)
|
||||
# On Windows, this will have no effect as 'spawn' is the only option
|
||||
multiprocessing.set_start_method('fork', force=False)
|
||||
logger.debug("Set multiprocessing start method to 'fork' for Python 3.14+ compatibility")
|
||||
except (ValueError, RuntimeError):
|
||||
# Already set, not available on this platform, or context already created
|
||||
pass
|
||||
|
||||
def pytest_addoption(parser):
|
||||
"""Add custom command-line options for pytest.
|
||||
|
||||
@@ -341,6 +308,10 @@ def prepare_test_function(live_server, datastore_path):
|
||||
|
||||
|
||||
|
||||
# Prevent background thread from writing during cleanup/reload
|
||||
datastore.needs_write = False
|
||||
datastore.needs_write_urgent = False
|
||||
|
||||
# CRITICAL: Clean up any files from previous tests
|
||||
# This ensures a completely clean directory
|
||||
cleanup(datastore_path)
|
||||
@@ -364,7 +335,6 @@ def prepare_test_function(live_server, datastore_path):
|
||||
# Cleanup: Clear watches and queue after test
|
||||
try:
|
||||
from changedetectionio.flask_app import update_q
|
||||
from pathlib import Path
|
||||
|
||||
# Clear the queue to prevent leakage to next test
|
||||
while not update_q.empty():
|
||||
@@ -374,18 +344,7 @@ def prepare_test_function(live_server, datastore_path):
|
||||
break
|
||||
|
||||
datastore.data['watching'] = {}
|
||||
|
||||
# Delete any old watch metadata JSON files
|
||||
base_path = Path(datastore.datastore_path).resolve()
|
||||
max_depth = 2
|
||||
|
||||
for file in base_path.rglob("*.json"):
|
||||
# Calculate depth relative to base path
|
||||
depth = len(file.relative_to(base_path).parts) - 1
|
||||
|
||||
if depth <= max_depth and file.is_file():
|
||||
file.unlink()
|
||||
|
||||
datastore.needs_write = True
|
||||
except Exception as e:
|
||||
logger.warning(f"Error during datastore cleanup: {e}")
|
||||
|
||||
|
||||
@@ -1,83 +0,0 @@
|
||||
"""Test that plugins can inject HTML into base.html <head> via get_html_head_extras hookimpl."""
|
||||
import pytest
|
||||
from flask import url_for, Response
|
||||
|
||||
from changedetectionio.pluggy_interface import hookimpl, plugin_manager
|
||||
|
||||
_MY_JS = "console.log('my_module_content loaded');"
|
||||
_MY_CSS = ".my-module-example { color: red; }"
|
||||
|
||||
|
||||
class _HeadExtrasPlugin:
|
||||
"""Test plugin that injects tags pointing at its own Flask routes."""
|
||||
|
||||
@hookimpl
|
||||
def get_html_head_extras(self):
|
||||
css_url = url_for('test_plugin_my_module_content_css')
|
||||
js_url = url_for('test_plugin_my_module_content_js')
|
||||
return (
|
||||
f'<link rel="stylesheet" id="test-head-extra-css" href="{css_url}">\n'
|
||||
f'<script id="test-head-extra-js" src="{js_url}" defer></script>'
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture(scope='module')
|
||||
def plugin_routes(live_server):
|
||||
"""Register plugin asset routes once per module (Flask routes can't be added twice)."""
|
||||
app = live_server.app
|
||||
|
||||
@app.route('/test-plugin/my_module_content/css')
|
||||
def test_plugin_my_module_content_css():
|
||||
return Response(_MY_CSS, mimetype='text/css',
|
||||
headers={'Cache-Control': 'max-age=3600'})
|
||||
|
||||
@app.route('/test-plugin/my_module_content/js')
|
||||
def test_plugin_my_module_content_js():
|
||||
return Response(_MY_JS, mimetype='application/javascript',
|
||||
headers={'Cache-Control': 'max-age=3600'})
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def head_extras_plugin(plugin_routes):
|
||||
"""Register the hookimpl for one test then unregister it — function-scoped for clean isolation."""
|
||||
plugin = _HeadExtrasPlugin()
|
||||
plugin_manager.register(plugin, name="test_head_extras")
|
||||
yield plugin
|
||||
plugin_manager.unregister(name="test_head_extras")
|
||||
|
||||
|
||||
def test_plugin_html_injected_into_head(client, live_server, measure_memory_usage, datastore_path, head_extras_plugin):
|
||||
"""get_html_head_extras output must appear inside <head> in the rendered page."""
|
||||
res = client.get(url_for("watchlist.index"), follow_redirects=True)
|
||||
assert res.status_code == 200
|
||||
assert b'id="test-head-extra-css"' in res.data, "Plugin <link> tag missing from rendered page"
|
||||
assert b'id="test-head-extra-js"' in res.data, "Plugin <script> tag missing from rendered page"
|
||||
|
||||
head_end = res.data.find(b'</head>')
|
||||
assert head_end != -1
|
||||
for marker in (b'id="test-head-extra-css"', b'id="test-head-extra-js"'):
|
||||
pos = res.data.find(marker)
|
||||
assert pos != -1 and pos < head_end, f"{marker} must appear before </head>"
|
||||
|
||||
|
||||
def test_plugin_js_route_returns_correct_content(client, live_server, measure_memory_usage, datastore_path, plugin_routes):
|
||||
"""The plugin-registered JS route must return JS with the right Content-Type."""
|
||||
res = client.get(url_for('test_plugin_my_module_content_js'))
|
||||
assert res.status_code == 200
|
||||
assert 'javascript' in res.content_type
|
||||
assert _MY_JS.encode() in res.data
|
||||
|
||||
|
||||
def test_plugin_css_route_returns_correct_content(client, live_server, measure_memory_usage, datastore_path, plugin_routes):
|
||||
"""The plugin-registered CSS route must return CSS with the right Content-Type."""
|
||||
res = client.get(url_for('test_plugin_my_module_content_css'))
|
||||
assert res.status_code == 200
|
||||
assert 'css' in res.content_type
|
||||
assert _MY_CSS.encode() in res.data
|
||||
|
||||
|
||||
def test_no_extras_without_plugin(client, live_server, measure_memory_usage, datastore_path):
|
||||
"""With no hookimpl registered the markers must not appear (isolation check)."""
|
||||
res = client.get(url_for("watchlist.index"), follow_redirects=True)
|
||||
assert b'id="test-head-extra-css"' not in res.data
|
||||
assert b'id="test-head-extra-js"' not in res.data
|
||||
@@ -11,10 +11,10 @@ from changedetectionio.tests.util import set_original_response, set_modified_res
|
||||
set_longer_modified_response, delete_all_watches
|
||||
|
||||
import logging
|
||||
import os
|
||||
|
||||
|
||||
# NOTE - RELIES ON mailserver as hostname running, see github build recipes
|
||||
smtp_test_server = os.getenv('SMTP_TEST_MAILSERVER', 'mailserver')
|
||||
smtp_test_server = 'mailserver'
|
||||
|
||||
ALL_MARKUP_TOKENS = ''.join(f"TOKEN: '{t}'\n{{{{{t}}}}}\n" for t in NotificationContextData().keys())
|
||||
|
||||
|
||||
@@ -170,14 +170,6 @@ def test_api_simple(client, live_server, measure_memory_usage, datastore_path):
|
||||
headers={'x-api-key': api_key},
|
||||
)
|
||||
assert b'(changed) Which is across' in res.data
|
||||
assert b'Some text thats the same' in res.data
|
||||
|
||||
# Fetch the difference between two versions (default text format)
|
||||
res = client.get(
|
||||
url_for("watchhistorydiff", uuid=watch_uuid, from_timestamp='previous', to_timestamp='latest')+"?changesOnly=true",
|
||||
headers={'x-api-key': api_key},
|
||||
)
|
||||
assert b'Some text thats the same' not in res.data
|
||||
|
||||
# Test htmlcolor format
|
||||
res = client.get(
|
||||
@@ -336,68 +328,6 @@ def test_api_simple(client, live_server, measure_memory_usage, datastore_path):
|
||||
)
|
||||
assert len(res.json) == 0, "Watch list should be empty"
|
||||
|
||||
def test_roundtrip_API(client, live_server, measure_memory_usage, datastore_path):
|
||||
"""
|
||||
Test the full round trip, this way we test the default Model fits back into OpenAPI spec
|
||||
:param client:
|
||||
:param live_server:
|
||||
:param measure_memory_usage:
|
||||
:param datastore_path:
|
||||
:return:
|
||||
"""
|
||||
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
|
||||
|
||||
set_original_response(datastore_path=datastore_path)
|
||||
test_url = url_for('test_endpoint', _external=True)
|
||||
|
||||
# Create new
|
||||
res = client.post(
|
||||
url_for("createwatch"),
|
||||
data=json.dumps({"url": test_url}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key},
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
assert res.status_code == 201
|
||||
uuid = res.json.get('uuid')
|
||||
|
||||
# Now fetch it and send it back
|
||||
|
||||
res = client.get(
|
||||
url_for("watch", uuid=uuid),
|
||||
headers={'x-api-key': api_key}
|
||||
)
|
||||
|
||||
watch=res.json
|
||||
|
||||
# Be sure that 'readOnly' values are never updated in the real watch
|
||||
watch['last_changed'] = 454444444444
|
||||
watch['date_created'] = 454444444444
|
||||
|
||||
# HTTP PUT ( UPDATE an existing watch )
|
||||
res = client.put(
|
||||
url_for("watch", uuid=uuid),
|
||||
headers={'x-api-key': api_key, 'content-type': 'application/json'},
|
||||
data=json.dumps(watch),
|
||||
)
|
||||
if res.status_code != 200:
|
||||
print(f"\n=== PUT failed with {res.status_code} ===")
|
||||
print(f"Error: {res.data}")
|
||||
assert res.status_code == 200, "HTTP PUT update was sent OK"
|
||||
|
||||
res = client.get(
|
||||
url_for("watch", uuid=uuid),
|
||||
headers={'x-api-key': api_key}
|
||||
)
|
||||
last_changed = res.json.get('last_changed')
|
||||
assert last_changed != 454444444444
|
||||
assert last_changed != "454444444444"
|
||||
|
||||
date_created = res.json.get('date_created')
|
||||
assert date_created != 454444444444
|
||||
assert date_created != "454444444444"
|
||||
|
||||
|
||||
def test_access_denied(client, live_server, measure_memory_usage, datastore_path):
|
||||
# `config_api_token_enabled` Should be On by default
|
||||
res = client.get(
|
||||
@@ -471,9 +401,6 @@ def test_api_watch_PUT_update(client, live_server, measure_memory_usage, datasto
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
if res.status_code != 201:
|
||||
print(f"\n=== POST createwatch failed with {res.status_code} ===")
|
||||
print(f"Response: {res.data}")
|
||||
assert res.status_code == 201
|
||||
|
||||
wait_for_all_checks(client)
|
||||
@@ -537,12 +464,8 @@ def test_api_watch_PUT_update(client, live_server, measure_memory_usage, datasto
|
||||
)
|
||||
|
||||
assert res.status_code == 400, "Should get error 400 when we give a field that doesnt exist"
|
||||
# Backend validation now rejects unknown fields with a clear error message
|
||||
assert (b'Unknown field' in res.data or
|
||||
b'Additional properties are not allowed' in res.data or
|
||||
b'Unevaluated properties are not allowed' in res.data or
|
||||
b'does not match any of the regexes' in res.data), \
|
||||
"Should reject unknown fields with validation error"
|
||||
# Message will come from `flask_expects_json`
|
||||
assert b'Additional properties are not allowed' in res.data
|
||||
|
||||
|
||||
# Try a XSS URL
|
||||
@@ -563,7 +486,6 @@ def test_api_import(client, live_server, measure_memory_usage, datastore_path):
|
||||
|
||||
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
|
||||
|
||||
# Test 1: Basic import with tag
|
||||
res = client.post(
|
||||
url_for("import") + "?tag=import-test",
|
||||
data='https://website1.com\r\nhttps://website2.com',
|
||||
@@ -582,321 +504,6 @@ def test_api_import(client, live_server, measure_memory_usage, datastore_path):
|
||||
res = client.get(url_for('tags.tags_overview_page'))
|
||||
assert b'import-test' in res.data
|
||||
|
||||
# Test 2: Import with watch configuration fields (issue #3845)
|
||||
# Test string field (include_filters), boolean (paused), and processor
|
||||
import urllib.parse
|
||||
params = urllib.parse.urlencode({
|
||||
'tag': 'config-test',
|
||||
'include_filters': 'div.content',
|
||||
'paused': 'true',
|
||||
'processor': 'text_json_diff',
|
||||
'title': 'Imported with Config'
|
||||
})
|
||||
|
||||
res = client.post(
|
||||
url_for("import") + "?" + params,
|
||||
data='https://website3.com',
|
||||
headers={'x-api-key': api_key},
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
assert res.status_code == 200
|
||||
assert len(res.json) == 1
|
||||
uuid = res.json[0]
|
||||
|
||||
# Verify the configuration was applied
|
||||
watch = live_server.app.config['DATASTORE'].data['watching'][uuid]
|
||||
assert watch['include_filters'] == ['div.content'], "include_filters should be set as array"
|
||||
assert watch['paused'] == True, "paused should be True"
|
||||
assert watch['processor'] == 'text_json_diff', "processor should be set"
|
||||
assert watch['title'] == 'Imported with Config', "title should be set"
|
||||
|
||||
# Test 3: Import with array field (notification_urls) - using valid Apprise format
|
||||
params = urllib.parse.urlencode({
|
||||
'tag': 'notification-test',
|
||||
'notification_urls': 'mailto://test@example.com,mailto://admin@example.com'
|
||||
})
|
||||
|
||||
res = client.post(
|
||||
url_for("import") + "?" + params,
|
||||
data='https://website4.com',
|
||||
headers={'x-api-key': api_key},
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
assert res.status_code == 200
|
||||
uuid = res.json[0]
|
||||
watch = live_server.app.config['DATASTORE'].data['watching'][uuid]
|
||||
assert isinstance(watch['notification_urls'], list), "notification_urls must be stored as a list"
|
||||
assert len(watch['notification_urls']) == 2, "notification_urls should have 2 entries"
|
||||
assert 'mailto://test@example.com' in watch['notification_urls'], "notification_urls should contain first email"
|
||||
assert 'mailto://admin@example.com' in watch['notification_urls'], "notification_urls should contain second email"
|
||||
|
||||
# Test 4: Import with object field (time_between_check)
|
||||
import json
|
||||
time_config = json.dumps({"hours": 2, "minutes": 30})
|
||||
params = urllib.parse.urlencode({
|
||||
'tag': 'schedule-test',
|
||||
'time_between_check': time_config
|
||||
})
|
||||
|
||||
res = client.post(
|
||||
url_for("import") + "?" + params,
|
||||
data='https://website5.com',
|
||||
headers={'x-api-key': api_key},
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
assert res.status_code == 200
|
||||
uuid = res.json[0]
|
||||
watch = live_server.app.config['DATASTORE'].data['watching'][uuid]
|
||||
assert watch['time_between_check']['hours'] == 2, "time_between_check hours should be 2"
|
||||
assert watch['time_between_check']['minutes'] == 30, "time_between_check minutes should be 30"
|
||||
|
||||
# Test 5: Import with invalid processor (should fail)
|
||||
res = client.post(
|
||||
url_for("import") + "?processor=invalid_processor",
|
||||
data='https://website6.com',
|
||||
headers={'x-api-key': api_key},
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
assert res.status_code == 400, "Should reject invalid processor"
|
||||
assert b"Invalid processor" in res.data, "Error message should mention invalid processor"
|
||||
|
||||
# Test 6: Import with invalid field (should fail)
|
||||
res = client.post(
|
||||
url_for("import") + "?unknown_field=value",
|
||||
data='https://website7.com',
|
||||
headers={'x-api-key': api_key},
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
assert res.status_code == 400, "Should reject unknown field"
|
||||
assert b"Unknown watch configuration parameter" in res.data, "Error message should mention unknown parameter"
|
||||
|
||||
# Test 7: Import with complex nested array (browser_steps) - array of objects
|
||||
browser_steps = json.dumps([
|
||||
{"operation": "wait", "selector": "5", "optional_value": ""},
|
||||
{"operation": "click", "selector": "button.submit", "optional_value": ""}
|
||||
])
|
||||
params = urllib.parse.urlencode({
|
||||
'tag': 'browser-test',
|
||||
'browser_steps': browser_steps
|
||||
})
|
||||
|
||||
res = client.post(
|
||||
url_for("import") + "?" + params,
|
||||
data='https://website8.com',
|
||||
headers={'x-api-key': api_key},
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
assert res.status_code == 200, "Should accept browser_steps array"
|
||||
uuid = res.json[0]
|
||||
watch = live_server.app.config['DATASTORE'].data['watching'][uuid]
|
||||
assert len(watch['browser_steps']) == 2, "Should have 2 browser steps"
|
||||
assert watch['browser_steps'][0]['operation'] == 'wait', "First step should be wait"
|
||||
assert watch['browser_steps'][1]['operation'] == 'click', "Second step should be click"
|
||||
assert watch['browser_steps'][1]['selector'] == 'button.submit', "Second step selector should be button.submit"
|
||||
|
||||
# Cleanup
|
||||
delete_all_watches(client)
|
||||
|
||||
|
||||
def test_api_import_small_synchronous(client, live_server, measure_memory_usage, datastore_path):
|
||||
"""Test that small imports (< threshold) are processed synchronously"""
|
||||
from changedetectionio.api.Import import IMPORT_SWITCH_TO_BACKGROUND_THRESHOLD
|
||||
|
||||
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
|
||||
|
||||
# Use local test endpoint to avoid network delays
|
||||
test_url_base = url_for('test_endpoint', _external=True)
|
||||
|
||||
# Create URLs: threshold - 1 to stay under limit
|
||||
num_urls = min(5, IMPORT_SWITCH_TO_BACKGROUND_THRESHOLD - 1) # Use small number for faster test
|
||||
urls = '\n'.join([f'{test_url_base}?id=small-{i}' for i in range(num_urls)])
|
||||
|
||||
# Import small batch
|
||||
res = client.post(
|
||||
url_for("import") + "?tag=small-test",
|
||||
data=urls,
|
||||
headers={'x-api-key': api_key},
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
# Should return 200 OK with UUID list (synchronous)
|
||||
assert res.status_code == 200, f"Should return 200 for small imports, got {res.status_code}"
|
||||
assert isinstance(res.json, list), "Response should be a list of UUIDs"
|
||||
assert len(res.json) == num_urls, f"Should return {num_urls} UUIDs, got {len(res.json)}"
|
||||
|
||||
# Verify all watches were created immediately
|
||||
for uuid in res.json:
|
||||
assert uuid in live_server.app.config['DATASTORE'].data['watching'], \
|
||||
f"Watch {uuid} should exist immediately after synchronous import"
|
||||
|
||||
print(f"\n✓ Successfully created {num_urls} watches synchronously")
|
||||
|
||||
|
||||
def test_api_import_large_background(client, live_server, measure_memory_usage, datastore_path):
|
||||
"""Test that large imports (>= threshold) are processed in background thread"""
|
||||
from changedetectionio.api.Import import IMPORT_SWITCH_TO_BACKGROUND_THRESHOLD
|
||||
import time
|
||||
|
||||
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
|
||||
|
||||
# Use local test endpoint to avoid network delays
|
||||
test_url_base = url_for('test_endpoint', _external=True)
|
||||
|
||||
# Create URLs: threshold + 10 to trigger background processing
|
||||
num_urls = IMPORT_SWITCH_TO_BACKGROUND_THRESHOLD + 10
|
||||
urls = '\n'.join([f'{test_url_base}?id=bulk-{i}' for i in range(num_urls)])
|
||||
|
||||
# Import large batch
|
||||
res = client.post(
|
||||
url_for("import") + "?tag=bulk-test",
|
||||
data=urls,
|
||||
headers={'x-api-key': api_key},
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
# Should return 202 Accepted (background processing)
|
||||
assert res.status_code == 202, f"Should return 202 for large imports, got {res.status_code}"
|
||||
assert b"background" in res.data.lower(), "Response should mention background processing"
|
||||
|
||||
# Extract expected count from response
|
||||
response_json = res.json
|
||||
assert 'count' in response_json, "Response should include count"
|
||||
assert response_json['count'] == num_urls, f"Count should be {num_urls}, got {response_json['count']}"
|
||||
|
||||
# Wait for background thread to complete (with timeout)
|
||||
max_wait = 10 # seconds
|
||||
wait_interval = 0.5
|
||||
elapsed = 0
|
||||
watches_created = 0
|
||||
|
||||
while elapsed < max_wait:
|
||||
time.sleep(wait_interval)
|
||||
elapsed += wait_interval
|
||||
|
||||
# Count how many watches have been created
|
||||
watches_created = len([
|
||||
uuid for uuid, watch in live_server.app.config['DATASTORE'].data['watching'].items()
|
||||
if 'id=bulk-' in watch['url']
|
||||
])
|
||||
|
||||
if watches_created == num_urls:
|
||||
break
|
||||
|
||||
# Verify all watches were created
|
||||
assert watches_created == num_urls, \
|
||||
f"Expected {num_urls} watches to be created, but found {watches_created} after {elapsed}s"
|
||||
|
||||
# Verify watches have correct configuration
|
||||
bulk_watches = [
|
||||
watch for watch in live_server.app.config['DATASTORE'].data['watching'].values()
|
||||
if 'id=bulk-' in watch['url']
|
||||
]
|
||||
|
||||
assert len(bulk_watches) == num_urls, "All bulk watches should exist"
|
||||
|
||||
# Check that they have the correct tag
|
||||
datastore = live_server.app.config['DATASTORE']
|
||||
# Get UUIDs of bulk watches by filtering the datastore keys
|
||||
bulk_watch_uuids = [
|
||||
uuid for uuid, watch in live_server.app.config['DATASTORE'].data['watching'].items()
|
||||
if 'id=bulk-' in watch['url']
|
||||
]
|
||||
for watch_uuid in bulk_watch_uuids:
|
||||
tags = datastore.get_all_tags_for_watch(uuid=watch_uuid)
|
||||
tag_names = [t['title'] for t in tags.values()]
|
||||
assert 'bulk-test' in tag_names, f"Watch {watch_uuid} should have 'bulk-test' tag"
|
||||
|
||||
print(f"\n✓ Successfully created {num_urls} watches in background (took {elapsed}s)")
|
||||
|
||||
|
||||
def test_api_restock_processor_config(client, live_server, measure_memory_usage, datastore_path):
|
||||
"""
|
||||
Test that processor_config_restock_diff is accepted by the API for watches using
|
||||
restock_diff processor, that its schema is validated (enum values, types), and that
|
||||
genuinely unknown fields are rejected with an error that originates from the
|
||||
OpenAPI spec validation layer.
|
||||
"""
|
||||
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
|
||||
test_url = url_for('test_endpoint', _external=True)
|
||||
|
||||
# Create a watch in restock_diff mode WITH processor_config in the POST body (matches the API docs example)
|
||||
res = client.post(
|
||||
url_for("createwatch"),
|
||||
data=json.dumps({
|
||||
"url": test_url,
|
||||
"processor": "restock_diff",
|
||||
"title": "Restock test",
|
||||
"processor_config_restock_diff": {
|
||||
"in_stock_processing": "in_stock_only",
|
||||
"follow_price_changes": True,
|
||||
"price_change_min": 8888888.0,
|
||||
}
|
||||
}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key},
|
||||
follow_redirects=True
|
||||
)
|
||||
assert res.status_code == 201
|
||||
watch_uuid = res.json.get('uuid')
|
||||
assert is_valid_uuid(watch_uuid)
|
||||
|
||||
# Verify the value set on POST is reflected in the UI edit page (not just via PUT)
|
||||
res = client.get(url_for("ui.ui_edit.edit_page", uuid=watch_uuid))
|
||||
assert res.status_code == 200
|
||||
assert b'8888888' in res.data, "price_change_min set via POST should appear in the UI edit form"
|
||||
|
||||
# Valid processor_config_restock_diff update via PUT should also be accepted
|
||||
res = client.put(
|
||||
url_for("watch", uuid=watch_uuid),
|
||||
headers={'x-api-key': api_key, 'content-type': 'application/json'},
|
||||
data=json.dumps({
|
||||
"processor_config_restock_diff": {
|
||||
"in_stock_processing": "all_changes",
|
||||
"follow_price_changes": False,
|
||||
"price_change_min": 8888888.0,
|
||||
"price_change_max": 9999999.0,
|
||||
}
|
||||
}),
|
||||
)
|
||||
assert res.status_code == 200, f"Valid processor_config_restock_diff should be accepted, got: {res.data}"
|
||||
|
||||
# Verify the updated value is still reflected in the UI edit page
|
||||
res = client.get(url_for("ui.ui_edit.edit_page", uuid=watch_uuid))
|
||||
assert res.status_code == 200
|
||||
assert b'8888888' in res.data, "price_change_min set via PUT should appear in the UI edit form"
|
||||
|
||||
# An invalid enum value inside processor_config_restock_diff should be rejected by the spec
|
||||
res = client.put(
|
||||
url_for("watch", uuid=watch_uuid),
|
||||
headers={'x-api-key': api_key, 'content-type': 'application/json'},
|
||||
data=json.dumps({
|
||||
"processor_config_restock_diff": {
|
||||
"in_stock_processing": "not_a_valid_enum_value"
|
||||
}
|
||||
}),
|
||||
)
|
||||
assert res.status_code == 400, "Invalid enum value in processor config should be rejected"
|
||||
assert b'Validation failed' in res.data, "Rejection should come from OpenAPI spec validation layer"
|
||||
|
||||
# A completely unknown field should be rejected (either by OpenAPI spec validation or
|
||||
# the application-level field filter — both are acceptable gatekeepers)
|
||||
res = client.put(
|
||||
url_for("watch", uuid=watch_uuid),
|
||||
headers={'x-api-key': api_key, 'content-type': 'application/json'},
|
||||
data=json.dumps({"field_that_is_not_in_the_spec_at_all": "some value"}),
|
||||
)
|
||||
assert res.status_code == 400, "Unknown fields should be rejected"
|
||||
assert (b'Validation failed' in res.data or b'Unknown field' in res.data), \
|
||||
"Rejection should come from either the OpenAPI spec validation layer or application field filter"
|
||||
|
||||
delete_all_watches(client)
|
||||
|
||||
|
||||
def test_api_conflict_UI_password(client, live_server, measure_memory_usage, datastore_path):
|
||||
|
||||
|
||||
@@ -1023,9 +630,7 @@ def test_api_url_validation(client, live_server, measure_memory_usage, datastore
|
||||
)
|
||||
assert res.status_code == 400, "Updating watch URL to null should fail"
|
||||
# Accept either OpenAPI validation error or our custom validation error
|
||||
assert (b'URL cannot be null' in res.data or
|
||||
b'Validation failed' in res.data or
|
||||
b'validation error' in res.data.lower())
|
||||
assert b'URL cannot be null' in res.data or b'OpenAPI validation failed' in res.data or b'validation error' in res.data.lower()
|
||||
|
||||
# Test 8: UPDATE to empty string URL should fail
|
||||
res = client.put(
|
||||
@@ -1112,140 +717,3 @@ def test_api_url_validation(client, live_server, measure_memory_usage, datastore
|
||||
headers={'x-api-key': api_key},
|
||||
)
|
||||
delete_all_watches(client)
|
||||
|
||||
|
||||
def test_api_time_between_check_validation(client, live_server, measure_memory_usage, datastore_path):
|
||||
"""
|
||||
Test that time_between_check validation works correctly:
|
||||
- When time_between_check_use_default is false, at least one time value must be > 0
|
||||
- Values must be valid integers
|
||||
"""
|
||||
import json
|
||||
from flask import url_for
|
||||
|
||||
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
|
||||
|
||||
# Test 1: time_between_check_use_default=false with NO time_between_check should fail
|
||||
res = client.post(
|
||||
url_for("createwatch"),
|
||||
data=json.dumps({
|
||||
"url": "https://example.com",
|
||||
"time_between_check_use_default": False
|
||||
}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key},
|
||||
)
|
||||
assert res.status_code == 400, "Should fail when time_between_check_use_default=false with no time_between_check"
|
||||
assert b"At least one time interval" in res.data, "Error message should mention time interval requirement"
|
||||
|
||||
# Test 2: time_between_check_use_default=false with ALL zeros should fail
|
||||
res = client.post(
|
||||
url_for("createwatch"),
|
||||
data=json.dumps({
|
||||
"url": "https://example.com",
|
||||
"time_between_check_use_default": False,
|
||||
"time_between_check": {
|
||||
"weeks": 0,
|
||||
"days": 0,
|
||||
"hours": 0,
|
||||
"minutes": 0,
|
||||
"seconds": 0
|
||||
}
|
||||
}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key},
|
||||
)
|
||||
assert res.status_code == 400, "Should fail when all time values are 0"
|
||||
assert b"At least one time interval" in res.data, "Error message should mention time interval requirement"
|
||||
|
||||
# Test 3: time_between_check_use_default=false with NULL values should fail
|
||||
res = client.post(
|
||||
url_for("createwatch"),
|
||||
data=json.dumps({
|
||||
"url": "https://example.com",
|
||||
"time_between_check_use_default": False,
|
||||
"time_between_check": {
|
||||
"weeks": None,
|
||||
"days": None,
|
||||
"hours": None,
|
||||
"minutes": None,
|
||||
"seconds": None
|
||||
}
|
||||
}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key},
|
||||
)
|
||||
assert res.status_code == 400, "Should fail when all time values are null"
|
||||
assert b"At least one time interval" in res.data, "Error message should mention time interval requirement"
|
||||
|
||||
# Test 4: time_between_check_use_default=false with valid hours should succeed
|
||||
res = client.post(
|
||||
url_for("createwatch"),
|
||||
data=json.dumps({
|
||||
"url": "https://example.com",
|
||||
"time_between_check_use_default": False,
|
||||
"time_between_check": {
|
||||
"hours": 2
|
||||
}
|
||||
}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key},
|
||||
)
|
||||
assert res.status_code == 201, "Should succeed with valid hours value"
|
||||
uuid1 = res.json.get('uuid')
|
||||
|
||||
# Test 5: time_between_check_use_default=false with valid minutes should succeed
|
||||
res = client.post(
|
||||
url_for("createwatch"),
|
||||
data=json.dumps({
|
||||
"url": "https://example2.com",
|
||||
"time_between_check_use_default": False,
|
||||
"time_between_check": {
|
||||
"minutes": 30
|
||||
}
|
||||
}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key},
|
||||
)
|
||||
assert res.status_code == 201, "Should succeed with valid minutes value"
|
||||
uuid2 = res.json.get('uuid')
|
||||
|
||||
# Test 6: time_between_check_use_default=true (or missing) with no time_between_check should succeed (uses defaults)
|
||||
res = client.post(
|
||||
url_for("createwatch"),
|
||||
data=json.dumps({
|
||||
"url": "https://example3.com",
|
||||
"time_between_check_use_default": True
|
||||
}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key},
|
||||
)
|
||||
assert res.status_code == 201, "Should succeed when using default settings"
|
||||
uuid3 = res.json.get('uuid')
|
||||
|
||||
# Test 7: Default behavior (no time_between_check_use_default field) should use defaults and succeed
|
||||
res = client.post(
|
||||
url_for("createwatch"),
|
||||
data=json.dumps({
|
||||
"url": "https://example4.com"
|
||||
}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key},
|
||||
)
|
||||
assert res.status_code == 201, "Should succeed with default behavior (using global settings)"
|
||||
uuid4 = res.json.get('uuid')
|
||||
|
||||
# Test 8: Verify integer type validation - string should fail (OpenAPI validation)
|
||||
res = client.post(
|
||||
url_for("createwatch"),
|
||||
data=json.dumps({
|
||||
"url": "https://example5.com",
|
||||
"time_between_check_use_default": False,
|
||||
"time_between_check": {
|
||||
"hours": "not_a_number"
|
||||
}
|
||||
}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key},
|
||||
)
|
||||
assert res.status_code == 400, "Should fail when time value is not an integer"
|
||||
assert b"Validation failed" in res.data or b"not of type" in res.data, "Should mention validation/type error"
|
||||
|
||||
# Cleanup
|
||||
for uuid in [uuid1, uuid2, uuid3, uuid4]:
|
||||
client.delete(
|
||||
url_for("watch", uuid=uuid),
|
||||
headers={'x-api-key': api_key},
|
||||
)
|
||||
|
||||
@@ -107,7 +107,7 @@ def test_watch_notification_urls_validation(client, live_server, measure_memory_
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key}
|
||||
)
|
||||
assert res.status_code == 400, "Should reject non-list notification_urls"
|
||||
assert b"Validation failed" in res.data or b"is not of type" in res.data
|
||||
assert b"OpenAPI validation failed" in res.data or b"Request body validation error" in res.data
|
||||
|
||||
# Test 6: Verify original URLs are preserved after failed update
|
||||
res = client.get(
|
||||
@@ -159,7 +159,7 @@ def test_tag_notification_urls_validation(client, live_server, measure_memory_us
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key}
|
||||
)
|
||||
assert res.status_code == 400, "Should reject non-list notification_urls"
|
||||
assert b"Validation failed" in res.data or b"is not of type" in res.data
|
||||
assert b"OpenAPI validation failed" in res.data or b"Request body validation error" in res.data
|
||||
|
||||
# Test 4: Verify original URLs are preserved after failed update
|
||||
tag = datastore.data['settings']['application']['tags'][tag_uuid]
|
||||
|
||||
@@ -9,51 +9,7 @@ by testing various scenarios that should trigger validation errors.
|
||||
import time
|
||||
import json
|
||||
from flask import url_for
|
||||
from .util import live_server_setup, wait_for_all_checks, delete_all_watches
|
||||
|
||||
|
||||
def test_openapi_merged_spec_contains_restock_fields():
|
||||
"""
|
||||
Unit test: verify that build_merged_spec_dict() correctly merges the
|
||||
restock_diff processor api.yaml into the base spec so that
|
||||
WatchBase.properties includes processor_config_restock_diff with all
|
||||
expected sub-fields. No live server required.
|
||||
"""
|
||||
from changedetectionio.api import build_merged_spec_dict
|
||||
|
||||
spec = build_merged_spec_dict()
|
||||
schemas = spec['components']['schemas']
|
||||
|
||||
# The merged schema for processor_config_restock_diff should exist
|
||||
assert 'processor_config_restock_diff' in schemas, \
|
||||
"processor_config_restock_diff schema missing from merged spec"
|
||||
|
||||
restock_schema = schemas['processor_config_restock_diff']
|
||||
props = restock_schema.get('properties', {})
|
||||
|
||||
expected_fields = {
|
||||
'in_stock_processing',
|
||||
'follow_price_changes',
|
||||
'price_change_min',
|
||||
'price_change_max',
|
||||
'price_change_threshold_percent',
|
||||
}
|
||||
missing = expected_fields - set(props.keys())
|
||||
assert not missing, f"Missing fields in processor_config_restock_diff schema: {missing}"
|
||||
|
||||
# in_stock_processing must be an enum with the three valid values
|
||||
enum_values = set(props['in_stock_processing'].get('enum', []))
|
||||
assert enum_values == {'in_stock_only', 'all_changes', 'off'}, \
|
||||
f"Unexpected enum values for in_stock_processing: {enum_values}"
|
||||
|
||||
# WatchBase.properties must carry a $ref to the restock schema so the
|
||||
# validation middleware can enforce it on every POST/PUT to /watch
|
||||
watchbase_props = schemas['WatchBase']['properties']
|
||||
assert 'processor_config_restock_diff' in watchbase_props, \
|
||||
"processor_config_restock_diff not wired into WatchBase.properties"
|
||||
ref = watchbase_props['processor_config_restock_diff'].get('$ref', '')
|
||||
assert 'processor_config_restock_diff' in ref, \
|
||||
f"WatchBase.processor_config_restock_diff should $ref the schema, got: {ref}"
|
||||
from .util import live_server_setup, wait_for_all_checks
|
||||
|
||||
|
||||
def test_openapi_validation_invalid_content_type_on_create_watch(client, live_server, measure_memory_usage, datastore_path):
|
||||
@@ -70,8 +26,7 @@ def test_openapi_validation_invalid_content_type_on_create_watch(client, live_se
|
||||
|
||||
# Should get 400 error due to OpenAPI validation failure
|
||||
assert res.status_code == 400, f"Expected 400 but got {res.status_code}"
|
||||
assert b"Validation failed" in res.data, "Should contain validation error message"
|
||||
delete_all_watches(client)
|
||||
assert b"OpenAPI validation failed" in res.data, "Should contain OpenAPI validation error message"
|
||||
|
||||
|
||||
def test_openapi_validation_missing_required_field_create_watch(client, live_server, measure_memory_usage, datastore_path):
|
||||
@@ -88,8 +43,7 @@ def test_openapi_validation_missing_required_field_create_watch(client, live_ser
|
||||
|
||||
# Should get 400 error due to missing required field
|
||||
assert res.status_code == 400, f"Expected 400 but got {res.status_code}"
|
||||
assert b"Validation failed" in res.data, "Should contain validation error message"
|
||||
delete_all_watches(client)
|
||||
assert b"OpenAPI validation failed" in res.data, "Should contain OpenAPI validation error message"
|
||||
|
||||
|
||||
def test_openapi_validation_invalid_field_in_request_body(client, live_server, measure_memory_usage, datastore_path):
|
||||
@@ -126,10 +80,7 @@ def test_openapi_validation_invalid_field_in_request_body(client, live_server, m
|
||||
# Should get 400 error due to invalid field (this will be caught by internal validation)
|
||||
# Note: This tests the flow where OpenAPI validation passes but internal validation catches it
|
||||
assert res.status_code == 400, f"Expected 400 but got {res.status_code}"
|
||||
# Backend validation now returns "Unknown field(s):" message
|
||||
assert b"Unknown field" in res.data, \
|
||||
"Should contain validation error about unknown fields"
|
||||
delete_all_watches(client)
|
||||
assert b"Additional properties are not allowed" in res.data, "Should contain validation error about additional properties"
|
||||
|
||||
|
||||
def test_openapi_validation_import_wrong_content_type(client, live_server, measure_memory_usage, datastore_path):
|
||||
@@ -146,8 +97,7 @@ def test_openapi_validation_import_wrong_content_type(client, live_server, measu
|
||||
|
||||
# Should get 400 error due to content-type mismatch
|
||||
assert res.status_code == 400, f"Expected 400 but got {res.status_code}"
|
||||
assert b"Validation failed" in res.data, "Should contain validation error message"
|
||||
delete_all_watches(client)
|
||||
assert b"OpenAPI validation failed" in res.data, "Should contain OpenAPI validation error message"
|
||||
|
||||
|
||||
def test_openapi_validation_import_correct_content_type_succeeds(client, live_server, measure_memory_usage, datastore_path):
|
||||
@@ -165,7 +115,6 @@ def test_openapi_validation_import_correct_content_type_succeeds(client, live_se
|
||||
# Should succeed
|
||||
assert res.status_code == 200, f"Expected 200 but got {res.status_code}"
|
||||
assert len(res.json) == 2, "Should import 2 URLs"
|
||||
delete_all_watches(client)
|
||||
|
||||
|
||||
def test_openapi_validation_get_requests_bypass_validation(client, live_server, measure_memory_usage, datastore_path):
|
||||
@@ -190,7 +139,6 @@ def test_openapi_validation_get_requests_bypass_validation(client, live_server,
|
||||
|
||||
# Should return JSON with watch list (empty in this case)
|
||||
assert isinstance(res.json, dict), "Should return JSON dictionary for watch list"
|
||||
delete_all_watches(client)
|
||||
|
||||
|
||||
def test_openapi_validation_create_tag_missing_required_title(client, live_server, measure_memory_usage, datastore_path):
|
||||
@@ -207,14 +155,11 @@ def test_openapi_validation_create_tag_missing_required_title(client, live_serve
|
||||
|
||||
# Should get 400 error due to missing required field
|
||||
assert res.status_code == 400, f"Expected 400 but got {res.status_code}"
|
||||
assert b"Validation failed" in res.data, "Should contain validation error message"
|
||||
delete_all_watches(client)
|
||||
assert b"OpenAPI validation failed" in res.data, "Should contain OpenAPI validation error message"
|
||||
|
||||
|
||||
def test_openapi_validation_watch_update_allows_partial_updates(client, live_server, measure_memory_usage, datastore_path):
|
||||
|
||||
"""Test that watch updates allow partial updates without requiring all fields (positive test)."""
|
||||
#xxx
|
||||
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
|
||||
|
||||
# First create a valid watch
|
||||
@@ -251,5 +196,4 @@ def test_openapi_validation_watch_update_allows_partial_updates(client, live_ser
|
||||
)
|
||||
assert res.status_code == 200
|
||||
assert res.json.get('title') == 'Updated Title Only', "Title should be updated"
|
||||
assert res.json.get('url') == 'https://example.com', "URL should remain unchanged"
|
||||
delete_all_watches(client)
|
||||
assert res.json.get('url') == 'https://example.com', "URL should remain unchanged"
|
||||
@@ -18,7 +18,7 @@ def test_api_tags_listing(client, live_server, measure_memory_usage, datastore_p
|
||||
url_for("tags"),
|
||||
headers={'x-api-key': api_key}
|
||||
)
|
||||
assert res.get_data(as_text=True).strip() == "{}", "Should be empty list"
|
||||
assert res.text.strip() == "{}", "Should be empty list"
|
||||
assert res.status_code == 200
|
||||
|
||||
res = client.post(
|
||||
@@ -36,7 +36,7 @@ def test_api_tags_listing(client, live_server, measure_memory_usage, datastore_p
|
||||
headers={'x-api-key': api_key}
|
||||
)
|
||||
assert res.status_code == 200
|
||||
assert new_tag_uuid in res.get_data(as_text=True)
|
||||
assert new_tag_uuid in res.text
|
||||
assert res.json[new_tag_uuid]['title'] == tag_title
|
||||
assert res.json[new_tag_uuid]['notification_muted'] == False
|
||||
|
||||
@@ -118,16 +118,6 @@ def test_api_tags_listing(client, live_server, measure_memory_usage, datastore_p
|
||||
assert res.status_code == 200
|
||||
assert new_tag_uuid in res.json.get('tags', [])
|
||||
|
||||
# Test that tags are returned when listing ALL watches (issue #3854)
|
||||
res = client.get(
|
||||
url_for("createwatch"), # GET /api/v1/watch - list all watches
|
||||
headers={'x-api-key': api_key}
|
||||
)
|
||||
assert res.status_code == 200
|
||||
assert watch_uuid in res.json, "Watch should be in the list"
|
||||
assert 'tags' in res.json[watch_uuid], "Tags field should be present in watch list"
|
||||
assert new_tag_uuid in res.json[watch_uuid]['tags'], "Tag UUID should be in tags array"
|
||||
|
||||
# Check recheck by tag
|
||||
before_check_time = live_server.app.config['DATASTORE'].data['watching'][watch_uuid].get('last_checked')
|
||||
time.sleep(1)
|
||||
@@ -158,7 +148,7 @@ def test_api_tags_listing(client, live_server, measure_memory_usage, datastore_p
|
||||
headers={'x-api-key': api_key}
|
||||
)
|
||||
assert res.status_code == 200
|
||||
assert new_tag_uuid not in res.get_data(as_text=True)
|
||||
assert new_tag_uuid not in res.text
|
||||
|
||||
# Verify tag was removed from watch
|
||||
res = client.get(
|
||||
@@ -176,148 +166,4 @@ def test_api_tags_listing(client, live_server, measure_memory_usage, datastore_p
|
||||
assert res.status_code == 204
|
||||
|
||||
|
||||
def test_api_tag_restock_processor_config(client, live_server, measure_memory_usage, datastore_path):
|
||||
"""
|
||||
Test that a tag/group can be created and updated with processor_config_restock_diff via the API.
|
||||
Since Tag extends WatchBase, processor config fields injected into WatchBase are also valid for tags.
|
||||
"""
|
||||
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
|
||||
|
||||
set_original_response(datastore_path=datastore_path)
|
||||
|
||||
# Create a tag with processor_config_restock_diff in a single POST (issue #3966)
|
||||
res = client.post(
|
||||
url_for("tag"),
|
||||
data=json.dumps({
|
||||
"title": "Restock Group",
|
||||
"overrides_watch": True,
|
||||
"processor_config_restock_diff": {
|
||||
"in_stock_processing": "in_stock_only",
|
||||
"follow_price_changes": True,
|
||||
"price_change_min": 7777777
|
||||
}
|
||||
}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key}
|
||||
)
|
||||
assert res.status_code == 201, f"POST tag with restock config failed: {res.data}"
|
||||
tag_uuid = res.json.get('uuid')
|
||||
|
||||
# Verify processor config was saved during creation (the bug: these were discarded)
|
||||
res = client.get(
|
||||
url_for("tag", uuid=tag_uuid),
|
||||
headers={'x-api-key': api_key}
|
||||
)
|
||||
assert res.status_code == 200
|
||||
tag_data = res.json
|
||||
assert tag_data.get('overrides_watch') == True, "overrides_watch should be saved on POST"
|
||||
assert tag_data.get('processor_config_restock_diff', {}).get('in_stock_processing') == 'in_stock_only', \
|
||||
"processor_config_restock_diff should be saved on POST"
|
||||
assert tag_data.get('processor_config_restock_diff', {}).get('price_change_min') == 7777777, \
|
||||
"price_change_min should be saved on POST"
|
||||
|
||||
# Update tag with valid processor_config_restock_diff via PUT
|
||||
res = client.put(
|
||||
url_for("tag", uuid=tag_uuid),
|
||||
headers={'x-api-key': api_key, 'content-type': 'application/json'},
|
||||
data=json.dumps({
|
||||
"overrides_watch": True,
|
||||
"processor_config_restock_diff": {
|
||||
"in_stock_processing": "in_stock_only",
|
||||
"follow_price_changes": True,
|
||||
"price_change_min": 8888888
|
||||
}
|
||||
})
|
||||
)
|
||||
assert res.status_code == 200, f"PUT tag with restock config failed: {res.data}"
|
||||
|
||||
# Verify the config was stored via API
|
||||
res = client.get(
|
||||
url_for("tag", uuid=tag_uuid),
|
||||
headers={'x-api-key': api_key}
|
||||
)
|
||||
assert res.status_code == 200
|
||||
tag_data = res.json
|
||||
assert tag_data.get('overrides_watch') == True
|
||||
assert tag_data.get('processor_config_restock_diff', {}).get('in_stock_processing') == 'in_stock_only'
|
||||
assert tag_data.get('processor_config_restock_diff', {}).get('price_change_min') == 8888888
|
||||
|
||||
# Verify the value is also reflected in the UI tag edit page
|
||||
res = client.get(url_for("tags.form_tag_edit", uuid=tag_uuid))
|
||||
assert res.status_code == 200
|
||||
assert b'8888888' in res.data, "price_change_min set via API should appear in the UI tag edit form"
|
||||
|
||||
# Invalid enum value should be rejected by OpenAPI spec validation
|
||||
res = client.put(
|
||||
url_for("tag", uuid=tag_uuid),
|
||||
headers={'x-api-key': api_key, 'content-type': 'application/json'},
|
||||
data=json.dumps({
|
||||
"processor_config_restock_diff": {
|
||||
"in_stock_processing": "not_a_valid_value"
|
||||
}
|
||||
})
|
||||
)
|
||||
assert res.status_code == 400
|
||||
assert b'Validation failed' in res.data
|
||||
|
||||
# Clean up
|
||||
res = client.delete(
|
||||
url_for("tag", uuid=tag_uuid),
|
||||
headers={'x-api-key': api_key}
|
||||
)
|
||||
assert res.status_code == 204
|
||||
|
||||
|
||||
def test_roundtrip_API(client, live_server, measure_memory_usage, datastore_path):
|
||||
"""
|
||||
Test the full round trip, this way we test the default Model fits back into OpenAPI spec
|
||||
:param client:
|
||||
:param live_server:
|
||||
:param measure_memory_usage:
|
||||
:param datastore_path:
|
||||
:return:
|
||||
"""
|
||||
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
|
||||
|
||||
set_original_response(datastore_path=datastore_path)
|
||||
|
||||
res = client.post(
|
||||
url_for("tag"),
|
||||
data=json.dumps({"title": "My tag title"}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key}
|
||||
)
|
||||
assert res.status_code == 201
|
||||
|
||||
uuid = res.json.get('uuid')
|
||||
|
||||
# Now fetch it and send it back
|
||||
|
||||
res = client.get(
|
||||
url_for("tag", uuid=uuid),
|
||||
headers={'x-api-key': api_key}
|
||||
)
|
||||
|
||||
tag = res.json
|
||||
|
||||
# Only test with date_created (readOnly field that should be filtered out)
|
||||
# last_changed is Watch-specific and doesn't apply to Tags
|
||||
tag['date_created'] = 454444444444
|
||||
|
||||
# HTTP PUT ( UPDATE an existing watch )
|
||||
res = client.put(
|
||||
url_for("tag", uuid=uuid),
|
||||
headers={'x-api-key': api_key, 'content-type': 'application/json'},
|
||||
data=json.dumps(tag),
|
||||
)
|
||||
if res.status_code != 200:
|
||||
print(f"\n=== PUT failed with {res.status_code} ===")
|
||||
print(f"Error: {res.data}")
|
||||
assert res.status_code == 200, "HTTP PUT update was sent OK"
|
||||
|
||||
# Verify readOnly fields like date_created cannot be overridden
|
||||
res = client.get(
|
||||
url_for("tag", uuid=uuid),
|
||||
headers={'x-api-key': api_key}
|
||||
)
|
||||
date_created = res.json.get('date_created')
|
||||
assert date_created != 454444444444, "ReadOnly date_created should not be updateable"
|
||||
assert date_created != "454444444444", "ReadOnly date_created should not be updateable"
|
||||
|
||||
@@ -6,6 +6,8 @@ from flask import url_for
|
||||
from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks, extract_rss_token_from_UI, \
|
||||
extract_UUID_from_client, delete_all_watches
|
||||
|
||||
sleep_time_for_fetch_thread = 3
|
||||
|
||||
|
||||
# Basic test to check inscriptus is not adding return line chars, basically works etc
|
||||
def test_inscriptus():
|
||||
@@ -48,15 +50,6 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
|
||||
# Check this class does not appear (that we didnt see the actual source)
|
||||
assert b'foobar-detection' not in res.data
|
||||
|
||||
# Check POST preview
|
||||
res = client.post(
|
||||
url_for("ui.ui_preview.preview_page", uuid="first"),
|
||||
follow_redirects=True
|
||||
)
|
||||
# Check this class does not appear (that we didnt see the actual source)
|
||||
assert b'foobar-detection' not in res.data
|
||||
|
||||
|
||||
# Make a change
|
||||
set_modified_response(datastore_path=datastore_path)
|
||||
|
||||
@@ -422,28 +415,3 @@ def test_plaintext_even_if_xml_content_and_can_apply_filters(client, live_server
|
||||
assert b'<foobar' not in res.data
|
||||
|
||||
res = delete_all_watches(client)
|
||||
|
||||
|
||||
def test_last_error_cleared_on_same_checksum(client, live_server, datastore_path):
|
||||
"""last_error should be cleared even when content is unchanged (checksumFromPreviousCheckWasTheSame path)"""
|
||||
set_original_response(datastore_path=datastore_path)
|
||||
|
||||
uuid = client.application.config.get('DATASTORE').add_watch(url=url_for('test_endpoint', _external=True))
|
||||
|
||||
# First check - establishes baseline checksum
|
||||
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
||||
wait_for_all_checks(client)
|
||||
|
||||
# Inject a stale last_error directly (simulates a prior failed check)
|
||||
datastore = client.application.config.get('DATASTORE')
|
||||
datastore.update_watch(uuid=uuid, update_obj={'last_error': 'Some previous error'})
|
||||
assert datastore.data['watching'][uuid].get('last_error') == 'Some previous error'
|
||||
|
||||
# Second check - same content, so checksumFromPreviousCheckWasTheSame will fire
|
||||
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
||||
wait_for_all_checks(client)
|
||||
|
||||
# last_error must be cleared even though no change was detected
|
||||
assert datastore.data['watching'][uuid].get('last_error') == False
|
||||
|
||||
delete_all_watches(client)
|
||||
|
||||
@@ -3,13 +3,14 @@
|
||||
from .util import set_original_response, live_server_setup, wait_for_all_checks
|
||||
from flask import url_for
|
||||
import io
|
||||
from zipfile import ZipFile, ZIP_DEFLATED
|
||||
from zipfile import ZipFile
|
||||
import re
|
||||
import time
|
||||
from changedetectionio.model import Watch, Tag
|
||||
|
||||
|
||||
def test_backup(client, live_server, measure_memory_usage, datastore_path):
|
||||
# live_server_setup(live_server) # Setup on conftest per function
|
||||
|
||||
set_original_response(datastore_path=datastore_path)
|
||||
|
||||
|
||||
@@ -31,7 +32,7 @@ def test_backup(client, live_server, measure_memory_usage, datastore_path):
|
||||
time.sleep(4)
|
||||
|
||||
res = client.get(
|
||||
url_for("backups.create"),
|
||||
url_for("backups.index"),
|
||||
follow_redirects=True
|
||||
)
|
||||
# Can see the download link to the backup
|
||||
@@ -53,11 +54,11 @@ def test_backup(client, live_server, measure_memory_usage, datastore_path):
|
||||
backup = ZipFile(io.BytesIO(res.data))
|
||||
l = backup.namelist()
|
||||
|
||||
# Check for UUID-based txt files (history, snapshot, and last-checksum)
|
||||
# Check for UUID-based txt files (history and snapshot)
|
||||
uuid4hex_txt = re.compile('^[a-f0-9]{8}-?[a-f0-9]{4}-?4[a-f0-9]{3}-?[89ab][a-f0-9]{3}-?[a-f0-9]{12}.*txt', re.I)
|
||||
txt_files = list(filter(uuid4hex_txt.match, l))
|
||||
# Should be three txt files in the archive (history, snapshot, and last-checksum)
|
||||
assert len(txt_files) == 3
|
||||
# Should be two txt files in the archive (history and the snapshot)
|
||||
assert len(txt_files) == 2
|
||||
|
||||
# Check for watch.json files (new format)
|
||||
uuid4hex_json = re.compile('^[a-f0-9]{8}-?[a-f0-9]{4}-?4[a-f0-9]{3}-?[89ab][a-f0-9]{3}-?[a-f0-9]{12}/watch\.json$', re.I)
|
||||
@@ -68,194 +69,10 @@ def test_backup(client, live_server, measure_memory_usage, datastore_path):
|
||||
# Check for changedetection.json (settings file)
|
||||
assert 'changedetection.json' in l, "changedetection.json should be in backup"
|
||||
|
||||
# secret.txt must never be included — it contains the Flask session key
|
||||
assert 'secret.txt' not in l, "secret.txt (Flask session key) must not be included in backup"
|
||||
|
||||
# Get the latest one
|
||||
res = client.get(
|
||||
url_for("backups.remove_backups"),
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
assert b'No backups found.' in res.data
|
||||
|
||||
|
||||
def test_watch_data_package_download(client, live_server, measure_memory_usage, datastore_path):
|
||||
"""Test downloading a single watch's data as a zip package"""
|
||||
|
||||
set_original_response(datastore_path=datastore_path)
|
||||
|
||||
uuid = client.application.config.get('DATASTORE').add_watch(url=url_for('test_endpoint', _external=True))
|
||||
tag_uuid = client.application.config.get('DATASTORE').add_tag(title="Tasty backup tag")
|
||||
tag_uuid2 = client.application.config.get('DATASTORE').add_tag(title="Tasty backup tag number two")
|
||||
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
||||
|
||||
wait_for_all_checks(client)
|
||||
|
||||
# Download the watch data package
|
||||
res = client.get(url_for("ui.ui_edit.watch_get_data_package", uuid=uuid))
|
||||
|
||||
# Should get the right zip content type
|
||||
assert res.content_type == "application/zip"
|
||||
|
||||
# Should be PK/ZIP stream (PKzip header)
|
||||
assert res.data[:2] == b'PK', "File should start with PK (PKzip header)"
|
||||
assert res.data.count(b'PK') >= 2, "Should have multiple PK markers (zip file structure)"
|
||||
|
||||
# Verify zip contents
|
||||
backup = ZipFile(io.BytesIO(res.data))
|
||||
files = backup.namelist()
|
||||
|
||||
# Should have files in a UUID directory
|
||||
assert any(uuid in f for f in files), f"Files should be in UUID directory: {files}"
|
||||
|
||||
# Should contain watch.json
|
||||
watch_json_path = f"{uuid}/watch.json"
|
||||
assert watch_json_path in files, f"Should contain watch.json, got: {files}"
|
||||
|
||||
# Should contain history/snapshot files
|
||||
uuid4hex_txt = re.compile(f'^{re.escape(uuid)}/.*\\.txt', re.I)
|
||||
txt_files = list(filter(uuid4hex_txt.match, files))
|
||||
assert len(txt_files) > 0, f"Should have at least one .txt file (history/snapshot), got: {files}"
|
||||
|
||||
|
||||
def test_backup_restore(client, live_server, measure_memory_usage, datastore_path):
|
||||
"""Test that a full backup zip can be restored — watches and tags survive a round-trip."""
|
||||
|
||||
set_original_response(datastore_path=datastore_path)
|
||||
|
||||
datastore = live_server.app.config['DATASTORE']
|
||||
watch_url = url_for('test_endpoint', _external=True)
|
||||
|
||||
# Set up: one watch and two tags
|
||||
uuid = datastore.add_watch(url=watch_url)
|
||||
tag_uuid = datastore.add_tag(title="Tasty backup tag")
|
||||
tag_uuid2 = datastore.add_tag(title="Tasty backup tag number two")
|
||||
|
||||
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
||||
wait_for_all_checks(client)
|
||||
|
||||
# Create a full backup
|
||||
client.get(url_for("backups.request_backup"), follow_redirects=True)
|
||||
time.sleep(4)
|
||||
|
||||
# Download the latest backup zip
|
||||
res = client.get(url_for("backups.download_backup", filename="latest"), follow_redirects=True)
|
||||
assert res.content_type == "application/zip"
|
||||
zip_data = res.data
|
||||
|
||||
# Confirm the zip contains both watch.json and tag.json entries
|
||||
backup = ZipFile(io.BytesIO(zip_data))
|
||||
names = backup.namelist()
|
||||
assert f"{uuid}/watch.json" in names, f"watch.json missing from backup: {names}"
|
||||
assert f"{tag_uuid}/tag.json" in names, f"tag.json for tag 1 missing from backup: {names}"
|
||||
assert f"{tag_uuid2}/tag.json" in names, f"tag.json for tag 2 missing from backup: {names}"
|
||||
|
||||
# --- Wipe everything ---
|
||||
datastore.delete('all')
|
||||
client.get(url_for("tags.delete_all"), follow_redirects=True)
|
||||
|
||||
assert uuid not in datastore.data['watching'], "Watch should be gone after delete"
|
||||
assert tag_uuid not in datastore.data['settings']['application']['tags'], "Tag 1 should be gone after delete"
|
||||
assert tag_uuid2 not in datastore.data['settings']['application']['tags'], "Tag 2 should be gone after delete"
|
||||
|
||||
# --- Restore from the backup zip ---
|
||||
res = client.post(
|
||||
url_for("backups.restore.backups_restore_start"),
|
||||
data={
|
||||
'zip_file': (io.BytesIO(zip_data), 'backup.zip'),
|
||||
'include_groups': 'y',
|
||||
'include_groups_replace_existing': 'y',
|
||||
'include_watches': 'y',
|
||||
'include_watches_replace_existing': 'y',
|
||||
},
|
||||
content_type='multipart/form-data',
|
||||
follow_redirects=True
|
||||
)
|
||||
assert res.status_code == 200
|
||||
|
||||
# Wait for the thread to finish
|
||||
time.sleep(2)
|
||||
|
||||
# --- Watch checks ---
|
||||
restored_watch = datastore.data['watching'].get(uuid)
|
||||
assert restored_watch is not None, f"Watch {uuid} not found after restore"
|
||||
assert restored_watch['url'] == watch_url, "Restored watch URL does not match"
|
||||
assert isinstance(restored_watch, Watch.model), \
|
||||
f"Watch not properly rehydrated, got {type(restored_watch)}"
|
||||
assert restored_watch.history_n >= 1, \
|
||||
f"Restored watch should have at least 1 history entry, got {restored_watch.history_n}"
|
||||
|
||||
# --- Tag checks ---
|
||||
restored_tags = datastore.data['settings']['application']['tags']
|
||||
|
||||
restored_tag = restored_tags.get(tag_uuid)
|
||||
assert restored_tag is not None, f"Tag {tag_uuid} not found after restore"
|
||||
assert restored_tag['title'] == "Tasty backup tag", "Restored tag 1 title does not match"
|
||||
assert isinstance(restored_tag, Tag.model), \
|
||||
f"Tag 1 not properly rehydrated, got {type(restored_tag)}"
|
||||
|
||||
restored_tag2 = restored_tags.get(tag_uuid2)
|
||||
assert restored_tag2 is not None, f"Tag {tag_uuid2} not found after restore"
|
||||
assert restored_tag2['title'] == "Tasty backup tag number two", "Restored tag 2 title does not match"
|
||||
assert isinstance(restored_tag2, Tag.model), \
|
||||
f"Tag 2 not properly rehydrated, got {type(restored_tag2)}"
|
||||
|
||||
|
||||
def test_backup_restore_zip_slip_rejected(client, live_server, measure_memory_usage, datastore_path):
|
||||
"""Zip Slip path traversal entries in a restore zip must be rejected."""
|
||||
import pytest
|
||||
from changedetectionio.blueprint.backups.restore import import_from_zip
|
||||
|
||||
# Build a zip with a path traversal entry that would escape the extraction dir
|
||||
malicious_zip = io.BytesIO()
|
||||
with ZipFile(malicious_zip, 'w') as zf:
|
||||
zf.writestr("../escaped.txt", "ATTACKER-CONTROLLED")
|
||||
malicious_zip.seek(0)
|
||||
|
||||
datastore = live_server.app.config['DATASTORE']
|
||||
|
||||
with pytest.raises(ValueError, match="Zip Slip"):
|
||||
import_from_zip(
|
||||
zip_stream=malicious_zip,
|
||||
datastore=datastore,
|
||||
include_groups=True,
|
||||
include_groups_replace=True,
|
||||
include_watches=True,
|
||||
include_watches_replace=True,
|
||||
)
|
||||
|
||||
|
||||
def test_backup_restore_zip_bomb_rejected(client, live_server, measure_memory_usage, datastore_path):
|
||||
"""A zip whose total uncompressed size exceeds the limit must be rejected.
|
||||
|
||||
The guard reads file_size from the zip central-directory metadata — no
|
||||
actual decompression happens, so this test is fast and uses minimal RAM.
|
||||
100 KB of zeros compresses to ~100 bytes; monkeypatching the limit to
|
||||
50 KB is enough to trigger the check without creating any large files.
|
||||
"""
|
||||
import pytest
|
||||
import changedetectionio.blueprint.backups.restore as restore_mod
|
||||
from changedetectionio.blueprint.backups.restore import import_from_zip
|
||||
|
||||
# ~100 KB of zeros → deflate compresses to ~100 bytes, but file_size metadata = 100 KB
|
||||
bomb_zip = io.BytesIO()
|
||||
with ZipFile(bomb_zip, 'w', compression=ZIP_DEFLATED) as zf:
|
||||
zf.writestr("data.txt", b"\x00" * (100 * 1024))
|
||||
bomb_zip.seek(0)
|
||||
|
||||
datastore = live_server.app.config['DATASTORE']
|
||||
original_limit = restore_mod._MAX_DECOMPRESSED_BYTES
|
||||
try:
|
||||
restore_mod._MAX_DECOMPRESSED_BYTES = 50 * 1024 # 50 KB limit for this test
|
||||
with pytest.raises(ValueError, match="decompressed size"):
|
||||
import_from_zip(
|
||||
zip_stream=bomb_zip,
|
||||
datastore=datastore,
|
||||
include_groups=True,
|
||||
include_groups_replace=True,
|
||||
include_watches=True,
|
||||
include_watches_replace=True,
|
||||
)
|
||||
finally:
|
||||
restore_mod._MAX_DECOMPRESSED_BYTES = original_limit
|
||||
assert b'No backups found.' in res.data
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user