mirror of
https://github.com/dgtlmoon/changedetection.io.git
synced 2026-06-13 20:31:13 +00:00
Compare commits
89 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 6eb71c9eee | |||
| 34c2c05bc5 | |||
| 0da8dfb09a | |||
| b747e06c3e | |||
| 5a4266069b | |||
| 36269717b2 | |||
| 84f2629a0c | |||
| e9d740bd49 | |||
| c18421fbe9 | |||
| f29d6a857b | |||
| fcfe089a53 | |||
| b32617d700 | |||
| 380d8a26a1 | |||
| 02c03fc32b | |||
| db3d38b3ee | |||
| ecd8af94f6 | |||
| e400e463a4 | |||
| 9d355b8f05 | |||
| da43a17541 | |||
| 904eaaaaf7 | |||
| 1e12ae404f | |||
| ec7d56f85d | |||
| 417d57e574 | |||
| 1d7d812eb0 | |||
| 524393a1fb | |||
| b09ebcbef6 | |||
| 30ac10ff24 | |||
| b984426666 | |||
| 1889a10ef6 | |||
| f66ae4fceb | |||
| fb14229888 | |||
| 6d1081f5bc | |||
| 9e907d8466 | |||
| 6d6a0fd7ef | |||
| 1537e58fc2 | |||
| 5669509255 | |||
| 1d72716c69 | |||
| c12da77439 | |||
| f9048af6e8 | |||
| 2f7315e29c | |||
| bf3f8eae45 | |||
| fe7aa38c65 | |||
| a385c89abf | |||
| 98f884bbff | |||
| 35499d1171 | |||
| 599aed75d1 | |||
| 6df75a5af9 | |||
| f71c4b9865 | |||
| 82d5d7999c | |||
| 7a51f1e4bf | |||
| 91dee697f9 | |||
| 4128acf95a | |||
| 7c8d59c795 | |||
| 897403f7cc | |||
| bca35f680e | |||
| fafea1b5c6 | |||
| 93630e188d | |||
| 7e99d748b9 | |||
| 352c91c619 | |||
| a6e55aaba9 | |||
| 25a17bd49d | |||
| 954582a581 | |||
| d8ef86a8b5 | |||
| 8711d29861 | |||
| 2343ddd88a | |||
| c6d6ef0e0c | |||
| 23063ad8a1 | |||
| 27b8a2d178 | |||
| a53f2a784d | |||
| 7558ca5fda | |||
| 383c3b427f | |||
| b01ba5d8a1 | |||
| 86e5184cef | |||
| 1dbf1f5db5 | |||
| c5bd7da647 | |||
| 549e167746 | |||
| 9d38b45173 | |||
| 3558e9ee10 | |||
| 4b94de7e0c | |||
| 3f99f0dd7b | |||
| fe465de73c | |||
| 1ad3207288 | |||
| dbe238e33d | |||
| 32cb72b459 | |||
| 501aa61e19 | |||
| b6d3d63372 | |||
| f4bb32f588 | |||
| bcd32852ca | |||
| ad14807067 |
@@ -0,0 +1,33 @@
|
|||||||
|
server {
|
||||||
|
listen 80;
|
||||||
|
server_name localhost;
|
||||||
|
|
||||||
|
# Test basic reverse proxy to changedetection.io
|
||||||
|
location / {
|
||||||
|
proxy_pass http://changedet-app:5000;
|
||||||
|
proxy_set_header Host $host;
|
||||||
|
proxy_set_header X-Real-IP $remote_addr;
|
||||||
|
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||||
|
proxy_set_header X-Forwarded-Proto $scheme;
|
||||||
|
|
||||||
|
# WebSocket support
|
||||||
|
proxy_http_version 1.1;
|
||||||
|
proxy_set_header Upgrade $http_upgrade;
|
||||||
|
proxy_set_header Connection "upgrade";
|
||||||
|
}
|
||||||
|
|
||||||
|
# Test subpath deployment with X-Forwarded-Prefix
|
||||||
|
location /changedet-sub/ {
|
||||||
|
proxy_pass http://changedet-app:5000/;
|
||||||
|
proxy_set_header X-Forwarded-Prefix /changedet-sub;
|
||||||
|
proxy_set_header Host $host;
|
||||||
|
proxy_set_header X-Real-IP $remote_addr;
|
||||||
|
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||||
|
proxy_set_header X-Forwarded-Proto $scheme;
|
||||||
|
|
||||||
|
# WebSocket support
|
||||||
|
proxy_http_version 1.1;
|
||||||
|
proxy_set_header Upgrade $http_upgrade;
|
||||||
|
proxy_set_header Connection "upgrade";
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -66,27 +66,27 @@ jobs:
|
|||||||
echo ${{ github.ref }} > changedetectionio/tag.txt
|
echo ${{ github.ref }} > changedetectionio/tag.txt
|
||||||
|
|
||||||
- name: Set up QEMU
|
- name: Set up QEMU
|
||||||
uses: docker/setup-qemu-action@v3
|
uses: docker/setup-qemu-action@v4
|
||||||
with:
|
with:
|
||||||
image: tonistiigi/binfmt:latest
|
image: tonistiigi/binfmt:latest
|
||||||
platforms: all
|
platforms: all
|
||||||
|
|
||||||
- name: Login to GitHub Container Registry
|
- name: Login to GitHub Container Registry
|
||||||
uses: docker/login-action@v3
|
uses: docker/login-action@v4
|
||||||
with:
|
with:
|
||||||
registry: ghcr.io
|
registry: ghcr.io
|
||||||
username: ${{ github.actor }}
|
username: ${{ github.actor }}
|
||||||
password: ${{ secrets.GITHUB_TOKEN }}
|
password: ${{ secrets.GITHUB_TOKEN }}
|
||||||
|
|
||||||
- name: Login to Docker Hub Container Registry
|
- name: Login to Docker Hub Container Registry
|
||||||
uses: docker/login-action@v3
|
uses: docker/login-action@v4
|
||||||
with:
|
with:
|
||||||
username: ${{ secrets.DOCKER_HUB_USERNAME }}
|
username: ${{ secrets.DOCKER_HUB_USERNAME }}
|
||||||
password: ${{ secrets.DOCKER_HUB_ACCESS_TOKEN }}
|
password: ${{ secrets.DOCKER_HUB_ACCESS_TOKEN }}
|
||||||
|
|
||||||
- name: Set up Docker Buildx
|
- name: Set up Docker Buildx
|
||||||
id: buildx
|
id: buildx
|
||||||
uses: docker/setup-buildx-action@v3
|
uses: docker/setup-buildx-action@v4
|
||||||
with:
|
with:
|
||||||
install: true
|
install: true
|
||||||
version: latest
|
version: latest
|
||||||
@@ -95,7 +95,7 @@ jobs:
|
|||||||
# master branch -> :dev container tag
|
# master branch -> :dev container tag
|
||||||
- name: Docker meta :dev
|
- name: Docker meta :dev
|
||||||
if: ${{ github.ref == 'refs/heads/master' && github.event_name != 'release' }}
|
if: ${{ github.ref == 'refs/heads/master' && github.event_name != 'release' }}
|
||||||
uses: docker/metadata-action@v5
|
uses: docker/metadata-action@v6
|
||||||
id: meta_dev
|
id: meta_dev
|
||||||
with:
|
with:
|
||||||
images: |
|
images: |
|
||||||
@@ -103,11 +103,19 @@ jobs:
|
|||||||
ghcr.io/${{ github.repository }}
|
ghcr.io/${{ github.repository }}
|
||||||
tags: |
|
tags: |
|
||||||
type=raw,value=dev
|
type=raw,value=dev
|
||||||
|
labels: |
|
||||||
|
org.opencontainers.image.created=${{ github.event.release.published_at }}
|
||||||
|
org.opencontainers.image.description=Website, webpage change detection, monitoring and notifications.
|
||||||
|
org.opencontainers.image.documentation=https://changedetection.io
|
||||||
|
org.opencontainers.image.revision=${{ github.sha }}
|
||||||
|
org.opencontainers.image.source=https://github.com/dgtlmoon/changedetection.io
|
||||||
|
org.opencontainers.image.title=changedetection.io
|
||||||
|
org.opencontainers.image.url=https://changedetection.io
|
||||||
|
|
||||||
- name: Build and push :dev
|
- name: Build and push :dev
|
||||||
id: docker_build
|
id: docker_build
|
||||||
if: ${{ github.ref == 'refs/heads/master' && github.event_name != 'release' }}
|
if: ${{ github.ref == 'refs/heads/master' && github.event_name != 'release' }}
|
||||||
uses: docker/build-push-action@v6
|
uses: docker/build-push-action@v7
|
||||||
with:
|
with:
|
||||||
context: ./
|
context: ./
|
||||||
file: ./Dockerfile
|
file: ./Dockerfile
|
||||||
@@ -128,10 +136,10 @@ jobs:
|
|||||||
echo "Release tag: ${{ github.event.release.tag_name }}"
|
echo "Release tag: ${{ github.event.release.tag_name }}"
|
||||||
echo "Github ref: ${{ github.ref }}"
|
echo "Github ref: ${{ github.ref }}"
|
||||||
echo "Github ref name: ${{ github.ref_name }}"
|
echo "Github ref name: ${{ github.ref_name }}"
|
||||||
|
|
||||||
- name: Docker meta :tag
|
- name: Docker meta :tag
|
||||||
if: github.event_name == 'release' && startsWith(github.event.release.tag_name, '0.')
|
if: github.event_name == 'release' && startsWith(github.event.release.tag_name, '0.')
|
||||||
uses: docker/metadata-action@v5
|
uses: docker/metadata-action@v6
|
||||||
id: meta
|
id: meta
|
||||||
with:
|
with:
|
||||||
images: |
|
images: |
|
||||||
@@ -142,11 +150,20 @@ jobs:
|
|||||||
type=semver,pattern={{major}}.{{minor}},value=${{ github.event.release.tag_name }}
|
type=semver,pattern={{major}}.{{minor}},value=${{ github.event.release.tag_name }}
|
||||||
type=semver,pattern={{major}},value=${{ github.event.release.tag_name }}
|
type=semver,pattern={{major}},value=${{ github.event.release.tag_name }}
|
||||||
type=raw,value=latest
|
type=raw,value=latest
|
||||||
|
labels: |
|
||||||
|
org.opencontainers.image.created=${{ github.event.release.published_at }}
|
||||||
|
org.opencontainers.image.description=Website, webpage change detection, monitoring and notifications.
|
||||||
|
org.opencontainers.image.documentation=https://changedetection.io
|
||||||
|
org.opencontainers.image.revision=${{ github.sha }}
|
||||||
|
org.opencontainers.image.source=https://github.com/dgtlmoon/changedetection.io
|
||||||
|
org.opencontainers.image.title=changedetection.io
|
||||||
|
org.opencontainers.image.url=https://changedetection.io
|
||||||
|
org.opencontainers.image.version=${{ github.event.release.tag_name }}
|
||||||
|
|
||||||
- name: Build and push :tag
|
- name: Build and push :tag
|
||||||
id: docker_build_tag_release
|
id: docker_build_tag_release
|
||||||
if: github.event_name == 'release' && startsWith(github.event.release.tag_name, '0.')
|
if: github.event_name == 'release' && startsWith(github.event.release.tag_name, '0.')
|
||||||
uses: docker/build-push-action@v6
|
uses: docker/build-push-action@v7
|
||||||
with:
|
with:
|
||||||
context: ./
|
context: ./
|
||||||
file: ./Dockerfile
|
file: ./Dockerfile
|
||||||
|
|||||||
@@ -21,7 +21,7 @@ jobs:
|
|||||||
- name: Build a binary wheel and a source tarball
|
- name: Build a binary wheel and a source tarball
|
||||||
run: python3 -m build
|
run: python3 -m build
|
||||||
- name: Store the distribution packages
|
- name: Store the distribution packages
|
||||||
uses: actions/upload-artifact@v6
|
uses: actions/upload-artifact@v7
|
||||||
with:
|
with:
|
||||||
name: python-package-distributions
|
name: python-package-distributions
|
||||||
path: dist/
|
path: dist/
|
||||||
@@ -34,7 +34,7 @@ jobs:
|
|||||||
- build
|
- build
|
||||||
steps:
|
steps:
|
||||||
- name: Download all the dists
|
- name: Download all the dists
|
||||||
uses: actions/download-artifact@v7
|
uses: actions/download-artifact@v8
|
||||||
with:
|
with:
|
||||||
name: python-package-distributions
|
name: python-package-distributions
|
||||||
path: dist/
|
path: dist/
|
||||||
@@ -93,7 +93,7 @@ jobs:
|
|||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Download all the dists
|
- name: Download all the dists
|
||||||
uses: actions/download-artifact@v7
|
uses: actions/download-artifact@v8
|
||||||
with:
|
with:
|
||||||
name: python-package-distributions
|
name: python-package-distributions
|
||||||
path: dist/
|
path: dist/
|
||||||
|
|||||||
@@ -60,14 +60,14 @@ jobs:
|
|||||||
|
|
||||||
# Just test that the build works, some libraries won't compile on ARM/rPi etc
|
# Just test that the build works, some libraries won't compile on ARM/rPi etc
|
||||||
- name: Set up QEMU
|
- name: Set up QEMU
|
||||||
uses: docker/setup-qemu-action@v3
|
uses: docker/setup-qemu-action@v4
|
||||||
with:
|
with:
|
||||||
image: tonistiigi/binfmt:latest
|
image: tonistiigi/binfmt:latest
|
||||||
platforms: all
|
platforms: all
|
||||||
|
|
||||||
- name: Set up Docker Buildx
|
- name: Set up Docker Buildx
|
||||||
id: buildx
|
id: buildx
|
||||||
uses: docker/setup-buildx-action@v3
|
uses: docker/setup-buildx-action@v4
|
||||||
with:
|
with:
|
||||||
install: true
|
install: true
|
||||||
version: latest
|
version: latest
|
||||||
@@ -75,7 +75,7 @@ jobs:
|
|||||||
|
|
||||||
- name: Test that the docker containers can build (${{ matrix.platform }} - ${{ matrix.dockerfile }})
|
- name: Test that the docker containers can build (${{ matrix.platform }} - ${{ matrix.dockerfile }})
|
||||||
id: docker_build
|
id: docker_build
|
||||||
uses: docker/build-push-action@v6
|
uses: docker/build-push-action@v7
|
||||||
# https://github.com/docker/build-push-action#customizing
|
# https://github.com/docker/build-push-action#customizing
|
||||||
with:
|
with:
|
||||||
context: ./
|
context: ./
|
||||||
|
|||||||
@@ -52,4 +52,13 @@ jobs:
|
|||||||
uses: ./.github/workflows/test-stack-reusable-workflow.yml
|
uses: ./.github/workflows/test-stack-reusable-workflow.yml
|
||||||
with:
|
with:
|
||||||
python-version: '3.13'
|
python-version: '3.13'
|
||||||
skip-pypuppeteer: true
|
skip-pypuppeteer: true
|
||||||
|
|
||||||
|
|
||||||
|
test-application-3-14:
|
||||||
|
#if: github.event_name == 'push' && github.ref == 'refs/heads/master'
|
||||||
|
needs: lint-code
|
||||||
|
uses: ./.github/workflows/test-stack-reusable-workflow.yml
|
||||||
|
with:
|
||||||
|
python-version: '3.14'
|
||||||
|
skip-pypuppeteer: false
|
||||||
|
|||||||
@@ -42,10 +42,10 @@ jobs:
|
|||||||
run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT
|
run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT
|
||||||
|
|
||||||
- name: Set up Docker Buildx
|
- name: Set up Docker Buildx
|
||||||
uses: docker/setup-buildx-action@v3
|
uses: docker/setup-buildx-action@v4
|
||||||
|
|
||||||
- name: Build changedetection.io container for testing under Python ${{ env.PYTHON_VERSION }}
|
- name: Build changedetection.io container for testing under Python ${{ env.PYTHON_VERSION }}
|
||||||
uses: docker/build-push-action@v6
|
uses: docker/build-push-action@v7
|
||||||
with:
|
with:
|
||||||
context: ./
|
context: ./
|
||||||
file: ./Dockerfile
|
file: ./Dockerfile
|
||||||
@@ -71,7 +71,7 @@ jobs:
|
|||||||
docker save test-changedetectionio -o /tmp/test-changedetectionio.tar
|
docker save test-changedetectionio -o /tmp/test-changedetectionio.tar
|
||||||
|
|
||||||
- name: Upload Docker image artifact
|
- name: Upload Docker image artifact
|
||||||
uses: actions/upload-artifact@v6
|
uses: actions/upload-artifact@v7
|
||||||
with:
|
with:
|
||||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||||
path: /tmp/test-changedetectionio.tar
|
path: /tmp/test-changedetectionio.tar
|
||||||
@@ -88,7 +88,7 @@ jobs:
|
|||||||
- uses: actions/checkout@v6
|
- uses: actions/checkout@v6
|
||||||
|
|
||||||
- name: Download Docker image artifact
|
- name: Download Docker image artifact
|
||||||
uses: actions/download-artifact@v7
|
uses: actions/download-artifact@v8
|
||||||
with:
|
with:
|
||||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||||
path: /tmp
|
path: /tmp
|
||||||
@@ -116,7 +116,7 @@ jobs:
|
|||||||
- uses: actions/checkout@v6
|
- uses: actions/checkout@v6
|
||||||
|
|
||||||
- name: Download Docker image artifact
|
- name: Download Docker image artifact
|
||||||
uses: actions/download-artifact@v7
|
uses: actions/download-artifact@v8
|
||||||
with:
|
with:
|
||||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||||
path: /tmp
|
path: /tmp
|
||||||
@@ -165,14 +165,14 @@ jobs:
|
|||||||
|
|
||||||
- name: Store test artifacts
|
- name: Store test artifacts
|
||||||
if: always()
|
if: always()
|
||||||
uses: actions/upload-artifact@v6
|
uses: actions/upload-artifact@v7
|
||||||
with:
|
with:
|
||||||
name: test-cdio-basic-tests-output-py${{ env.PYTHON_VERSION }}
|
name: test-cdio-basic-tests-output-py${{ env.PYTHON_VERSION }}
|
||||||
path: output-logs
|
path: output-logs
|
||||||
|
|
||||||
- name: Store CLI test output
|
- name: Store CLI test output
|
||||||
if: always()
|
if: always()
|
||||||
uses: actions/upload-artifact@v6
|
uses: actions/upload-artifact@v7
|
||||||
with:
|
with:
|
||||||
name: test-cdio-cli-opts-output-py${{ env.PYTHON_VERSION }}
|
name: test-cdio-cli-opts-output-py${{ env.PYTHON_VERSION }}
|
||||||
path: cli-opts-output.txt
|
path: cli-opts-output.txt
|
||||||
@@ -188,7 +188,7 @@ jobs:
|
|||||||
- uses: actions/checkout@v6
|
- uses: actions/checkout@v6
|
||||||
|
|
||||||
- name: Download Docker image artifact
|
- name: Download Docker image artifact
|
||||||
uses: actions/download-artifact@v7
|
uses: actions/download-artifact@v8
|
||||||
with:
|
with:
|
||||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||||
path: /tmp
|
path: /tmp
|
||||||
@@ -230,7 +230,7 @@ jobs:
|
|||||||
- uses: actions/checkout@v6
|
- uses: actions/checkout@v6
|
||||||
|
|
||||||
- name: Download Docker image artifact
|
- name: Download Docker image artifact
|
||||||
uses: actions/download-artifact@v7
|
uses: actions/download-artifact@v8
|
||||||
with:
|
with:
|
||||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||||
path: /tmp
|
path: /tmp
|
||||||
@@ -270,7 +270,7 @@ jobs:
|
|||||||
- uses: actions/checkout@v6
|
- uses: actions/checkout@v6
|
||||||
|
|
||||||
- name: Download Docker image artifact
|
- name: Download Docker image artifact
|
||||||
uses: actions/download-artifact@v7
|
uses: actions/download-artifact@v8
|
||||||
with:
|
with:
|
||||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||||
path: /tmp
|
path: /tmp
|
||||||
@@ -306,7 +306,7 @@ jobs:
|
|||||||
- uses: actions/checkout@v6
|
- uses: actions/checkout@v6
|
||||||
|
|
||||||
- name: Download Docker image artifact
|
- name: Download Docker image artifact
|
||||||
uses: actions/download-artifact@v7
|
uses: actions/download-artifact@v8
|
||||||
with:
|
with:
|
||||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||||
path: /tmp
|
path: /tmp
|
||||||
@@ -324,6 +324,175 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
docker run --rm --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest tests/smtp/test_notification_smtp.py'
|
docker run --rm --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest tests/smtp/test_notification_smtp.py'
|
||||||
|
|
||||||
|
nginx-reverse-proxy:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
needs: build
|
||||||
|
timeout-minutes: 10
|
||||||
|
env:
|
||||||
|
PYTHON_VERSION: ${{ inputs.python-version }}
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v6
|
||||||
|
|
||||||
|
- name: Download Docker image artifact
|
||||||
|
uses: actions/download-artifact@v8
|
||||||
|
with:
|
||||||
|
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||||
|
path: /tmp
|
||||||
|
|
||||||
|
- name: Load Docker image
|
||||||
|
run: |
|
||||||
|
docker load -i /tmp/test-changedetectionio.tar
|
||||||
|
|
||||||
|
- name: Spin up services
|
||||||
|
run: |
|
||||||
|
docker network create changedet-network
|
||||||
|
|
||||||
|
# Start changedetection.io container with X-Forwarded headers support
|
||||||
|
docker run --name changedet-app --hostname changedet-app --network changedet-network \
|
||||||
|
-e USE_X_SETTINGS=true \
|
||||||
|
-d test-changedetectionio
|
||||||
|
sleep 3
|
||||||
|
|
||||||
|
- name: Start nginx reverse proxy
|
||||||
|
run: |
|
||||||
|
# Start nginx with our test configuration
|
||||||
|
docker run --name nginx-proxy --network changedet-network -d -p 8080:80 --rm \
|
||||||
|
-v ${{ github.workspace }}/.github/nginx-reverse-proxy-test.conf:/etc/nginx/conf.d/default.conf:ro \
|
||||||
|
nginx:alpine
|
||||||
|
sleep 2
|
||||||
|
|
||||||
|
- name: Test reverse proxy - root path
|
||||||
|
run: |
|
||||||
|
echo "=== Testing nginx reverse proxy at root path ==="
|
||||||
|
curl --retry-connrefused --retry 6 -s http://localhost:8080/ > /tmp/nginx-test-root.html
|
||||||
|
|
||||||
|
# Check for changedetection.io UI elements
|
||||||
|
if grep -q "checkbox-uuid" /tmp/nginx-test-root.html; then
|
||||||
|
echo "✓ Found checkbox-uuid in response"
|
||||||
|
else
|
||||||
|
echo "ERROR: checkbox-uuid not found in response"
|
||||||
|
cat /tmp/nginx-test-root.html
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Check for watchlist content
|
||||||
|
if grep -q -i "watch" /tmp/nginx-test-root.html; then
|
||||||
|
echo "✓ Found watch/watchlist content in response"
|
||||||
|
else
|
||||||
|
echo "ERROR: watchlist content not found"
|
||||||
|
cat /tmp/nginx-test-root.html
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo "✓ Root path reverse proxy working correctly"
|
||||||
|
|
||||||
|
- name: Test reverse proxy - subpath with X-Forwarded-Prefix
|
||||||
|
run: |
|
||||||
|
echo "=== Testing nginx reverse proxy at subpath /changedet-sub/ ==="
|
||||||
|
curl --retry-connrefused --retry 6 -s http://localhost:8080/changedet-sub/ > /tmp/nginx-test-subpath.html
|
||||||
|
|
||||||
|
# Check for changedetection.io UI elements
|
||||||
|
if grep -q "checkbox-uuid" /tmp/nginx-test-subpath.html; then
|
||||||
|
echo "✓ Found checkbox-uuid in subpath response"
|
||||||
|
else
|
||||||
|
echo "ERROR: checkbox-uuid not found in subpath response"
|
||||||
|
cat /tmp/nginx-test-subpath.html
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo "✓ Subpath reverse proxy working correctly"
|
||||||
|
|
||||||
|
- name: Test API through reverse proxy subpath
|
||||||
|
run: |
|
||||||
|
echo "=== Testing API endpoints through nginx subpath /changedet-sub/ ==="
|
||||||
|
|
||||||
|
# Extract API key from the changedetection.io datastore
|
||||||
|
API_KEY=$(docker exec changedet-app cat /datastore/changedetection.json | grep -o '"api_access_token": *"[^"]*"' | cut -d'"' -f4)
|
||||||
|
|
||||||
|
if [ -z "$API_KEY" ]; then
|
||||||
|
echo "ERROR: Could not extract API key from datastore"
|
||||||
|
docker exec changedet-app cat /datastore/changedetection.json
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo "✓ Extracted API key: ${API_KEY:0:8}..."
|
||||||
|
|
||||||
|
# Create a watch via API through nginx proxy subpath
|
||||||
|
echo "Creating watch via POST to /changedet-sub/api/v1/watch"
|
||||||
|
RESPONSE=$(curl -s -w "\n%{http_code}" -X POST "http://localhost:8080/changedet-sub/api/v1/watch" \
|
||||||
|
-H "x-api-key: ${API_KEY}" \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{
|
||||||
|
"url": "https://example.com/test-nginx-proxy",
|
||||||
|
"tag": "nginx-test"
|
||||||
|
}')
|
||||||
|
|
||||||
|
HTTP_CODE=$(echo "$RESPONSE" | tail -n1)
|
||||||
|
BODY=$(echo "$RESPONSE" | head -n-1)
|
||||||
|
|
||||||
|
if [ "$HTTP_CODE" != "201" ]; then
|
||||||
|
echo "ERROR: Expected HTTP 201, got $HTTP_CODE"
|
||||||
|
echo "Response: $BODY"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo "✓ Watch created successfully (HTTP 201)"
|
||||||
|
|
||||||
|
# Extract the watch UUID from response
|
||||||
|
WATCH_UUID=$(echo "$BODY" | grep -o '"uuid": *"[^"]*"' | cut -d'"' -f4)
|
||||||
|
echo "✓ Watch UUID: $WATCH_UUID"
|
||||||
|
|
||||||
|
# Update the watch via PUT through nginx proxy subpath
|
||||||
|
echo "Updating watch via PUT to /changedet-sub/api/v1/watch/${WATCH_UUID}"
|
||||||
|
RESPONSE=$(curl -s -w "\n%{http_code}" -X PUT "http://localhost:8080/changedet-sub/api/v1/watch/${WATCH_UUID}" \
|
||||||
|
-H "x-api-key: ${API_KEY}" \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{
|
||||||
|
"paused": true
|
||||||
|
}')
|
||||||
|
|
||||||
|
HTTP_CODE=$(echo "$RESPONSE" | tail -n1)
|
||||||
|
BODY=$(echo "$RESPONSE" | head -n-1)
|
||||||
|
|
||||||
|
if [ "$HTTP_CODE" != "200" ]; then
|
||||||
|
echo "ERROR: Expected HTTP 200, got $HTTP_CODE"
|
||||||
|
echo "Response: $BODY"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
if echo "$BODY" | grep -q 'OK'; then
|
||||||
|
echo "✓ Watch updated successfully (HTTP 200, response: OK)"
|
||||||
|
else
|
||||||
|
echo "ERROR: Expected response 'OK', got: $BODY"
|
||||||
|
echo "Response: $BODY"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Verify the watch is paused via GET
|
||||||
|
echo "Verifying watch is paused via GET"
|
||||||
|
RESPONSE=$(curl -s "http://localhost:8080/changedet-sub/api/v1/watch/${WATCH_UUID}" \
|
||||||
|
-H "x-api-key: ${API_KEY}")
|
||||||
|
|
||||||
|
if echo "$RESPONSE" | grep -q '"paused": *true'; then
|
||||||
|
echo "✓ Watch is paused as expected"
|
||||||
|
else
|
||||||
|
echo "ERROR: Watch paused state not confirmed"
|
||||||
|
echo "Response: $RESPONSE"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo "✓ API tests through nginx subpath completed successfully"
|
||||||
|
|
||||||
|
- name: Cleanup nginx test
|
||||||
|
if: always()
|
||||||
|
run: |
|
||||||
|
docker logs nginx-proxy || true
|
||||||
|
docker logs changedet-app || true
|
||||||
|
docker stop nginx-proxy changedet-app || true
|
||||||
|
docker rm nginx-proxy changedet-app || true
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# Proxy tests
|
# Proxy tests
|
||||||
proxy-tests:
|
proxy-tests:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
@@ -335,7 +504,7 @@ jobs:
|
|||||||
- uses: actions/checkout@v6
|
- uses: actions/checkout@v6
|
||||||
|
|
||||||
- name: Download Docker image artifact
|
- name: Download Docker image artifact
|
||||||
uses: actions/download-artifact@v7
|
uses: actions/download-artifact@v8
|
||||||
with:
|
with:
|
||||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||||
path: /tmp
|
path: /tmp
|
||||||
@@ -375,7 +544,7 @@ jobs:
|
|||||||
- uses: actions/checkout@v6
|
- uses: actions/checkout@v6
|
||||||
|
|
||||||
- name: Download Docker image artifact
|
- name: Download Docker image artifact
|
||||||
uses: actions/download-artifact@v7
|
uses: actions/download-artifact@v8
|
||||||
with:
|
with:
|
||||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||||
path: /tmp
|
path: /tmp
|
||||||
@@ -405,7 +574,7 @@ jobs:
|
|||||||
- uses: actions/checkout@v6
|
- uses: actions/checkout@v6
|
||||||
|
|
||||||
- name: Download Docker image artifact
|
- name: Download Docker image artifact
|
||||||
uses: actions/download-artifact@v7
|
uses: actions/download-artifact@v8
|
||||||
with:
|
with:
|
||||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||||
path: /tmp
|
path: /tmp
|
||||||
@@ -429,7 +598,7 @@ jobs:
|
|||||||
- uses: actions/checkout@v6
|
- uses: actions/checkout@v6
|
||||||
|
|
||||||
- name: Download Docker image artifact
|
- name: Download Docker image artifact
|
||||||
uses: actions/download-artifact@v7
|
uses: actions/download-artifact@v8
|
||||||
with:
|
with:
|
||||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||||
path: /tmp
|
path: /tmp
|
||||||
@@ -474,7 +643,7 @@ jobs:
|
|||||||
- uses: actions/checkout@v6
|
- uses: actions/checkout@v6
|
||||||
|
|
||||||
- name: Download Docker image artifact
|
- name: Download Docker image artifact
|
||||||
uses: actions/download-artifact@v7
|
uses: actions/download-artifact@v8
|
||||||
with:
|
with:
|
||||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||||
path: /tmp
|
path: /tmp
|
||||||
@@ -537,7 +706,19 @@ jobs:
|
|||||||
- name: Check upgrade works without error
|
- name: Check upgrade works without error
|
||||||
run: |
|
run: |
|
||||||
echo "=== Testing upgrade path from 0.49.1 to ${{ github.ref_name }} (${{ github.sha }}) ==="
|
echo "=== Testing upgrade path from 0.49.1 to ${{ github.ref_name }} (${{ github.sha }}) ==="
|
||||||
|
sudo apt-get update && sudo apt-get install -y --no-install-recommends \
|
||||||
|
g++ \
|
||||||
|
gcc \
|
||||||
|
libc-dev \
|
||||||
|
libffi-dev \
|
||||||
|
libjpeg-dev \
|
||||||
|
libssl-dev \
|
||||||
|
libxslt-dev \
|
||||||
|
make \
|
||||||
|
patch \
|
||||||
|
pkg-config \
|
||||||
|
zlib1g-dev
|
||||||
|
|
||||||
# Checkout old version and create datastore
|
# Checkout old version and create datastore
|
||||||
git checkout 0.49.1
|
git checkout 0.49.1
|
||||||
python3 -m venv .venv
|
python3 -m venv .venv
|
||||||
@@ -546,7 +727,7 @@ jobs:
|
|||||||
pip install 'pyOpenSSL>=23.2.0'
|
pip install 'pyOpenSSL>=23.2.0'
|
||||||
|
|
||||||
echo "=== Running version 0.49.1 to create datastore ==="
|
echo "=== Running version 0.49.1 to create datastore ==="
|
||||||
python3 ./changedetection.py -C -d /tmp/data &
|
ALLOW_IANA_RESTRICTED_ADDRESSES=true python3 ./changedetection.py -C -d /tmp/data &
|
||||||
APP_PID=$!
|
APP_PID=$!
|
||||||
|
|
||||||
# Wait for app to be ready
|
# Wait for app to be ready
|
||||||
@@ -594,7 +775,7 @@ jobs:
|
|||||||
pip install -r requirements.txt
|
pip install -r requirements.txt
|
||||||
|
|
||||||
echo "=== Running current version (commit ${{ github.sha }}) with old datastore (testing mode) ==="
|
echo "=== Running current version (commit ${{ github.sha }}) with old datastore (testing mode) ==="
|
||||||
TESTING_SHUTDOWN_AFTER_DATASTORE_LOAD=1 python3 ./changedetection.py -d /tmp/data > /tmp/upgrade-test.log 2>&1
|
ALLOW_IANA_RESTRICTED_ADDRESSES=true TESTING_SHUTDOWN_AFTER_DATASTORE_LOAD=1 python3 ./changedetection.py -d /tmp/data > /tmp/upgrade-test.log 2>&1
|
||||||
|
|
||||||
echo "=== Upgrade test output ==="
|
echo "=== Upgrade test output ==="
|
||||||
cat /tmp/upgrade-test.log
|
cat /tmp/upgrade-test.log
|
||||||
@@ -602,7 +783,7 @@ jobs:
|
|||||||
|
|
||||||
# Now start the current version normally to verify the tag survived
|
# Now start the current version normally to verify the tag survived
|
||||||
echo "=== Starting current version to verify tag exists after upgrade ==="
|
echo "=== Starting current version to verify tag exists after upgrade ==="
|
||||||
timeout 20 python3 ./changedetection.py -d /tmp/data > /tmp/ui-test.log 2>&1 &
|
ALLOW_IANA_RESTRICTED_ADDRESSES=true timeout 20 python3 ./changedetection.py -d /tmp/data > /tmp/ui-test.log 2>&1 &
|
||||||
APP_PID=$!
|
APP_PID=$!
|
||||||
|
|
||||||
# Wait for app to be ready and fetch UI
|
# Wait for app to be ready and fetch UI
|
||||||
@@ -651,7 +832,7 @@ jobs:
|
|||||||
|
|
||||||
- name: Upload upgrade test logs
|
- name: Upload upgrade test logs
|
||||||
if: always()
|
if: always()
|
||||||
uses: actions/upload-artifact@v6
|
uses: actions/upload-artifact@v7
|
||||||
with:
|
with:
|
||||||
name: upgrade-test-logs-py${{ env.PYTHON_VERSION }}
|
name: upgrade-test-logs-py${{ env.PYTHON_VERSION }}
|
||||||
path: /tmp/upgrade-test.log
|
path: /tmp/upgrade-test.log
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
# Read more https://github.com/dgtlmoon/changedetection.io/wiki
|
# Read more https://github.com/dgtlmoon/changedetection.io/wiki
|
||||||
# Semver means never use .01, or 00. Should be .1.
|
# Semver means never use .01, or 00. Should be .1.
|
||||||
__version__ = '0.52.9'
|
__version__ = '0.54.6'
|
||||||
|
|
||||||
from changedetectionio.strtobool import strtobool
|
from changedetectionio.strtobool import strtobool
|
||||||
from json.decoder import JSONDecodeError
|
from json.decoder import JSONDecodeError
|
||||||
@@ -61,8 +61,22 @@ import time
|
|||||||
# ==============================================================================
|
# ==============================================================================
|
||||||
|
|
||||||
import multiprocessing
|
import multiprocessing
|
||||||
|
import os
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
|
# Limit glibc malloc arena count to prevent RSS growth from concurrent requests.
|
||||||
|
# Default: glibc creates up to 8×CPU_cores arenas. Each concurrent thread/connection
|
||||||
|
# can trigger a new arena, and freed memory stays mapped in those arenas as RSS forever.
|
||||||
|
# With MALLOC_ARENA_MAX=2, at most 2 arenas are used; freed pages return to the OS faster.
|
||||||
|
# Must be set before worker threads start; env var is read lazily by glibc on first arena creation.
|
||||||
|
if 'MALLOC_ARENA_MAX' not in os.environ:
|
||||||
|
os.environ['MALLOC_ARENA_MAX'] = '2'
|
||||||
|
try:
|
||||||
|
import ctypes as _ctypes
|
||||||
|
_ctypes.CDLL('libc.so.6').mallopt(-8, 2) # M_ARENA_MAX = -8
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
# Set spawn as global default (safety net - all our code uses explicit contexts anyway)
|
# Set spawn as global default (safety net - all our code uses explicit contexts anyway)
|
||||||
# Skip in tests to avoid breaking pytest-flask's LiveServer fixture (uses unpicklable local functions)
|
# Skip in tests to avoid breaking pytest-flask's LiveServer fixture (uses unpicklable local functions)
|
||||||
if 'pytest' not in sys.modules:
|
if 'pytest' not in sys.modules:
|
||||||
@@ -610,7 +624,7 @@ def main():
|
|||||||
|
|
||||||
@app.context_processor
|
@app.context_processor
|
||||||
def inject_template_globals():
|
def inject_template_globals():
|
||||||
return dict(right_sticky="v{}".format(datastore.data['version_tag']),
|
return dict(right_sticky="v"+__version__,
|
||||||
new_version_available=app.config['NEW_VERSION_AVAILABLE'],
|
new_version_available=app.config['NEW_VERSION_AVAILABLE'],
|
||||||
has_password=datastore.data['settings']['application']['password'] != False,
|
has_password=datastore.data['settings']['application']['password'] != False,
|
||||||
socket_io_enabled=datastore.data['settings']['application'].get('ui', {}).get('socket_io_enabled', True),
|
socket_io_enabled=datastore.data['settings']['application'].get('ui', {}).get('socket_io_enabled', True),
|
||||||
|
|||||||
@@ -0,0 +1,21 @@
|
|||||||
|
import functools
|
||||||
|
from flask import make_response
|
||||||
|
from flask_restful import Resource
|
||||||
|
|
||||||
|
|
||||||
|
@functools.cache
|
||||||
|
def _get_spec_yaml():
|
||||||
|
"""Build and cache the merged spec as a YAML string (only serialized once per process)."""
|
||||||
|
import yaml
|
||||||
|
from changedetectionio.api import build_merged_spec_dict
|
||||||
|
return yaml.dump(build_merged_spec_dict(), default_flow_style=False, allow_unicode=True)
|
||||||
|
|
||||||
|
|
||||||
|
class Spec(Resource):
|
||||||
|
def get(self):
|
||||||
|
"""Return the merged OpenAPI spec including all registered processor extensions."""
|
||||||
|
return make_response(
|
||||||
|
_get_spec_yaml(),
|
||||||
|
200,
|
||||||
|
{'Content-Type': 'application/yaml'}
|
||||||
|
)
|
||||||
@@ -17,7 +17,7 @@ class Tag(Resource):
|
|||||||
self.update_q = kwargs['update_q']
|
self.update_q = kwargs['update_q']
|
||||||
|
|
||||||
# Get information about a single tag
|
# Get information about a single tag
|
||||||
# curl http://localhost:5000/api/v1/tag/<string:uuid>
|
# curl http://localhost:5000/api/v1/tag/<uuid_str:uuid>
|
||||||
@auth.check_token
|
@auth.check_token
|
||||||
@validate_openapi_request('getTag')
|
@validate_openapi_request('getTag')
|
||||||
def get(self, uuid):
|
def get(self, uuid):
|
||||||
@@ -79,7 +79,7 @@ class Tag(Resource):
|
|||||||
'browser_steps_last_error_step', 'check_count', 'consecutive_filter_failures',
|
'browser_steps_last_error_step', 'check_count', 'consecutive_filter_failures',
|
||||||
'content-type', 'fetch_time', 'last_changed', 'last_checked', 'last_error',
|
'content-type', 'fetch_time', 'last_changed', 'last_checked', 'last_error',
|
||||||
'last_notification_error', 'last_viewed', 'notification_alert_count',
|
'last_notification_error', 'last_viewed', 'notification_alert_count',
|
||||||
'page_title', 'previous_md5', 'previous_md5_before_filters', 'remote_server_reply'
|
'page_title', 'previous_md5', 'remote_server_reply'
|
||||||
}
|
}
|
||||||
|
|
||||||
# Create clean tag dict without Watch-specific fields
|
# Create clean tag dict without Watch-specific fields
|
||||||
@@ -97,17 +97,6 @@ class Tag(Resource):
|
|||||||
# Delete the tag, and any tag reference
|
# Delete the tag, and any tag reference
|
||||||
del self.datastore.data['settings']['application']['tags'][uuid]
|
del self.datastore.data['settings']['application']['tags'][uuid]
|
||||||
|
|
||||||
# Delete tag.json file if it exists
|
|
||||||
import os
|
|
||||||
tag_dir = os.path.join(self.datastore.datastore_path, uuid)
|
|
||||||
tag_json = os.path.join(tag_dir, "tag.json")
|
|
||||||
if os.path.exists(tag_json):
|
|
||||||
try:
|
|
||||||
os.unlink(tag_json)
|
|
||||||
logger.info(f"Deleted tag.json for tag {uuid}")
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Failed to delete tag.json for tag {uuid}: {e}")
|
|
||||||
|
|
||||||
# Remove tag from all watches
|
# Remove tag from all watches
|
||||||
for watch_uuid, watch in self.datastore.data['watching'].items():
|
for watch_uuid, watch in self.datastore.data['watching'].items():
|
||||||
if watch.get('tags') and uuid in watch['tags']:
|
if watch.get('tags') and uuid in watch['tags']:
|
||||||
@@ -160,6 +149,11 @@ class Tag(Resource):
|
|||||||
tag.update(json_data)
|
tag.update(json_data)
|
||||||
tag.commit()
|
tag.commit()
|
||||||
|
|
||||||
|
# Clear checksums for all watches using this tag to force reprocessing
|
||||||
|
# Tag changes affect inherited configuration
|
||||||
|
cleared_count = self.datastore.clear_checksums_for_tag(uuid)
|
||||||
|
logger.info(f"Tag {uuid} updated via API, cleared {cleared_count} watch checksums")
|
||||||
|
|
||||||
return "OK", 200
|
return "OK", 200
|
||||||
|
|
||||||
|
|
||||||
@@ -183,6 +177,13 @@ class Tag(Resource):
|
|||||||
|
|
||||||
new_uuid = self.datastore.add_tag(title=title)
|
new_uuid = self.datastore.add_tag(title=title)
|
||||||
if new_uuid:
|
if new_uuid:
|
||||||
|
# Apply any extra fields (e.g. processor_config_restock_diff) beyond just title
|
||||||
|
extra = {k: v for k, v in json_data.items() if k != 'title'}
|
||||||
|
if extra:
|
||||||
|
tag = self.datastore.data['settings']['application']['tags'].get(new_uuid)
|
||||||
|
if tag:
|
||||||
|
tag.update(extra)
|
||||||
|
tag.commit()
|
||||||
return {'uuid': new_uuid}, 201
|
return {'uuid': new_uuid}, 201
|
||||||
else:
|
else:
|
||||||
return "Invalid or unsupported tag", 400
|
return "Invalid or unsupported tag", 400
|
||||||
|
|||||||
@@ -57,7 +57,7 @@ class Watch(Resource):
|
|||||||
self.update_q = kwargs['update_q']
|
self.update_q = kwargs['update_q']
|
||||||
|
|
||||||
# Get information about a single watch, excluding the history list (can be large)
|
# Get information about a single watch, excluding the history list (can be large)
|
||||||
# curl http://localhost:5000/api/v1/watch/<string:uuid>
|
# curl http://localhost:5000/api/v1/watch/<uuid_str:uuid>
|
||||||
# @todo - version2 - ?muted and ?paused should be able to be called together, return the watch struct not "OK"
|
# @todo - version2 - ?muted and ?paused should be able to be called together, return the watch struct not "OK"
|
||||||
# ?recheck=true
|
# ?recheck=true
|
||||||
@auth.check_token
|
@auth.check_token
|
||||||
@@ -217,7 +217,7 @@ class WatchHistory(Resource):
|
|||||||
self.datastore = kwargs['datastore']
|
self.datastore = kwargs['datastore']
|
||||||
|
|
||||||
# Get a list of available history for a watch by UUID
|
# Get a list of available history for a watch by UUID
|
||||||
# curl http://localhost:5000/api/v1/watch/<string:uuid>/history
|
# curl http://localhost:5000/api/v1/watch/<uuid_str:uuid>/history
|
||||||
@auth.check_token
|
@auth.check_token
|
||||||
@validate_openapi_request('getWatchHistory')
|
@validate_openapi_request('getWatchHistory')
|
||||||
def get(self, uuid):
|
def get(self, uuid):
|
||||||
@@ -338,7 +338,7 @@ class WatchHistoryDiff(Resource):
|
|||||||
word_diff = True
|
word_diff = True
|
||||||
|
|
||||||
# Get boolean diff preferences with defaults from DIFF_PREFERENCES_CONFIG
|
# Get boolean diff preferences with defaults from DIFF_PREFERENCES_CONFIG
|
||||||
changes_only = strtobool(request.args.get('changesOnly', 'true'))
|
changes_only = strtobool(request.args.get('changesOnly', 'false'))
|
||||||
ignore_whitespace = strtobool(request.args.get('ignoreWhitespace', 'false'))
|
ignore_whitespace = strtobool(request.args.get('ignoreWhitespace', 'false'))
|
||||||
include_removed = strtobool(request.args.get('removed', 'true'))
|
include_removed = strtobool(request.args.get('removed', 'true'))
|
||||||
include_added = strtobool(request.args.get('added', 'true'))
|
include_added = strtobool(request.args.get('added', 'true'))
|
||||||
@@ -349,7 +349,7 @@ class WatchHistoryDiff(Resource):
|
|||||||
previous_version_file_contents=from_version_file_contents,
|
previous_version_file_contents=from_version_file_contents,
|
||||||
newest_version_file_contents=to_version_file_contents,
|
newest_version_file_contents=to_version_file_contents,
|
||||||
ignore_junk=ignore_whitespace,
|
ignore_junk=ignore_whitespace,
|
||||||
include_equal=changes_only,
|
include_equal=not changes_only,
|
||||||
include_removed=include_removed,
|
include_removed=include_removed,
|
||||||
include_added=include_added,
|
include_added=include_added,
|
||||||
include_replaced=include_replaced,
|
include_replaced=include_replaced,
|
||||||
@@ -567,4 +567,4 @@ class CreateWatch(Resource):
|
|||||||
|
|
||||||
return {'status': f'OK, queueing {len(watches_to_queue)} watches in background'}, 202
|
return {'status': f'OK, queueing {len(watches_to_queue)} watches in background'}, 202
|
||||||
|
|
||||||
return list, 200
|
return list, 200
|
||||||
|
|||||||
@@ -3,29 +3,18 @@ from flask import request, abort
|
|||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
@functools.cache
|
@functools.cache
|
||||||
def get_openapi_spec():
|
def build_merged_spec_dict():
|
||||||
"""Lazy load OpenAPI spec and dependencies only when validation is needed."""
|
|
||||||
import os
|
|
||||||
import yaml # Lazy import - only loaded when API validation is actually used
|
|
||||||
from openapi_core import OpenAPI # Lazy import - saves ~10.7 MB on startup
|
|
||||||
|
|
||||||
spec_path = os.path.join(os.path.dirname(__file__), '../../docs/api-spec.yaml')
|
|
||||||
if not os.path.exists(spec_path):
|
|
||||||
# Possibly for pip3 packages
|
|
||||||
spec_path = os.path.join(os.path.dirname(__file__), '../docs/api-spec.yaml')
|
|
||||||
|
|
||||||
with open(spec_path, 'r', encoding='utf-8') as f:
|
|
||||||
spec_dict = yaml.safe_load(f)
|
|
||||||
_openapi_spec = OpenAPI.from_dict(spec_dict)
|
|
||||||
return _openapi_spec
|
|
||||||
|
|
||||||
@functools.cache
|
|
||||||
def get_openapi_schema_dict():
|
|
||||||
"""
|
"""
|
||||||
Get the raw OpenAPI spec dictionary for schema access.
|
Load the base OpenAPI spec and merge in any per-processor api.yaml extensions.
|
||||||
|
|
||||||
Used by Import endpoint to validate and convert query parameters.
|
Each processor can provide an api.yaml file alongside its __init__.py that defines
|
||||||
Returns the YAML dict directly (not the OpenAPI object).
|
additional schemas (e.g., processor_config_restock_diff). These are merged into
|
||||||
|
WatchBase.properties so the spec accurately reflects what the API accepts.
|
||||||
|
|
||||||
|
Plugin processors (via pluggy) are also supported - they just need an api.yaml
|
||||||
|
next to their processor module.
|
||||||
|
|
||||||
|
Returns the merged dict (cached - do not mutate the returned value).
|
||||||
"""
|
"""
|
||||||
import os
|
import os
|
||||||
import yaml
|
import yaml
|
||||||
@@ -35,7 +24,59 @@ def get_openapi_schema_dict():
|
|||||||
spec_path = os.path.join(os.path.dirname(__file__), '../docs/api-spec.yaml')
|
spec_path = os.path.join(os.path.dirname(__file__), '../docs/api-spec.yaml')
|
||||||
|
|
||||||
with open(spec_path, 'r', encoding='utf-8') as f:
|
with open(spec_path, 'r', encoding='utf-8') as f:
|
||||||
return yaml.safe_load(f)
|
spec_dict = yaml.safe_load(f)
|
||||||
|
|
||||||
|
try:
|
||||||
|
from changedetectionio.processors import find_processors, get_parent_module
|
||||||
|
for module, proc_name in find_processors():
|
||||||
|
parent = get_parent_module(module)
|
||||||
|
if not parent or not hasattr(parent, '__file__'):
|
||||||
|
continue
|
||||||
|
api_yaml_path = os.path.join(os.path.dirname(parent.__file__), 'api.yaml')
|
||||||
|
if not os.path.exists(api_yaml_path):
|
||||||
|
continue
|
||||||
|
with open(api_yaml_path, 'r', encoding='utf-8') as f:
|
||||||
|
proc_spec = yaml.safe_load(f)
|
||||||
|
# Merge schemas
|
||||||
|
proc_schemas = proc_spec.get('components', {}).get('schemas', {})
|
||||||
|
spec_dict['components']['schemas'].update(proc_schemas)
|
||||||
|
# Inject processor_config_{name} into WatchBase if the schema is defined
|
||||||
|
schema_key = f'processor_config_{proc_name}'
|
||||||
|
if schema_key in proc_schemas:
|
||||||
|
spec_dict['components']['schemas']['WatchBase']['properties'][schema_key] = {
|
||||||
|
'$ref': f'#/components/schemas/{schema_key}'
|
||||||
|
}
|
||||||
|
# Append x-code-samples from processor paths into existing path operations
|
||||||
|
for path, path_item in proc_spec.get('paths', {}).items():
|
||||||
|
if path not in spec_dict.get('paths', {}):
|
||||||
|
continue
|
||||||
|
for method, operation in path_item.items():
|
||||||
|
if method not in spec_dict['paths'][path]:
|
||||||
|
continue
|
||||||
|
if 'x-code-samples' in operation:
|
||||||
|
existing = spec_dict['paths'][path][method].get('x-code-samples', [])
|
||||||
|
spec_dict['paths'][path][method]['x-code-samples'] = existing + operation['x-code-samples']
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Failed to merge processor API specs: {e}")
|
||||||
|
|
||||||
|
return spec_dict
|
||||||
|
|
||||||
|
|
||||||
|
@functools.cache
|
||||||
|
def get_openapi_spec():
|
||||||
|
"""Lazy load OpenAPI spec and dependencies only when validation is needed."""
|
||||||
|
from openapi_core import OpenAPI # Lazy import - saves ~10.7 MB on startup
|
||||||
|
return OpenAPI.from_dict(build_merged_spec_dict())
|
||||||
|
|
||||||
|
@functools.cache
|
||||||
|
def get_openapi_schema_dict():
|
||||||
|
"""
|
||||||
|
Get the raw OpenAPI spec dictionary for schema access.
|
||||||
|
|
||||||
|
Used by Import endpoint to validate and convert query parameters.
|
||||||
|
Returns the merged YAML dict (not the OpenAPI object).
|
||||||
|
"""
|
||||||
|
return build_merged_spec_dict()
|
||||||
|
|
||||||
@functools.cache
|
@functools.cache
|
||||||
def _resolve_schema_properties(schema_name):
|
def _resolve_schema_properties(schema_name):
|
||||||
@@ -70,46 +111,6 @@ def _resolve_schema_properties(schema_name):
|
|||||||
|
|
||||||
return properties
|
return properties
|
||||||
|
|
||||||
@functools.cache
|
|
||||||
def _resolve_readonly_fields(schema_name):
|
|
||||||
"""
|
|
||||||
Generic helper to resolve readOnly fields, including allOf inheritance.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
schema_name: Name of the schema (e.g., 'Watch', 'Tag')
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
frozenset: All readOnly field names including inherited ones
|
|
||||||
"""
|
|
||||||
spec_dict = get_openapi_schema_dict()
|
|
||||||
schema = spec_dict['components']['schemas'].get(schema_name, {})
|
|
||||||
|
|
||||||
readonly_fields = set()
|
|
||||||
|
|
||||||
# Handle allOf (schema inheritance)
|
|
||||||
if 'allOf' in schema:
|
|
||||||
for item in schema['allOf']:
|
|
||||||
# Resolve $ref to parent schema
|
|
||||||
if '$ref' in item:
|
|
||||||
ref_path = item['$ref'].split('/')[-1]
|
|
||||||
ref_schema = spec_dict['components']['schemas'].get(ref_path, {})
|
|
||||||
if 'properties' in ref_schema:
|
|
||||||
for field_name, field_def in ref_schema['properties'].items():
|
|
||||||
if field_def.get('readOnly') is True:
|
|
||||||
readonly_fields.add(field_name)
|
|
||||||
# Check schema-specific properties
|
|
||||||
if 'properties' in item:
|
|
||||||
for field_name, field_def in item['properties'].items():
|
|
||||||
if field_def.get('readOnly') is True:
|
|
||||||
readonly_fields.add(field_name)
|
|
||||||
else:
|
|
||||||
# Direct properties (no inheritance)
|
|
||||||
if 'properties' in schema:
|
|
||||||
for field_name, field_def in schema['properties'].items():
|
|
||||||
if field_def.get('readOnly') is True:
|
|
||||||
readonly_fields.add(field_name)
|
|
||||||
|
|
||||||
return frozenset(readonly_fields)
|
|
||||||
|
|
||||||
@functools.cache
|
@functools.cache
|
||||||
def get_watch_schema_properties():
|
def get_watch_schema_properties():
|
||||||
@@ -120,14 +121,8 @@ def get_watch_schema_properties():
|
|||||||
"""
|
"""
|
||||||
return _resolve_schema_properties('WatchBase')
|
return _resolve_schema_properties('WatchBase')
|
||||||
|
|
||||||
@functools.cache
|
# Import readonly field utilities from shared module (avoids circular dependencies with model layer)
|
||||||
def get_readonly_watch_fields():
|
from changedetectionio.model.schema_utils import get_readonly_watch_fields, get_readonly_tag_fields
|
||||||
"""
|
|
||||||
Extract readOnly field names from Watch schema in OpenAPI spec.
|
|
||||||
|
|
||||||
Returns readOnly fields from WatchBase (uuid, date_created) + Watch-specific readOnly fields.
|
|
||||||
"""
|
|
||||||
return _resolve_readonly_fields('Watch')
|
|
||||||
|
|
||||||
@functools.cache
|
@functools.cache
|
||||||
def get_tag_schema_properties():
|
def get_tag_schema_properties():
|
||||||
@@ -138,15 +133,6 @@ def get_tag_schema_properties():
|
|||||||
"""
|
"""
|
||||||
return _resolve_schema_properties('Tag')
|
return _resolve_schema_properties('Tag')
|
||||||
|
|
||||||
@functools.cache
|
|
||||||
def get_readonly_tag_fields():
|
|
||||||
"""
|
|
||||||
Extract readOnly field names from Tag schema in OpenAPI spec.
|
|
||||||
|
|
||||||
Returns readOnly fields from WatchBase (uuid, date_created) + Tag-specific readOnly fields.
|
|
||||||
"""
|
|
||||||
return _resolve_readonly_fields('Tag')
|
|
||||||
|
|
||||||
def validate_openapi_request(operation_id):
|
def validate_openapi_request(operation_id):
|
||||||
"""Decorator to validate incoming requests against OpenAPI spec."""
|
"""Decorator to validate incoming requests against OpenAPI spec."""
|
||||||
def decorator(f):
|
def decorator(f):
|
||||||
@@ -158,6 +144,7 @@ def validate_openapi_request(operation_id):
|
|||||||
if request.method.upper() != 'GET':
|
if request.method.upper() != 'GET':
|
||||||
# Lazy import - only loaded when actually validating a request
|
# Lazy import - only loaded when actually validating a request
|
||||||
from openapi_core.contrib.flask import FlaskOpenAPIRequest
|
from openapi_core.contrib.flask import FlaskOpenAPIRequest
|
||||||
|
from openapi_core.templating.paths.exceptions import ServerNotFound, PathNotFound, PathError
|
||||||
|
|
||||||
spec = get_openapi_spec()
|
spec = get_openapi_spec()
|
||||||
openapi_request = FlaskOpenAPIRequest(request)
|
openapi_request = FlaskOpenAPIRequest(request)
|
||||||
@@ -165,6 +152,16 @@ def validate_openapi_request(operation_id):
|
|||||||
if result.errors:
|
if result.errors:
|
||||||
error_details = []
|
error_details = []
|
||||||
for error in result.errors:
|
for error in result.errors:
|
||||||
|
# Skip path/server validation errors for reverse proxy compatibility
|
||||||
|
# Flask routing already validates that endpoints exist (returns 404 if not).
|
||||||
|
# OpenAPI validation here is primarily for request body schema validation.
|
||||||
|
# When behind nginx/reverse proxy, URLs may have path prefixes that don't
|
||||||
|
# match the OpenAPI server definitions, causing false positives.
|
||||||
|
if isinstance(error, PathError):
|
||||||
|
logger.debug(f"API Call - Skipping path/server validation (delegated to Flask): {error}")
|
||||||
|
continue
|
||||||
|
|
||||||
|
error_str = str(error)
|
||||||
# Extract detailed schema errors from __cause__
|
# Extract detailed schema errors from __cause__
|
||||||
if hasattr(error, '__cause__') and hasattr(error.__cause__, 'schema_errors'):
|
if hasattr(error, '__cause__') and hasattr(error.__cause__, 'schema_errors'):
|
||||||
for schema_error in error.__cause__.schema_errors:
|
for schema_error in error.__cause__.schema_errors:
|
||||||
@@ -172,9 +169,12 @@ def validate_openapi_request(operation_id):
|
|||||||
msg = schema_error.message if hasattr(schema_error, 'message') else str(schema_error)
|
msg = schema_error.message if hasattr(schema_error, 'message') else str(schema_error)
|
||||||
error_details.append(f"{field}: {msg}")
|
error_details.append(f"{field}: {msg}")
|
||||||
else:
|
else:
|
||||||
error_details.append(str(error))
|
error_details.append(error_str)
|
||||||
|
|
||||||
|
# Only raise if we have actual validation errors (not path/server issues)
|
||||||
|
if error_details:
|
||||||
logger.error(f"API Call - Validation failed: {'; '.join(error_details)}")
|
logger.error(f"API Call - Validation failed: {'; '.join(error_details)}")
|
||||||
raise BadRequest(f"Validation failed: {'; '.join(error_details)}")
|
raise BadRequest(f"Validation failed: {'; '.join(error_details)}")
|
||||||
except BadRequest:
|
except BadRequest:
|
||||||
# Re-raise BadRequest exceptions (validation failures)
|
# Re-raise BadRequest exceptions (validation failures)
|
||||||
raise
|
raise
|
||||||
@@ -191,5 +191,6 @@ from .Watch import Watch, WatchHistory, WatchSingleHistory, WatchHistoryDiff, Cr
|
|||||||
from .Tags import Tags, Tag
|
from .Tags import Tags, Tag
|
||||||
from .Import import Import
|
from .Import import Import
|
||||||
from .SystemInfo import SystemInfo
|
from .SystemInfo import SystemInfo
|
||||||
|
from .Spec import Spec
|
||||||
from .Notifications import Notifications
|
from .Notifications import Notifications
|
||||||
|
|
||||||
|
|||||||
@@ -13,7 +13,7 @@ from loguru import logger
|
|||||||
BACKUP_FILENAME_FORMAT = "changedetection-backup-{}.zip"
|
BACKUP_FILENAME_FORMAT = "changedetection-backup-{}.zip"
|
||||||
|
|
||||||
|
|
||||||
def create_backup(datastore_path, watches: dict):
|
def create_backup(datastore_path, watches: dict, tags: dict = None):
|
||||||
logger.debug("Creating backup...")
|
logger.debug("Creating backup...")
|
||||||
import zipfile
|
import zipfile
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
@@ -40,10 +40,14 @@ def create_backup(datastore_path, watches: dict):
|
|||||||
zipObj.write(url_watches_json, arcname="url-watches.json")
|
zipObj.write(url_watches_json, arcname="url-watches.json")
|
||||||
logger.debug("Added url-watches.json to backup")
|
logger.debug("Added url-watches.json to backup")
|
||||||
|
|
||||||
# Add the flask app secret (if it exists)
|
# Add tag data directories (each tag has its own {uuid}/tag.json)
|
||||||
secret_file = os.path.join(datastore_path, "secret.txt")
|
for uuid, tag in (tags or {}).items():
|
||||||
if os.path.isfile(secret_file):
|
for f in Path(tag.data_dir).glob('*'):
|
||||||
zipObj.write(secret_file, arcname="secret.txt")
|
zipObj.write(f,
|
||||||
|
arcname=os.path.join(f.parts[-2], f.parts[-1]),
|
||||||
|
compress_type=zipfile.ZIP_DEFLATED,
|
||||||
|
compresslevel=8)
|
||||||
|
logger.debug(f"Added tag '{tag.get('title')}' ({uuid}) to backup")
|
||||||
|
|
||||||
# Add any data in the watch data directory.
|
# Add any data in the watch data directory.
|
||||||
for uuid, w in watches.items():
|
for uuid, w in watches.items():
|
||||||
@@ -88,7 +92,10 @@ def create_backup(datastore_path, watches: dict):
|
|||||||
|
|
||||||
|
|
||||||
def construct_blueprint(datastore: ChangeDetectionStore):
|
def construct_blueprint(datastore: ChangeDetectionStore):
|
||||||
|
from .restore import construct_restore_blueprint
|
||||||
|
|
||||||
backups_blueprint = Blueprint('backups', __name__, template_folder="templates")
|
backups_blueprint = Blueprint('backups', __name__, template_folder="templates")
|
||||||
|
backups_blueprint.register_blueprint(construct_restore_blueprint(datastore))
|
||||||
backup_threads = []
|
backup_threads = []
|
||||||
|
|
||||||
@login_optionally_required
|
@login_optionally_required
|
||||||
@@ -96,16 +103,17 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
|||||||
def request_backup():
|
def request_backup():
|
||||||
if any(thread.is_alive() for thread in backup_threads):
|
if any(thread.is_alive() for thread in backup_threads):
|
||||||
flash(gettext("A backup is already running, check back in a few minutes"), "error")
|
flash(gettext("A backup is already running, check back in a few minutes"), "error")
|
||||||
return redirect(url_for('backups.index'))
|
return redirect(url_for('backups.create'))
|
||||||
|
|
||||||
if len(find_backups()) > int(os.getenv("MAX_NUMBER_BACKUPS", 100)):
|
if len(find_backups()) > int(os.getenv("MAX_NUMBER_BACKUPS", 100)):
|
||||||
flash(gettext("Maximum number of backups reached, please remove some"), "error")
|
flash(gettext("Maximum number of backups reached, please remove some"), "error")
|
||||||
return redirect(url_for('backups.index'))
|
return redirect(url_for('backups.create'))
|
||||||
|
|
||||||
# With immediate persistence, all data is already saved
|
# With immediate persistence, all data is already saved
|
||||||
zip_thread = threading.Thread(
|
zip_thread = threading.Thread(
|
||||||
target=create_backup,
|
target=create_backup,
|
||||||
args=(datastore.datastore_path, datastore.data.get("watching")),
|
args=(datastore.datastore_path, datastore.data.get("watching")),
|
||||||
|
kwargs={'tags': datastore.data['settings']['application'].get('tags', {})},
|
||||||
daemon=True,
|
daemon=True,
|
||||||
name="BackupCreator"
|
name="BackupCreator"
|
||||||
)
|
)
|
||||||
@@ -113,7 +121,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
|||||||
backup_threads.append(zip_thread)
|
backup_threads.append(zip_thread)
|
||||||
flash(gettext("Backup building in background, check back in a few minutes."))
|
flash(gettext("Backup building in background, check back in a few minutes."))
|
||||||
|
|
||||||
return redirect(url_for('backups.index'))
|
return redirect(url_for('backups.create'))
|
||||||
|
|
||||||
def find_backups():
|
def find_backups():
|
||||||
backup_filepath = os.path.join(datastore.datastore_path, BACKUP_FILENAME_FORMAT.format("*"))
|
backup_filepath = os.path.join(datastore.datastore_path, BACKUP_FILENAME_FORMAT.format("*"))
|
||||||
@@ -138,31 +146,34 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
|||||||
def download_backup(filename):
|
def download_backup(filename):
|
||||||
import re
|
import re
|
||||||
filename = filename.strip()
|
filename = filename.strip()
|
||||||
backup_filename_regex = BACKUP_FILENAME_FORMAT.format("\d+")
|
backup_filename_regex = BACKUP_FILENAME_FORMAT.format(r"\d+")
|
||||||
|
|
||||||
full_path = os.path.join(os.path.abspath(datastore.datastore_path), filename)
|
|
||||||
if not full_path.startswith(os.path.abspath(datastore.datastore_path)):
|
|
||||||
abort(404)
|
|
||||||
|
|
||||||
|
# Resolve 'latest' before any validation so checks run against the real filename.
|
||||||
if filename == 'latest':
|
if filename == 'latest':
|
||||||
backups = find_backups()
|
backups = find_backups()
|
||||||
|
if not backups:
|
||||||
|
abort(404)
|
||||||
filename = backups[0]['filename']
|
filename = backups[0]['filename']
|
||||||
|
|
||||||
if not re.match(r"^" + backup_filename_regex + "$", filename):
|
if not re.match(r"^" + backup_filename_regex + "$", filename):
|
||||||
abort(400) # Bad Request if the filename doesn't match the pattern
|
abort(400) # Bad Request if the filename doesn't match the pattern
|
||||||
|
|
||||||
|
full_path = os.path.join(os.path.abspath(datastore.datastore_path), filename)
|
||||||
|
if not full_path.startswith(os.path.abspath(datastore.datastore_path) + os.sep):
|
||||||
|
abort(404)
|
||||||
|
|
||||||
logger.debug(f"Backup download request for '{full_path}'")
|
logger.debug(f"Backup download request for '{full_path}'")
|
||||||
return send_from_directory(os.path.abspath(datastore.datastore_path), filename, as_attachment=True)
|
return send_from_directory(os.path.abspath(datastore.datastore_path), filename, as_attachment=True)
|
||||||
|
|
||||||
@login_optionally_required
|
@login_optionally_required
|
||||||
@backups_blueprint.route("", methods=['GET'])
|
@backups_blueprint.route("/", methods=['GET'])
|
||||||
def index():
|
@backups_blueprint.route("/create", methods=['GET'])
|
||||||
|
def create():
|
||||||
backups = find_backups()
|
backups = find_backups()
|
||||||
output = render_template("overview.html",
|
output = render_template("backup_create.html",
|
||||||
available_backups=backups,
|
available_backups=backups,
|
||||||
backup_running=any(thread.is_alive() for thread in backup_threads)
|
backup_running=any(thread.is_alive() for thread in backup_threads)
|
||||||
)
|
)
|
||||||
|
|
||||||
return output
|
return output
|
||||||
|
|
||||||
@login_optionally_required
|
@login_optionally_required
|
||||||
@@ -176,6 +187,6 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
|||||||
|
|
||||||
flash(gettext("Backups were deleted."))
|
flash(gettext("Backups were deleted."))
|
||||||
|
|
||||||
return redirect(url_for('backups.index'))
|
return redirect(url_for('backups.create'))
|
||||||
|
|
||||||
return backups_blueprint
|
return backups_blueprint
|
||||||
|
|||||||
@@ -0,0 +1,248 @@
|
|||||||
|
import io
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import shutil
|
||||||
|
import tempfile
|
||||||
|
import threading
|
||||||
|
import zipfile
|
||||||
|
|
||||||
|
from flask import Blueprint, render_template, flash, url_for, redirect, request
|
||||||
|
from flask_babel import gettext, lazy_gettext as _l
|
||||||
|
from wtforms import Form, BooleanField, SubmitField
|
||||||
|
from flask_wtf.file import FileField, FileAllowed
|
||||||
|
from loguru import logger
|
||||||
|
|
||||||
|
from changedetectionio.flask_app import login_optionally_required
|
||||||
|
|
||||||
|
# Maximum size of the uploaded zip file. Override via env var MAX_RESTORE_UPLOAD_MB.
|
||||||
|
_MAX_UPLOAD_BYTES = int(os.getenv("MAX_RESTORE_UPLOAD_MB", 256)) * 1024 * 1024
|
||||||
|
# Maximum total uncompressed size of all entries (zip-bomb guard). Override via MAX_RESTORE_DECOMPRESSED_MB.
|
||||||
|
_MAX_DECOMPRESSED_BYTES = int(os.getenv("MAX_RESTORE_DECOMPRESSED_MB", 1024)) * 1024 * 1024
|
||||||
|
# Only top-level directories whose name is a valid UUID are treated as watch/tag entries.
|
||||||
|
_UUID_RE = re.compile(
|
||||||
|
r'^[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$',
|
||||||
|
re.IGNORECASE,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class RestoreForm(Form):
|
||||||
|
zip_file = FileField(_l('Backup zip file'), validators=[
|
||||||
|
FileAllowed(['zip'], _l('Must be a .zip backup file!'))
|
||||||
|
])
|
||||||
|
include_groups = BooleanField(_l('Include groups'), default=True)
|
||||||
|
include_groups_replace_existing = BooleanField(_l('Replace existing groups of the same UUID'), default=True)
|
||||||
|
include_watches = BooleanField(_l('Include watches'), default=True)
|
||||||
|
include_watches_replace_existing = BooleanField(_l('Replace existing watches of the same UUID'), default=True)
|
||||||
|
submit = SubmitField(_l('Restore backup'))
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_backup(zip_stream, dest_dir):
    """Extract every member of the backup zip into *dest_dir*.

    Raises ValueError when the declared uncompressed size of all members
    exceeds _MAX_DECOMPRESSED_BYTES (zip-bomb guard) or when a member would
    resolve outside *dest_dir* (Zip Slip). Raises zipfile.BadZipFile if the
    stream is not a valid zip.
    """
    with zipfile.ZipFile(zip_stream, 'r') as zf:
        members = zf.infolist()
        total_uncompressed = sum(m.file_size for m in members)
        if total_uncompressed > _MAX_DECOMPRESSED_BYTES:
            raise ValueError(
                f"Backup archive decompressed size ({total_uncompressed // (1024 * 1024)} MB) "
                f"exceeds the {_MAX_DECOMPRESSED_BYTES // (1024 * 1024)} MB limit"
            )
        resolved_dest = os.path.realpath(dest_dir)
        for member in members:
            member_dest = os.path.realpath(os.path.join(resolved_dest, member.filename))
            if not member_dest.startswith(resolved_dest + os.sep) and member_dest != resolved_dest:
                raise ValueError(f"Zip Slip path traversal detected in backup archive: {member.filename!r}")
            zf.extract(member, dest_dir)


def _load_json_object(path, label):
    """Return the JSON object stored at *path*, or None (after logging) if unusable.

    *label* is only used in the log message, e.g. ``"tag.json for <uuid>"``.
    """
    try:
        with open(path, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except (ValueError, OSError) as e:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError
        # (file is not valid UTF-8); neither may abort the whole restore.
        logger.error(f"Restore: failed to read {label}: {e}")
        return None
    if not isinstance(data, dict):
        # The code below calls .get() / item assignment on the result.
        logger.error(f"Restore: failed to read {label}: expected a JSON object, got {type(data).__name__}")
        return None
    return data


def _replace_uuid_dir(src_dir, datastore_path, uuid):
    """Copy the extracted UUID directory into the datastore, replacing any existing one."""
    dst_dir = os.path.join(datastore_path, uuid)
    if os.path.exists(dst_dir):
        shutil.rmtree(dst_dir)
    shutil.copytree(src_dir, dst_dir)


def import_from_zip(zip_stream, datastore, include_groups, include_groups_replace, include_watches, include_watches_replace):
    """
    Extract and import watches and groups from a backup zip stream.

    Mirrors the store's _load_watches / _load_tags loading pattern:
      - UUID dirs with tag.json   → Tag.model + tag_obj.commit()
      - UUID dirs with watch.json → rehydrate_entity + watch_obj.commit()

    A directory is only considered when its name is a UUID. An entry whose
    JSON file cannot be read, is not valid UTF-8/JSON, or is not a JSON object
    is logged and skipped without being counted; the remaining entries are
    still processed.

    Returns a dict with counts: restored_groups, skipped_groups, restored_watches, skipped_watches.
    Raises zipfile.BadZipFile if the stream is not a valid zip.
    Raises ValueError if the archive exceeds the decompressed-size limit or
    contains a path that escapes the extraction directory.
    """
    from changedetectionio.model import Tag

    restored_groups = 0
    skipped_groups = 0
    restored_watches = 0
    skipped_watches = 0

    # setdefault (not get) so restored groups land in the live settings dict
    # even when it has no 'tags' key yet.
    current_tags = datastore.data['settings']['application'].setdefault('tags', {})
    current_watches = datastore.data['watching']

    with tempfile.TemporaryDirectory() as tmpdir:
        logger.debug(f"Restore: extracting zip to {tmpdir}")
        _extract_backup(zip_stream, tmpdir)
        logger.debug("Restore: zip extracted, scanning UUID directories")

        with os.scandir(tmpdir) as entries:
            for entry in entries:
                if not entry.is_dir():
                    continue

                uuid = entry.name
                # fullmatch: the whole name must be a UUID (no trailing newline etc.)
                if not _UUID_RE.fullmatch(uuid):
                    logger.warning(f"Restore: skipping non-UUID directory {uuid!r}")
                    continue

                tag_json_path = os.path.join(entry.path, 'tag.json')
                watch_json_path = os.path.join(entry.path, 'watch.json')

                # --- Tags (groups) ---
                if include_groups and os.path.exists(tag_json_path):
                    if uuid in current_tags and not include_groups_replace:
                        logger.debug(f"Restore: skipping existing group {uuid} (replace not requested)")
                        skipped_groups += 1
                        continue

                    tag_data = _load_json_object(tag_json_path, f"tag.json for {uuid}")
                    if tag_data is None:
                        continue

                    title = tag_data.get('title', uuid)
                    logger.debug(f"Restore: importing group '{title}' ({uuid})")

                    # Mirror _load_tags: set uuid and force processor
                    tag_data['uuid'] = uuid
                    tag_data['processor'] = 'restock_diff'

                    # Copy the UUID directory so data_dir exists for commit()
                    _replace_uuid_dir(entry.path, datastore.datastore_path, uuid)

                    tag_obj = Tag.model(
                        datastore_path=datastore.datastore_path,
                        __datastore=datastore.data,
                        default=tag_data
                    )
                    current_tags[uuid] = tag_obj
                    tag_obj.commit()
                    restored_groups += 1
                    logger.success(f"Restore: group '{title}' ({uuid}) restored")

                # --- Watches ---
                elif include_watches and os.path.exists(watch_json_path):
                    if uuid in current_watches and not include_watches_replace:
                        logger.debug(f"Restore: skipping existing watch {uuid} (replace not requested)")
                        skipped_watches += 1
                        continue

                    watch_data = _load_json_object(watch_json_path, f"watch.json for {uuid}")
                    if watch_data is None:
                        continue

                    url = watch_data.get('url', uuid)
                    logger.debug(f"Restore: importing watch '{url}' ({uuid})")

                    # Copy UUID directory first so data_dir and history files exist
                    _replace_uuid_dir(entry.path, datastore.datastore_path, uuid)

                    # Mirror _load_watches / rehydrate_entity
                    watch_data['uuid'] = uuid
                    watch_obj = datastore.rehydrate_entity(uuid, watch_data)
                    current_watches[uuid] = watch_obj
                    watch_obj.commit()
                    restored_watches += 1
                    logger.success(f"Restore: watch '{url}' ({uuid}) restored")

        logger.debug(f"Restore: scan complete - groups {restored_groups} restored / {skipped_groups} skipped, "
                     f"watches {restored_watches} restored / {skipped_watches} skipped")

    # Persist changedetection.json (includes the updated tags dict)
    logger.debug("Restore: committing datastore settings")
    datastore.commit()

    return {
        'restored_groups': restored_groups,
        'skipped_groups': skipped_groups,
        'restored_watches': restored_watches,
        'skipped_watches': skipped_watches,
    }
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def construct_restore_blueprint(datastore):
    """Build the Flask blueprint serving the restore page and the upload endpoint.

    GET  /restore        renders the restore form and whether a restore is running.
    POST /restore/start  validates the uploaded zip and runs import_from_zip()
                         on *datastore* in a background daemon thread.

    Returns the configured Blueprint.
    """
    restore_blueprint = Blueprint('restore', __name__, template_folder="templates")
    # Threads started by this blueprint instance; used to detect a running restore.
    restore_threads = []

    def _run_restore(**kwargs):
        """Thread target: run the import and log a failure instead of losing it."""
        try:
            import_from_zip(**kwargs)
        except Exception as e:
            # Top-level boundary of the background thread: nothing above us can
            # report the error, so make sure it at least reaches the log.
            logger.error(f"Restore: background restore failed: {e}")

    # route() must be the outermost decorator, otherwise Flask registers the
    # undecorated function and the login wrapper is never executed.
    # The endpoint is named explicitly so url_for() targets stay stable.
    @restore_blueprint.route("/restore", methods=['GET'], endpoint='restore')
    @login_optionally_required
    def restore():
        form = RestoreForm()
        return render_template("backup_restore.html",
                               form=form,
                               restore_running=any(t.is_alive() for t in restore_threads),
                               max_upload_mb=_MAX_UPLOAD_BYTES // (1024 * 1024),
                               max_decompressed_mb=_MAX_DECOMPRESSED_BYTES // (1024 * 1024))

    @restore_blueprint.route("/restore/start", methods=['POST'], endpoint='backups_restore_start')
    @login_optionally_required
    def backups_restore_start():
        if any(t.is_alive() for t in restore_threads):
            flash(gettext("A restore is already running, check back in a few minutes"), "error")
            return redirect(url_for('backups.restore.restore'))

        zip_file = request.files.get('zip_file')
        if not zip_file or not zip_file.filename:
            flash(gettext("No file uploaded"), "error")
            return redirect(url_for('backups.restore.restore'))

        if not zip_file.filename.lower().endswith('.zip'):
            flash(gettext("File must be a .zip backup file"), "error")
            return redirect(url_for('backups.restore.restore'))

        # Reject oversized uploads before reading the stream into memory.
        content_length = request.content_length
        if content_length and content_length > _MAX_UPLOAD_BYTES:
            flash(gettext("Backup file is too large (max %(mb)s MB)", mb=_MAX_UPLOAD_BYTES // (1024 * 1024)), "error")
            return redirect(url_for('backups.restore.restore'))

        # Read into memory now — the request stream is gone once we return.
        # Read one byte beyond the limit so we can detect truncated-but-still-oversized streams.
        try:
            raw = zip_file.read(_MAX_UPLOAD_BYTES + 1)
            if len(raw) > _MAX_UPLOAD_BYTES:
                flash(gettext("Backup file is too large (max %(mb)s MB)", mb=_MAX_UPLOAD_BYTES // (1024 * 1024)), "error")
                return redirect(url_for('backups.restore.restore'))
            zip_bytes = io.BytesIO(raw)
            with zipfile.ZipFile(zip_bytes) as zf:  # quick validity check before spawning
                total_uncompressed = sum(m.file_size for m in zf.infolist())
            zip_bytes.seek(0)
        except zipfile.BadZipFile:
            flash(gettext("Invalid or corrupted zip file"), "error")
            return redirect(url_for('backups.restore.restore'))

        # Report the zip-bomb limit to the user here; the same check inside the
        # background import could only fail silently from their point of view.
        if total_uncompressed > _MAX_DECOMPRESSED_BYTES:
            flash(gettext("Backup archive decompressed size exceeds the %(mb)s MB limit",
                          mb=_MAX_DECOMPRESSED_BYTES // (1024 * 1024)), "error")
            return redirect(url_for('backups.restore.restore'))

        # Unchecked checkboxes are absent from the form data, so compare with 'y'.
        include_groups = request.form.get('include_groups') == 'y'
        include_groups_replace = request.form.get('include_groups_replace_existing') == 'y'
        include_watches = request.form.get('include_watches') == 'y'
        include_watches_replace = request.form.get('include_watches_replace_existing') == 'y'

        restore_thread = threading.Thread(
            target=_run_restore,
            kwargs={
                'zip_stream': zip_bytes,
                'datastore': datastore,
                'include_groups': include_groups,
                'include_groups_replace': include_groups_replace,
                'include_watches': include_watches,
                'include_watches_replace': include_watches_replace,
            },
            daemon=True,
            name="BackupRestore"
        )
        restore_thread.start()
        # Drop finished threads so the list does not grow without bound.
        restore_threads[:] = [t for t in restore_threads if t.is_alive()]
        restore_threads.append(restore_thread)
        flash(gettext("Restore started in background, check back in a few minutes."))
        return redirect(url_for('backups.restore.restore'))

    return restore_blueprint
|
||||||
@@ -0,0 +1,49 @@
|
|||||||
|
{% extends 'base.html' %}
|
||||||
|
{% block content %}
|
||||||
|
{% from '_helpers.html' import render_simple_field, render_field %}
|
||||||
|
|
||||||
|
<div class="edit-form">
|
||||||
|
<div class="tabs collapsable">
|
||||||
|
<ul>
|
||||||
|
<li class="tab active"><a href="{{ url_for('backups.create') }}">{{ _('Create') }}</a></li>
|
||||||
|
<li class="tab"><a href="{{ url_for('backups.restore.restore') }}">{{ _('Restore') }}</a></li>
|
||||||
|
</ul>
|
||||||
|
</div>
|
||||||
|
<div class="box-wrap inner">
|
||||||
|
<div id="general">
|
||||||
|
{% if backup_running %}
|
||||||
|
<p>
|
||||||
|
<span class="spinner"></span> <strong>{{ _('A backup is running!') }}</strong>
|
||||||
|
</p>
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
<p>
|
||||||
|
{{ _('Here you can download and request a new backup, when a backup is completed you will see it listed below.') }}
|
||||||
|
</p>
|
||||||
|
<br>
|
||||||
|
{% if available_backups %}
|
||||||
|
<ul>
|
||||||
|
{% for backup in available_backups %}
|
||||||
|
<li>
|
||||||
|
<a href="{{ url_for('backups.download_backup', filename=backup["filename"]) }}">{{ backup["filename"] }}</a> {{ backup["filesize"] }} {{ _('Mb') }}
|
||||||
|
</li>
|
||||||
|
{% endfor %}
|
||||||
|
</ul>
|
||||||
|
{% else %}
|
||||||
|
<p>
|
||||||
|
<strong>{{ _('No backups found.') }}</strong>
|
||||||
|
</p>
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
<a class="pure-button pure-button-primary"
|
||||||
|
href="{{ url_for('backups.request_backup') }}">{{ _('Create backup') }}</a>
|
||||||
|
{% if available_backups %}
|
||||||
|
<a class="pure-button button-small button-error "
|
||||||
|
href="{{ url_for('backups.remove_backups') }}">{{ _('Remove backups') }}</a>
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
</div>
|
||||||
|
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
{% endblock %}
|
||||||
@@ -0,0 +1,62 @@
|
|||||||
|
{% extends 'base.html' %}
|
||||||
|
{% block content %}
|
||||||
|
{% from '_helpers.html' import render_field, render_checkbox_field %}
|
||||||
|
|
||||||
|
<div class="edit-form">
|
||||||
|
<div class="tabs collapsable">
|
||||||
|
<ul>
|
||||||
|
<li class="tab"><a href="{{ url_for('backups.create') }}">{{ _('Create') }}</a></li>
|
||||||
|
<li class="tab active"><a href="{{ url_for('backups.restore.restore') }}">{{ _('Restore') }}</a></li>
|
||||||
|
</ul>
|
||||||
|
</div>
|
||||||
|
<div class="box-wrap inner">
|
||||||
|
<div id="general">
|
||||||
|
{% if restore_running %}
|
||||||
|
<p>
|
||||||
|
<span class="spinner"></span> <strong>{{ _('A restore is running!') }}</strong>
|
||||||
|
</p>
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
<p>{{ _('Restore a backup. Must be a .zip backup file created on/after v0.53.1 (new database layout).') }}</p>
|
||||||
|
<p>{{ _('Note: This does not override the main application settings, only watches and groups.') }}</p>
|
||||||
|
<p class="pure-form-message">
|
||||||
|
{{ _('Max upload size: %(upload)s MB · Max decompressed size: %(decomp)s MB',
|
||||||
|
upload=max_upload_mb, decomp=max_decompressed_mb) }}
|
||||||
|
</p>
|
||||||
|
|
||||||
|
<form class="pure-form pure-form-stacked settings"
|
||||||
|
action="{{ url_for('backups.restore.backups_restore_start') }}"
|
||||||
|
method="POST"
|
||||||
|
enctype="multipart/form-data">
|
||||||
|
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}">
|
||||||
|
|
||||||
|
<div class="pure-control-group">
|
||||||
|
{{ render_checkbox_field(form.include_groups) }}
|
||||||
|
<span class="pure-form-message-inline">{{ _('Include all groups found in backup?') }}</span>
|
||||||
|
</div>
|
||||||
|
<div class="pure-control-group">
|
||||||
|
{{ render_checkbox_field(form.include_groups_replace_existing) }}
|
||||||
|
<span class="pure-form-message-inline">{{ _('Replace any existing groups of the same UUID?') }}</span>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="pure-control-group">
|
||||||
|
{{ render_checkbox_field(form.include_watches) }}
|
||||||
|
<span class="pure-form-message-inline">{{ _('Include all watches found in backup?') }}</span>
|
||||||
|
</div>
|
||||||
|
<div class="pure-control-group">
|
||||||
|
{{ render_checkbox_field(form.include_watches_replace_existing) }}
|
||||||
|
<span class="pure-form-message-inline">{{ _('Replace any existing watches of the same UUID?') }}</span>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="pure-control-group">
|
||||||
|
{{ render_field(form.zip_file) }}
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="pure-controls">
|
||||||
|
<button type="submit" class="pure-button pure-button-primary">{{ _('Restore backup') }}</button>
|
||||||
|
</div>
|
||||||
|
</form>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
{% endblock %}
|
||||||
@@ -1,36 +0,0 @@
|
|||||||
{% extends 'base.html' %}
|
|
||||||
{% block content %}
|
|
||||||
{% from '_helpers.html' import render_simple_field, render_field %}
|
|
||||||
<div class="edit-form">
|
|
||||||
<div class="box-wrap inner">
|
|
||||||
<h2>{{ _('Backups') }}</h2>
|
|
||||||
{% if backup_running %}
|
|
||||||
<p>
|
|
||||||
<span class="spinner"></span> <strong>{{ _('A backup is running!') }}</strong>
|
|
||||||
</p>
|
|
||||||
{% endif %}
|
|
||||||
<p>
|
|
||||||
{{ _('Here you can download and request a new backup, when a backup is completed you will see it listed below.') }}
|
|
||||||
</p>
|
|
||||||
<br>
|
|
||||||
{% if available_backups %}
|
|
||||||
<ul>
|
|
||||||
{% for backup in available_backups %}
|
|
||||||
<li><a href="{{ url_for('backups.download_backup', filename=backup["filename"]) }}">{{ backup["filename"] }}</a> {{ backup["filesize"] }} {{ _('Mb') }}</li>
|
|
||||||
{% endfor %}
|
|
||||||
</ul>
|
|
||||||
{% else %}
|
|
||||||
<p>
|
|
||||||
<strong>{{ _('No backups found.') }}</strong>
|
|
||||||
</p>
|
|
||||||
{% endif %}
|
|
||||||
|
|
||||||
<a class="pure-button pure-button-primary" href="{{ url_for('backups.request_backup') }}">{{ _('Create backup') }}</a>
|
|
||||||
{% if available_backups %}
|
|
||||||
<a class="pure-button button-small button-error " href="{{ url_for('backups.remove_backups') }}">{{ _('Remove backups') }}</a>
|
|
||||||
{% endif %}
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
|
|
||||||
{% endblock %}
|
|
||||||
@@ -102,6 +102,35 @@ def run_async_in_browser_loop(coro):
|
|||||||
else:
|
else:
|
||||||
raise RuntimeError("Browser steps event loop is not available")
|
raise RuntimeError("Browser steps event loop is not available")
|
||||||
|
|
||||||
|
async def _close_session_resources(session_data, label=''):
    """Release a session's browser resources: stepper, then browser, then playwright.

    browserstepper.cleanup() closes page+context but not the browser itself.
    For CloakBrowser, browser.close() is what stops the local Chromium process via pw.stop().
    For the default CDP path, playwright_context.stop() shuts down the playwright instance.

    Each step is attempted independently; a failure is logged (with *label*
    appended to the message) and the remaining steps still run.
    """
    async def _attempt(make_awaitable, report):
        # Build and await inside the try so any error from either is reported.
        try:
            await make_awaitable()
        except Exception as exc:
            report(exc)

    stepper = session_data.get('browserstepper')
    if stepper:
        await _attempt(
            lambda: stepper.cleanup(),
            lambda exc: logger.error(f"Error cleaning up browserstepper{label}: {exc}"),
        )

    active_browser = session_data.get('browser')
    if active_browser:
        # Closing the browser is capped at 5 seconds.
        await _attempt(
            lambda: asyncio.wait_for(active_browser.close(), timeout=5.0),
            lambda exc: logger.warning(f"Error closing browser{label}: {exc}"),
        )

    pw_context = session_data.get('playwright_context')
    if pw_context:
        await _attempt(
            lambda: pw_context.stop(),
            lambda exc: logger.warning(f"Error stopping playwright context{label}: {exc}"),
        )
|
||||||
|
|
||||||
|
|
||||||
def cleanup_expired_sessions():
|
def cleanup_expired_sessions():
|
||||||
"""Remove expired browsersteps sessions and cleanup their resources"""
|
"""Remove expired browsersteps sessions and cleanup their resources"""
|
||||||
global browsersteps_sessions, browsersteps_watch_to_session
|
global browsersteps_sessions, browsersteps_watch_to_session
|
||||||
@@ -119,13 +148,10 @@ def cleanup_expired_sessions():
|
|||||||
logger.debug(f"Cleaning up expired browsersteps session {session_id}")
|
logger.debug(f"Cleaning up expired browsersteps session {session_id}")
|
||||||
session_data = browsersteps_sessions[session_id]
|
session_data = browsersteps_sessions[session_id]
|
||||||
|
|
||||||
# Cleanup playwright resources asynchronously
|
try:
|
||||||
browserstepper = session_data.get('browserstepper')
|
run_async_in_browser_loop(_close_session_resources(session_data, label=f" for session {session_id}"))
|
||||||
if browserstepper:
|
except Exception as e:
|
||||||
try:
|
logger.error(f"Error cleaning up session {session_id}: {e}")
|
||||||
run_async_in_browser_loop(browserstepper.cleanup())
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error cleaning up session {session_id}: {e}")
|
|
||||||
|
|
||||||
# Remove from sessions dict
|
# Remove from sessions dict
|
||||||
del browsersteps_sessions[session_id]
|
del browsersteps_sessions[session_id]
|
||||||
@@ -152,12 +178,10 @@ def cleanup_session_for_watch(watch_uuid):
|
|||||||
|
|
||||||
session_data = browsersteps_sessions.get(session_id)
|
session_data = browsersteps_sessions.get(session_id)
|
||||||
if session_data:
|
if session_data:
|
||||||
browserstepper = session_data.get('browserstepper')
|
try:
|
||||||
if browserstepper:
|
run_async_in_browser_loop(_close_session_resources(session_data, label=f" for watch {watch_uuid}"))
|
||||||
try:
|
except Exception as e:
|
||||||
run_async_in_browser_loop(browserstepper.cleanup())
|
logger.error(f"Error cleaning up session {session_id} for watch {watch_uuid}: {e}")
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error cleaning up session {session_id} for watch {watch_uuid}: {e}")
|
|
||||||
|
|
||||||
# Remove from sessions dict
|
# Remove from sessions dict
|
||||||
del browsersteps_sessions[session_id]
|
del browsersteps_sessions[session_id]
|
||||||
@@ -174,63 +198,73 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
|||||||
browser_steps_blueprint = Blueprint('browser_steps', __name__, template_folder="templates")
|
browser_steps_blueprint = Blueprint('browser_steps', __name__, template_folder="templates")
|
||||||
|
|
||||||
async def start_browsersteps_session(watch_uuid):
|
async def start_browsersteps_session(watch_uuid):
|
||||||
from . import browser_steps
|
from changedetectionio.browser_steps import browser_steps
|
||||||
import time
|
import time
|
||||||
from playwright.async_api import async_playwright
|
from playwright.async_api import async_playwright
|
||||||
|
|
||||||
# We keep the playwright session open for many minutes
|
|
||||||
keepalive_seconds = int(os.getenv('BROWSERSTEPS_MINUTES_KEEPALIVE', 10)) * 60
|
keepalive_seconds = int(os.getenv('BROWSERSTEPS_MINUTES_KEEPALIVE', 10)) * 60
|
||||||
|
keepalive_ms = ((keepalive_seconds + 3) * 1000)
|
||||||
|
|
||||||
browsersteps_start_session = {'start_time': time.time()}
|
browsersteps_start_session = {'start_time': time.time()}
|
||||||
|
|
||||||
# Create a new async playwright instance for browser steps
|
# Build proxy dict first — needed by both the CDP path and fetcher-specific launchers
|
||||||
playwright_instance = async_playwright()
|
|
||||||
playwright_context = await playwright_instance.start()
|
|
||||||
|
|
||||||
keepalive_ms = ((keepalive_seconds + 3) * 1000)
|
|
||||||
base_url = os.getenv('PLAYWRIGHT_DRIVER_URL', '').strip('"')
|
|
||||||
a = "?" if not '?' in base_url else '&'
|
|
||||||
base_url += a + f"timeout={keepalive_ms}"
|
|
||||||
|
|
||||||
browser = await playwright_context.chromium.connect_over_cdp(base_url, timeout=keepalive_ms)
|
|
||||||
browsersteps_start_session['browser'] = browser
|
|
||||||
browsersteps_start_session['playwright_context'] = playwright_context
|
|
||||||
|
|
||||||
proxy_id = datastore.get_preferred_proxy_for_watch(uuid=watch_uuid)
|
proxy_id = datastore.get_preferred_proxy_for_watch(uuid=watch_uuid)
|
||||||
proxy = None
|
proxy = None
|
||||||
if proxy_id:
|
if proxy_id:
|
||||||
proxy_url = datastore.proxy_list.get(proxy_id).get('url')
|
proxy_url = datastore.proxy_list.get(proxy_id, {}).get('url')
|
||||||
if proxy_url:
|
if proxy_url:
|
||||||
|
|
||||||
# Playwright needs separate username and password values
|
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
parsed = urlparse(proxy_url)
|
parsed = urlparse(proxy_url)
|
||||||
proxy = {'server': proxy_url}
|
proxy = {'server': proxy_url}
|
||||||
|
|
||||||
if parsed.username:
|
if parsed.username:
|
||||||
proxy['username'] = parsed.username
|
proxy['username'] = parsed.username
|
||||||
|
|
||||||
if parsed.password:
|
if parsed.password:
|
||||||
proxy['password'] = parsed.password
|
proxy['password'] = parsed.password
|
||||||
|
|
||||||
logger.debug(f"Browser Steps: UUID {watch_uuid} selected proxy {proxy_url}")
|
logger.debug(f"Browser Steps: UUID {watch_uuid} selected proxy {proxy_url}")
|
||||||
|
|
||||||
# Tell Playwright to connect to Chrome and setup a new session via our stepper interface
|
# Resolve the fetcher class for this watch so we can ask it to launch its own browser
|
||||||
|
# if it supports that (e.g. CloakBrowser, which runs locally rather than via CDP)
|
||||||
|
watch = datastore.data['watching'][watch_uuid]
|
||||||
|
from changedetectionio import content_fetchers
|
||||||
|
fetcher_name = watch.get_fetch_backend or 'system'
|
||||||
|
if fetcher_name == 'system':
|
||||||
|
fetcher_name = datastore.data['settings']['application'].get('fetch_backend', 'html_requests')
|
||||||
|
fetcher_class = getattr(content_fetchers, fetcher_name, None)
|
||||||
|
|
||||||
|
browser = None
|
||||||
|
playwright_context = None
|
||||||
|
|
||||||
|
# If the fetcher has its own browser launch for the live steps UI, use it.
|
||||||
|
# get_browsersteps_browser(proxy, keepalive_ms) returns (browser, playwright_context_or_None)
|
||||||
|
# or None to fall back to the default CDP path.
|
||||||
|
if fetcher_class and hasattr(fetcher_class, 'get_browsersteps_browser'):
|
||||||
|
result = await fetcher_class.get_browsersteps_browser(proxy=proxy, keepalive_ms=keepalive_ms)
|
||||||
|
if result is not None:
|
||||||
|
browser, playwright_context = result
|
||||||
|
logger.debug(f"Browser Steps: using fetcher-specific browser for '{fetcher_name}'")
|
||||||
|
|
||||||
|
# Default: connect to the remote Playwright/sockpuppetbrowser via CDP
|
||||||
|
if browser is None:
|
||||||
|
playwright_instance = async_playwright()
|
||||||
|
playwright_context = await playwright_instance.start()
|
||||||
|
base_url = os.getenv('PLAYWRIGHT_DRIVER_URL', '').strip('"')
|
||||||
|
a = "?" if '?' not in base_url else '&'
|
||||||
|
base_url += a + f"timeout={keepalive_ms}"
|
||||||
|
browser = await playwright_context.chromium.connect_over_cdp(base_url, timeout=keepalive_ms)
|
||||||
|
logger.debug(f"Browser Steps: using CDP connection to {base_url}")
|
||||||
|
|
||||||
|
browsersteps_start_session['browser'] = browser
|
||||||
|
browsersteps_start_session['playwright_context'] = playwright_context
|
||||||
|
|
||||||
browserstepper = browser_steps.browsersteps_live_ui(
|
browserstepper = browser_steps.browsersteps_live_ui(
|
||||||
playwright_browser=browser,
|
playwright_browser=browser,
|
||||||
proxy=proxy,
|
proxy=proxy,
|
||||||
start_url=datastore.data['watching'][watch_uuid].link,
|
start_url=watch.link,
|
||||||
headers=datastore.data['watching'][watch_uuid].get('headers')
|
headers=watch.get('headers')
|
||||||
)
|
)
|
||||||
|
|
||||||
# Initialize the async connection
|
|
||||||
await browserstepper.connect(proxy=proxy)
|
await browserstepper.connect(proxy=proxy)
|
||||||
|
|
||||||
browsersteps_start_session['browserstepper'] = browserstepper
|
browsersteps_start_session['browserstepper'] = browserstepper
|
||||||
|
|
||||||
# For test
|
|
||||||
#await browsersteps_start_session['browserstepper'].action_goto_url(value="http://example.com?time="+str(time.time()))
|
|
||||||
|
|
||||||
return browsersteps_start_session
|
return browsersteps_start_session
|
||||||
|
|
||||||
|
|
||||||
@@ -238,7 +272,6 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
|||||||
@browser_steps_blueprint.route("/browsersteps_start_session", methods=['GET'])
|
@browser_steps_blueprint.route("/browsersteps_start_session", methods=['GET'])
|
||||||
def browsersteps_start_session():
|
def browsersteps_start_session():
|
||||||
# A new session was requested, return sessionID
|
# A new session was requested, return sessionID
|
||||||
import asyncio
|
|
||||||
import uuid
|
import uuid
|
||||||
browsersteps_session_id = str(uuid.uuid4())
|
browsersteps_session_id = str(uuid.uuid4())
|
||||||
watch_uuid = request.args.get('uuid')
|
watch_uuid = request.args.get('uuid')
|
||||||
@@ -301,11 +334,10 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
|||||||
@browser_steps_blueprint.route("/browsersteps_update", methods=['POST'])
|
@browser_steps_blueprint.route("/browsersteps_update", methods=['POST'])
|
||||||
def browsersteps_ui_update():
|
def browsersteps_ui_update():
|
||||||
import base64
|
import base64
|
||||||
import playwright._impl._errors
|
|
||||||
from changedetectionio.blueprint.browser_steps import browser_steps
|
|
||||||
|
|
||||||
remaining =0
|
remaining = 0
|
||||||
uuid = request.args.get('uuid')
|
uuid = request.args.get('uuid')
|
||||||
|
goto_website_url_first_step = request.args.get('goto_website_url_first_step')
|
||||||
|
|
||||||
browsersteps_session_id = request.args.get('browsersteps_session_id')
|
browsersteps_session_id = request.args.get('browsersteps_session_id')
|
||||||
|
|
||||||
@@ -316,33 +348,33 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
|||||||
return make_response('No session exists under that ID', 500)
|
return make_response('No session exists under that ID', 500)
|
||||||
|
|
||||||
is_last_step = False
|
is_last_step = False
|
||||||
# Actions - step/apply/etc, do the thing and return state
|
|
||||||
if request.method == 'POST':
|
# @todo - should always be an existing session
|
||||||
# @todo - should always be an existing session
|
if goto_website_url_first_step:
|
||||||
|
logger.debug("Going to site (requested automatically before stepping)..")
|
||||||
|
step_operation = "Goto site"
|
||||||
|
step_selector = None
|
||||||
|
step_optional_value = None
|
||||||
|
else:
|
||||||
step_operation = request.form.get('operation')
|
step_operation = request.form.get('operation')
|
||||||
step_selector = request.form.get('selector')
|
step_selector = request.form.get('selector')
|
||||||
step_optional_value = request.form.get('optional_value')
|
step_optional_value = request.form.get('optional_value')
|
||||||
is_last_step = strtobool(request.form.get('is_last_step'))
|
is_last_step = strtobool(request.form.get('is_last_step'))
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Run the async call_action method in the dedicated browser steps event loop
|
# Run the async call_action method in the dedicated browser steps event loop
|
||||||
run_async_in_browser_loop(
|
run_async_in_browser_loop(
|
||||||
browsersteps_sessions[browsersteps_session_id]['browserstepper'].call_action(
|
browsersteps_sessions[browsersteps_session_id]['browserstepper'].call_action(
|
||||||
action_name=step_operation,
|
action_name=step_operation,
|
||||||
selector=step_selector,
|
selector=step_selector,
|
||||||
optional_value=step_optional_value
|
optional_value=step_optional_value
|
||||||
)
|
|
||||||
)
|
)
|
||||||
|
)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Exception when calling step operation {step_operation} {str(e)}")
|
logger.error(f"Exception when calling step operation {step_operation} {str(e)}")
|
||||||
# Try to find something of value to give back to the user
|
# Try to find something of value to give back to the user
|
||||||
return make_response(str(e).splitlines()[0], 401)
|
return make_response(str(e).splitlines()[0], 401)
|
||||||
|
|
||||||
|
|
||||||
# if not this_session.page:
|
|
||||||
# cleanup_playwright_session()
|
|
||||||
# return make_response('Browser session ran out of time :( Please reload this page.', 401)
|
|
||||||
|
|
||||||
# Screenshots and other info only needed on requesting a step (POST)
|
# Screenshots and other info only needed on requesting a step (POST)
|
||||||
try:
|
try:
|
||||||
@@ -350,7 +382,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
|||||||
(screenshot, xpath_data) = run_async_in_browser_loop(
|
(screenshot, xpath_data) = run_async_in_browser_loop(
|
||||||
browsersteps_sessions[browsersteps_session_id]['browserstepper'].get_current_state()
|
browsersteps_sessions[browsersteps_session_id]['browserstepper'].get_current_state()
|
||||||
)
|
)
|
||||||
|
|
||||||
if is_last_step:
|
if is_last_step:
|
||||||
watch = datastore.data['watching'].get(uuid)
|
watch = datastore.data['watching'].get(uuid)
|
||||||
u = browsersteps_sessions[browsersteps_session_id]['browserstepper'].page.url
|
u = browsersteps_sessions[browsersteps_session_id]['browserstepper'].page.url
|
||||||
|
|||||||
@@ -94,13 +94,13 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
|||||||
return results
|
return results
|
||||||
|
|
||||||
@login_required
|
@login_required
|
||||||
@check_proxies_blueprint.route("/<string:uuid>/status", methods=['GET'])
|
@check_proxies_blueprint.route("/<uuid_str:uuid>/status", methods=['GET'])
|
||||||
def get_recheck_status(uuid):
|
def get_recheck_status(uuid):
|
||||||
results = _recalc_check_status(uuid=uuid)
|
results = _recalc_check_status(uuid=uuid)
|
||||||
return results
|
return results
|
||||||
|
|
||||||
@login_required
|
@login_required
|
||||||
@check_proxies_blueprint.route("/<string:uuid>/start", methods=['GET'])
|
@check_proxies_blueprint.route("/<uuid_str:uuid>/start", methods=['GET'])
|
||||||
def start_check(uuid):
|
def start_check(uuid):
|
||||||
|
|
||||||
if not datastore.proxy_list:
|
if not datastore.proxy_list:
|
||||||
|
|||||||
@@ -16,6 +16,11 @@
|
|||||||
<form class="pure-form" action="{{url_for('imports.import_page')}}" method="POST" enctype="multipart/form-data">
|
<form class="pure-form" action="{{url_for('imports.import_page')}}" method="POST" enctype="multipart/form-data">
|
||||||
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}">
|
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}">
|
||||||
<div class="tab-pane-inner" id="url-list">
|
<div class="tab-pane-inner" id="url-list">
|
||||||
|
|
||||||
|
<p>
|
||||||
|
{{ _('Restoring changedetection.io backups is in the') }} <a href="{{ url_for('backups.restore.restore') }}">{{ _('backups section') }}</a>.
|
||||||
|
<br>
|
||||||
|
</p>
|
||||||
<div class="pure-control-group">
|
<div class="pure-control-group">
|
||||||
{{ _('Enter one URL per line, and optionally add tags for each URL after a space, delineated by comma (,):') }}
|
{{ _('Enter one URL per line, and optionally add tags for each URL after a space, delineated by comma (,):') }}
|
||||||
<br>
|
<br>
|
||||||
@@ -37,9 +42,6 @@
|
|||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div class="tab-pane-inner" id="distill-io">
|
<div class="tab-pane-inner" id="distill-io">
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<div class="pure-control-group">
|
<div class="pure-control-group">
|
||||||
{{ _('Copy and Paste your Distill.io watch \'export\' file, this should be a JSON file.') }}<br>
|
{{ _('Copy and Paste your Distill.io watch \'export\' file, this should be a JSON file.') }}<br>
|
||||||
{{ _('This is') }} <i>{{ _('experimental') }}</i>, {{ _('supported fields are') }} <code>name</code>, <code>uri</code>, <code>tags</code>, <code>config:selections</code>, {{ _('the rest (including') }} <code>schedule</code>) {{ _('are ignored.') }}
|
{{ _('This is') }} <i>{{ _('experimental') }}</i>, {{ _('supported fields are') }} <code>name</code>, <code>uri</code>, <code>tags</code>, <code>config:selections</code>, {{ _('the rest (including') }} <code>schedule</code>) {{ _('are ignored.') }}
|
||||||
@@ -49,8 +51,6 @@
|
|||||||
{{ _('Be sure to set your default fetcher to Chrome if required.') }}<br>
|
{{ _('Be sure to set your default fetcher to Chrome if required.') }}<br>
|
||||||
</p>
|
</p>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
|
||||||
<textarea name="distill-io" class="pure-input-1-2" style="width: 100%;
|
<textarea name="distill-io" class="pure-input-1-2" style="width: 100%;
|
||||||
font-family:monospace;
|
font-family:monospace;
|
||||||
white-space: pre;
|
white-space: pre;
|
||||||
@@ -114,6 +114,7 @@
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<button type="submit" class="pure-button pure-input-1-2 pure-button-primary">{{ _('Import') }}</button>
|
<button type="submit" class="pure-button pure-input-1-2 pure-button-primary">{{ _('Import') }}</button>
|
||||||
|
|
||||||
</form>
|
</form>
|
||||||
|
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
@@ -15,7 +15,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q: PriorityQueue
|
|||||||
price_data_follower_blueprint = Blueprint('price_data_follower', __name__)
|
price_data_follower_blueprint = Blueprint('price_data_follower', __name__)
|
||||||
|
|
||||||
@login_required
|
@login_required
|
||||||
@price_data_follower_blueprint.route("/<string:uuid>/accept", methods=['GET'])
|
@price_data_follower_blueprint.route("/<uuid_str:uuid>/accept", methods=['GET'])
|
||||||
def accept(uuid):
|
def accept(uuid):
|
||||||
datastore.data['watching'][uuid]['track_ldjson_price_data'] = PRICE_DATA_TRACK_ACCEPT
|
datastore.data['watching'][uuid]['track_ldjson_price_data'] = PRICE_DATA_TRACK_ACCEPT
|
||||||
datastore.data['watching'][uuid]['processor'] = 'restock_diff'
|
datastore.data['watching'][uuid]['processor'] = 'restock_diff'
|
||||||
@@ -25,7 +25,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q: PriorityQueue
|
|||||||
return redirect(url_for("watchlist.index"))
|
return redirect(url_for("watchlist.index"))
|
||||||
|
|
||||||
@login_required
|
@login_required
|
||||||
@price_data_follower_blueprint.route("/<string:uuid>/reject", methods=['GET'])
|
@price_data_follower_blueprint.route("/<uuid_str:uuid>/reject", methods=['GET'])
|
||||||
def reject(uuid):
|
def reject(uuid):
|
||||||
datastore.data['watching'][uuid]['track_ldjson_price_data'] = PRICE_DATA_TRACK_REJECT
|
datastore.data['watching'][uuid]['track_ldjson_price_data'] = PRICE_DATA_TRACK_REJECT
|
||||||
datastore.data['watching'][uuid].commit()
|
datastore.data['watching'][uuid].commit()
|
||||||
|
|||||||
@@ -9,11 +9,12 @@ def construct_single_watch_routes(rss_blueprint, datastore):
|
|||||||
datastore: The ChangeDetectionStore instance
|
datastore: The ChangeDetectionStore instance
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@rss_blueprint.route("/watch/<string:uuid>", methods=['GET'])
|
@rss_blueprint.route("/watch/<uuid_str:uuid>", methods=['GET'])
|
||||||
def rss_single_watch(uuid):
|
def rss_single_watch(uuid):
|
||||||
import time
|
import time
|
||||||
|
|
||||||
from flask import make_response, request
|
from flask import make_response, request, Response
|
||||||
|
from flask_babel import lazy_gettext as _l
|
||||||
from feedgen.feed import FeedGenerator
|
from feedgen.feed import FeedGenerator
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
@@ -42,12 +43,12 @@ def construct_single_watch_routes(rss_blueprint, datastore):
|
|||||||
# Get the watch by UUID
|
# Get the watch by UUID
|
||||||
watch = datastore.data['watching'].get(uuid)
|
watch = datastore.data['watching'].get(uuid)
|
||||||
if not watch:
|
if not watch:
|
||||||
return f"Watch with UUID {uuid} not found", 404
|
return Response(_l("Watch with UUID %(uuid)s not found", uuid=uuid), status=404, mimetype='text/plain')
|
||||||
|
|
||||||
# Check if watch has at least 2 history snapshots
|
# Check if watch has at least 2 history snapshots
|
||||||
dates = list(watch.history.keys())
|
dates = list(watch.history.keys())
|
||||||
if len(dates) < 2:
|
if len(dates) < 2:
|
||||||
return f"Watch {uuid} does not have enough history snapshots to show changes (need at least 2)", 400
|
return Response(_l("Watch %(uuid)s does not have enough history snapshots to show changes (need at least 2)", uuid=uuid), status=400, mimetype='text/plain')
|
||||||
|
|
||||||
# Get the number of diffs to include (default: 5)
|
# Get the number of diffs to include (default: 5)
|
||||||
rss_diff_length = datastore.data['settings']['application'].get('rss_diff_length', 5)
|
rss_diff_length = datastore.data['settings']['application'].get('rss_diff_length', 5)
|
||||||
|
|||||||
@@ -7,7 +7,7 @@ def construct_tag_routes(rss_blueprint, datastore):
|
|||||||
datastore: The ChangeDetectionStore instance
|
datastore: The ChangeDetectionStore instance
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@rss_blueprint.route("/tag/<string:tag_uuid>", methods=['GET'])
|
@rss_blueprint.route("/tag/<uuid_str:tag_uuid>", methods=['GET'])
|
||||||
def rss_tag_feed(tag_uuid):
|
def rss_tag_feed(tag_uuid):
|
||||||
|
|
||||||
from flask import make_response, request, url_for
|
from flask import make_response, request, url_for
|
||||||
|
|||||||
@@ -83,6 +83,10 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
|||||||
datastore.data['settings']['requests'].update(form.data['requests'])
|
datastore.data['settings']['requests'].update(form.data['requests'])
|
||||||
datastore.commit()
|
datastore.commit()
|
||||||
|
|
||||||
|
# Clear all checksums to force reprocessing with new settings
|
||||||
|
# Global settings can affect watch behavior (filters, rendering, etc.)
|
||||||
|
datastore.clear_all_last_checksums()
|
||||||
|
|
||||||
# Adjust worker count if it changed
|
# Adjust worker count if it changed
|
||||||
if new_worker_count != old_worker_count:
|
if new_worker_count != old_worker_count:
|
||||||
from changedetectionio import worker_pool
|
from changedetectionio import worker_pool
|
||||||
|
|||||||
@@ -25,7 +25,7 @@
|
|||||||
<li class="tab"><a href="#ui-options">{{ _('UI Options') }}</a></li>
|
<li class="tab"><a href="#ui-options">{{ _('UI Options') }}</a></li>
|
||||||
<li class="tab"><a href="#api">{{ _('API') }}</a></li>
|
<li class="tab"><a href="#api">{{ _('API') }}</a></li>
|
||||||
<li class="tab"><a href="#rss">{{ _('RSS') }}</a></li>
|
<li class="tab"><a href="#rss">{{ _('RSS') }}</a></li>
|
||||||
<li class="tab"><a href="{{ url_for('backups.index') }}">{{ _('Backups') }}</a></li>
|
<li class="tab"><a href="{{ url_for('backups.create') }}">{{ _('Backups') }}</a></li>
|
||||||
<li class="tab"><a href="#timedate">{{ _('Time & Date') }}</a></li>
|
<li class="tab"><a href="#timedate">{{ _('Time & Date') }}</a></li>
|
||||||
<li class="tab"><a href="#proxies">{{ _('CAPTCHA & Proxies') }}</a></li>
|
<li class="tab"><a href="#proxies">{{ _('CAPTCHA & Proxies') }}</a></li>
|
||||||
{% if plugin_tabs %}
|
{% if plugin_tabs %}
|
||||||
@@ -154,9 +154,8 @@
|
|||||||
</span>
|
</span>
|
||||||
</div>
|
</div>
|
||||||
<div class="pure-control-group">
|
<div class="pure-control-group">
|
||||||
<br>
|
<br>
|
||||||
{{ _('Tip:') }} <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration#brightdata-proxy-support">{{ _('Connect using Bright Data and Oxylabs Proxies, find out more here.') }}</a>
|
{{ _('Tip:') }} <a href="{{ url_for('settings.settings_page')}}#proxies">{{ _('Connect using Bright Data proxies, find out more here.') }}</a>
|
||||||
|
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
@@ -352,7 +351,7 @@ nav
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<p><strong>{{ _('Tip') }}</strong>: {{ _('"Residential" and "Mobile" proxy type can be more successfull than "Data Center" for blocked websites.') }}</p>
|
<p><strong>{{ _('Tip') }}</strong>: {{ _('"Residential" and "Mobile" proxy type can be more successful than "Data Center" for blocked websites.') }}</p>
|
||||||
|
|
||||||
<div class="pure-control-group" id="extra-proxies-setting">
|
<div class="pure-control-group" id="extra-proxies-setting">
|
||||||
{{ render_fieldlist_with_inline_errors(form.requests.form.extra_proxies) }}
|
{{ render_fieldlist_with_inline_errors(form.requests.form.extra_proxies) }}
|
||||||
|
|||||||
@@ -54,7 +54,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
|||||||
|
|
||||||
return redirect(url_for('tags.tags_overview_page'))
|
return redirect(url_for('tags.tags_overview_page'))
|
||||||
|
|
||||||
@tags_blueprint.route("/mute/<string:uuid>", methods=['GET'])
|
@tags_blueprint.route("/mute/<uuid_str:uuid>", methods=['GET'])
|
||||||
@login_optionally_required
|
@login_optionally_required
|
||||||
def mute(uuid):
|
def mute(uuid):
|
||||||
tag = datastore.data['settings']['application']['tags'].get(uuid)
|
tag = datastore.data['settings']['application']['tags'].get(uuid)
|
||||||
@@ -63,24 +63,13 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
|||||||
tag.commit()
|
tag.commit()
|
||||||
return redirect(url_for('tags.tags_overview_page'))
|
return redirect(url_for('tags.tags_overview_page'))
|
||||||
|
|
||||||
@tags_blueprint.route("/delete/<string:uuid>", methods=['GET'])
|
@tags_blueprint.route("/delete/<uuid_str:uuid>", methods=['GET'])
|
||||||
@login_optionally_required
|
@login_optionally_required
|
||||||
def delete(uuid):
|
def delete(uuid):
|
||||||
# Delete the tag from settings immediately
|
# Delete the tag from settings immediately
|
||||||
if datastore.data['settings']['application']['tags'].get(uuid):
|
if datastore.data['settings']['application']['tags'].get(uuid):
|
||||||
del datastore.data['settings']['application']['tags'][uuid]
|
del datastore.data['settings']['application']['tags'][uuid]
|
||||||
|
|
||||||
# Delete tag.json file if it exists
|
|
||||||
import os
|
|
||||||
tag_dir = os.path.join(datastore.datastore_path, uuid)
|
|
||||||
tag_json = os.path.join(tag_dir, "tag.json")
|
|
||||||
if os.path.exists(tag_json):
|
|
||||||
try:
|
|
||||||
os.unlink(tag_json)
|
|
||||||
logger.info(f"Deleted tag.json for tag {uuid}")
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Failed to delete tag.json for tag {uuid}: {e}")
|
|
||||||
|
|
||||||
# Remove tag from all watches in background thread to avoid blocking
|
# Remove tag from all watches in background thread to avoid blocking
|
||||||
def remove_tag_background(tag_uuid):
|
def remove_tag_background(tag_uuid):
|
||||||
"""Background thread to remove tag from watches - discarded after completion."""
|
"""Background thread to remove tag from watches - discarded after completion."""
|
||||||
@@ -101,7 +90,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
|||||||
flash(gettext("Tag deleted, removing from watches in background"))
|
flash(gettext("Tag deleted, removing from watches in background"))
|
||||||
return redirect(url_for('tags.tags_overview_page'))
|
return redirect(url_for('tags.tags_overview_page'))
|
||||||
|
|
||||||
@tags_blueprint.route("/unlink/<string:uuid>", methods=['GET'])
|
@tags_blueprint.route("/unlink/<uuid_str:uuid>", methods=['GET'])
|
||||||
@login_optionally_required
|
@login_optionally_required
|
||||||
def unlink(uuid):
|
def unlink(uuid):
|
||||||
# Unlink tag from all watches in background thread to avoid blocking
|
# Unlink tag from all watches in background thread to avoid blocking
|
||||||
@@ -127,19 +116,11 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
|||||||
@tags_blueprint.route("/delete_all", methods=['GET'])
|
@tags_blueprint.route("/delete_all", methods=['GET'])
|
||||||
@login_optionally_required
|
@login_optionally_required
|
||||||
def delete_all():
|
def delete_all():
|
||||||
# Delete all tag.json files
|
|
||||||
import os
|
|
||||||
for tag_uuid in list(datastore.data['settings']['application']['tags'].keys()):
|
|
||||||
tag_dir = os.path.join(datastore.datastore_path, tag_uuid)
|
|
||||||
tag_json = os.path.join(tag_dir, "tag.json")
|
|
||||||
if os.path.exists(tag_json):
|
|
||||||
try:
|
|
||||||
os.unlink(tag_json)
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Failed to delete tag.json for tag {tag_uuid}: {e}")
|
|
||||||
|
|
||||||
# Clear all tags from settings immediately
|
for tag_uuid in list(datastore.data['settings']['application']['tags'].keys()):
|
||||||
datastore.data['settings']['application']['tags'] = {}
|
# TagsDict 'del' handler will remove the dir
|
||||||
|
del datastore.data['settings']['application']['tags'][tag_uuid]
|
||||||
|
|
||||||
|
|
||||||
# Clear tags from all watches in background thread to avoid blocking
|
# Clear tags from all watches in background thread to avoid blocking
|
||||||
def clear_all_tags_background():
|
def clear_all_tags_background():
|
||||||
@@ -160,7 +141,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
|||||||
flash(gettext("All tags deleted, clearing from watches in background"))
|
flash(gettext("All tags deleted, clearing from watches in background"))
|
||||||
return redirect(url_for('tags.tags_overview_page'))
|
return redirect(url_for('tags.tags_overview_page'))
|
||||||
|
|
||||||
@tags_blueprint.route("/edit/<string:uuid>", methods=['GET'])
|
@tags_blueprint.route("/edit/<uuid_str:uuid>", methods=['GET'])
|
||||||
@login_optionally_required
|
@login_optionally_required
|
||||||
def form_tag_edit(uuid):
|
def form_tag_edit(uuid):
|
||||||
from changedetectionio.blueprint.tags.form import group_restock_settings_form
|
from changedetectionio.blueprint.tags.form import group_restock_settings_form
|
||||||
@@ -179,6 +160,21 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
|||||||
default_system_settings = datastore.data['settings'],
|
default_system_settings = datastore.data['settings'],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Bridge API-stored processor_config_* values into the form's FormField sub-forms.
|
||||||
|
# The API stores processor_config_restock_diff in the tag dict; find the matching
|
||||||
|
# FormField by checking which one's sub-fields cover the config keys.
|
||||||
|
from wtforms.fields.form import FormField as WTFormField
|
||||||
|
for key, value in default.items():
|
||||||
|
if not key.startswith('processor_config_') or not isinstance(value, dict):
|
||||||
|
continue
|
||||||
|
for form_field in form:
|
||||||
|
if isinstance(form_field, WTFormField) and all(k in form_field.form._fields for k in value):
|
||||||
|
for sub_key, sub_value in value.items():
|
||||||
|
sub_field = form_field.form._fields.get(sub_key)
|
||||||
|
if sub_field is not None:
|
||||||
|
sub_field.data = sub_value
|
||||||
|
break
|
||||||
|
|
||||||
template_args = {
|
template_args = {
|
||||||
'data': default,
|
'data': default,
|
||||||
'form': form,
|
'form': form,
|
||||||
@@ -222,7 +218,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
|||||||
return output
|
return output
|
||||||
|
|
||||||
|
|
||||||
@tags_blueprint.route("/edit/<string:uuid>", methods=['POST'])
|
@tags_blueprint.route("/edit/<uuid_str:uuid>", methods=['POST'])
|
||||||
@login_optionally_required
|
@login_optionally_required
|
||||||
def form_tag_edit_submit(uuid):
|
def form_tag_edit_submit(uuid):
|
||||||
from changedetectionio.blueprint.tags.form import group_restock_settings_form
|
from changedetectionio.blueprint.tags.form import group_restock_settings_form
|
||||||
@@ -244,12 +240,15 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
|||||||
tag.update(form.data)
|
tag.update(form.data)
|
||||||
tag['processor'] = 'restock_diff'
|
tag['processor'] = 'restock_diff'
|
||||||
tag.commit()
|
tag.commit()
|
||||||
|
|
||||||
|
# Clear checksums for all watches using this tag to force reprocessing
|
||||||
|
# Tag changes affect inherited configuration
|
||||||
|
cleared_count = datastore.clear_checksums_for_tag(uuid)
|
||||||
|
logger.info(f"Tag {uuid} updated, cleared {cleared_count} watch checksums")
|
||||||
|
|
||||||
flash(gettext("Updated"))
|
flash(gettext("Updated"))
|
||||||
|
|
||||||
return redirect(url_for('tags.tags_overview_page'))
|
return redirect(url_for('tags.tags_overview_page'))
|
||||||
|
|
||||||
|
|
||||||
@tags_blueprint.route("/delete/<string:uuid>", methods=['GET'])
|
|
||||||
def form_tag_delete(uuid):
|
|
||||||
return redirect(url_for('tags.tags_overview_page'))
|
|
||||||
return tags_blueprint
|
return tags_blueprint
|
||||||
|
|||||||
@@ -141,7 +141,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_pool,
|
|||||||
# Import the login decorator
|
# Import the login decorator
|
||||||
from changedetectionio.auth_decorator import login_optionally_required
|
from changedetectionio.auth_decorator import login_optionally_required
|
||||||
|
|
||||||
@ui_blueprint.route("/clear_history/<string:uuid>", methods=['GET'])
|
@ui_blueprint.route("/clear_history/<uuid_str:uuid>", methods=['GET'])
|
||||||
@login_optionally_required
|
@login_optionally_required
|
||||||
def clear_watch_history(uuid):
|
def clear_watch_history(uuid):
|
||||||
try:
|
try:
|
||||||
@@ -156,9 +156,9 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_pool,
|
|||||||
@login_optionally_required
|
@login_optionally_required
|
||||||
def clear_all_history():
|
def clear_all_history():
|
||||||
if request.method == 'POST':
|
if request.method == 'POST':
|
||||||
confirmtext = request.form.get('confirmtext')
|
confirmtext = request.form.get('confirmtext', '')
|
||||||
|
|
||||||
if confirmtext == 'clear':
|
if confirmtext.strip().lower() == gettext('clear').strip().lower():
|
||||||
# Run in background thread to avoid blocking
|
# Run in background thread to avoid blocking
|
||||||
def clear_history_background():
|
def clear_history_background():
|
||||||
# Capture UUIDs first to avoid race conditions
|
# Capture UUIDs first to avoid race conditions
|
||||||
@@ -194,9 +194,9 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_pool,
|
|||||||
tag_limit = request.args.get('tag')
|
tag_limit = request.args.get('tag')
|
||||||
now = int(time.time())
|
now = int(time.time())
|
||||||
|
|
||||||
# Mark watches as viewed in background thread to avoid blocking
|
# Mark watches as viewed - use background thread only for large watch counts
|
||||||
def mark_viewed_background():
|
def mark_viewed_impl():
|
||||||
"""Background thread to mark watches as viewed - discarded after completion."""
|
"""Mark watches as viewed - can run synchronously or in background thread."""
|
||||||
marked_count = 0
|
marked_count = 0
|
||||||
try:
|
try:
|
||||||
for watch_uuid, watch in datastore.data['watching'].items():
|
for watch_uuid, watch in datastore.data['watching'].items():
|
||||||
@@ -209,15 +209,21 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_pool,
|
|||||||
datastore.set_last_viewed(watch_uuid, now)
|
datastore.set_last_viewed(watch_uuid, now)
|
||||||
marked_count += 1
|
marked_count += 1
|
||||||
|
|
||||||
logger.info(f"Background marking complete: {marked_count} watches marked as viewed")
|
logger.info(f"Marking complete: {marked_count} watches marked as viewed")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error in background mark as viewed: {e}")
|
logger.error(f"Error marking as viewed: {e}")
|
||||||
|
|
||||||
# Start background thread and return immediately
|
# For small watch counts (< 10), run synchronously to avoid race conditions in tests
|
||||||
thread = threading.Thread(target=mark_viewed_background, daemon=True)
|
# For larger counts, use background thread to avoid blocking the UI
|
||||||
thread.start()
|
watch_count = len(datastore.data['watching'])
|
||||||
|
if watch_count < 10:
|
||||||
|
# Run synchronously for small watch counts
|
||||||
|
mark_viewed_impl()
|
||||||
|
else:
|
||||||
|
# Start background thread for large watch counts
|
||||||
|
thread = threading.Thread(target=mark_viewed_impl, daemon=True)
|
||||||
|
thread.start()
|
||||||
|
|
||||||
flash(gettext("Marking watches as viewed in background..."))
|
|
||||||
return redirect(url_for('watchlist.index', tag=tag_limit))
|
return redirect(url_for('watchlist.index', tag=tag_limit))
|
||||||
|
|
||||||
@ui_blueprint.route("/delete", methods=['GET'])
|
@ui_blueprint.route("/delete", methods=['GET'])
|
||||||
@@ -360,7 +366,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_pool,
|
|||||||
return redirect(url_for('watchlist.index'))
|
return redirect(url_for('watchlist.index'))
|
||||||
|
|
||||||
|
|
||||||
@ui_blueprint.route("/share-url/<string:uuid>", methods=['GET'])
|
@ui_blueprint.route("/share-url/<uuid_str:uuid>", methods=['GET'])
|
||||||
@login_optionally_required
|
@login_optionally_required
|
||||||
def form_share_put_watch(uuid):
|
def form_share_put_watch(uuid):
|
||||||
"""Given a watch UUID, upload the info and return a share-link
|
"""Given a watch UUID, upload the info and return a share-link
|
||||||
|
|||||||
@@ -66,7 +66,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
|||||||
|
|
||||||
return Markup(result)
|
return Markup(result)
|
||||||
|
|
||||||
@diff_blueprint.route("/diff/<string:uuid>", methods=['GET'])
|
@diff_blueprint.route("/diff/<uuid_str:uuid>", methods=['GET'])
|
||||||
@login_optionally_required
|
@login_optionally_required
|
||||||
def diff_history_page(uuid):
|
def diff_history_page(uuid):
|
||||||
"""
|
"""
|
||||||
@@ -128,7 +128,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
|||||||
redirect=redirect
|
redirect=redirect
|
||||||
)
|
)
|
||||||
|
|
||||||
@diff_blueprint.route("/diff/<string:uuid>/extract", methods=['GET'])
|
@diff_blueprint.route("/diff/<uuid_str:uuid>/extract", methods=['GET'])
|
||||||
@login_optionally_required
|
@login_optionally_required
|
||||||
def diff_history_page_extract_GET(uuid):
|
def diff_history_page_extract_GET(uuid):
|
||||||
"""
|
"""
|
||||||
@@ -182,7 +182,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
|||||||
redirect=redirect
|
redirect=redirect
|
||||||
)
|
)
|
||||||
|
|
||||||
@diff_blueprint.route("/diff/<string:uuid>/extract", methods=['POST'])
|
@diff_blueprint.route("/diff/<uuid_str:uuid>/extract", methods=['POST'])
|
||||||
@login_optionally_required
|
@login_optionally_required
|
||||||
def diff_history_page_extract_POST(uuid):
|
def diff_history_page_extract_POST(uuid):
|
||||||
"""
|
"""
|
||||||
@@ -238,7 +238,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
|||||||
redirect=redirect
|
redirect=redirect
|
||||||
)
|
)
|
||||||
|
|
||||||
@diff_blueprint.route("/diff/<string:uuid>/processor-asset/<string:asset_name>", methods=['GET'])
|
@diff_blueprint.route("/diff/<uuid_str:uuid>/processor-asset/<string:asset_name>", methods=['GET'])
|
||||||
@login_optionally_required
|
@login_optionally_required
|
||||||
def processor_asset(uuid, asset_name):
|
def processor_asset(uuid, asset_name):
|
||||||
"""
|
"""
|
||||||
|
|||||||
@@ -20,13 +20,13 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
|||||||
if tag_uuid in watch.get('tags', []) and (tag.get('include_filters') or tag.get('subtractive_selectors')):
|
if tag_uuid in watch.get('tags', []) and (tag.get('include_filters') or tag.get('subtractive_selectors')):
|
||||||
return True
|
return True
|
||||||
|
|
||||||
@edit_blueprint.route("/edit/<string:uuid>", methods=['GET', 'POST'])
|
@edit_blueprint.route("/edit/<uuid_str:uuid>", methods=['GET', 'POST'])
|
||||||
@login_optionally_required
|
@login_optionally_required
|
||||||
# https://stackoverflow.com/questions/42984453/wtforms-populate-form-with-data-if-data-exists
|
# https://stackoverflow.com/questions/42984453/wtforms-populate-form-with-data-if-data-exists
|
||||||
# https://wtforms.readthedocs.io/en/3.0.x/forms/#wtforms.form.Form.populate_obj ?
|
# https://wtforms.readthedocs.io/en/3.0.x/forms/#wtforms.form.Form.populate_obj ?
|
||||||
def edit_page(uuid):
|
def edit_page(uuid):
|
||||||
from changedetectionio import forms
|
from changedetectionio import forms
|
||||||
from changedetectionio.blueprint.browser_steps.browser_steps import browser_step_ui_config
|
from changedetectionio.browser_steps.browser_steps import browser_step_ui_config
|
||||||
from changedetectionio import processors
|
from changedetectionio import processors
|
||||||
import importlib
|
import importlib
|
||||||
|
|
||||||
@@ -117,12 +117,25 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
|||||||
processor_config = processor_instance.get_extra_watch_config(config_filename)
|
processor_config = processor_instance.get_extra_watch_config(config_filename)
|
||||||
|
|
||||||
if processor_config:
|
if processor_config:
|
||||||
|
from wtforms.fields.form import FormField
|
||||||
# Populate processor-config-* fields from JSON
|
# Populate processor-config-* fields from JSON
|
||||||
for config_key, config_value in processor_config.items():
|
for config_key, config_value in processor_config.items():
|
||||||
field_name = f'processor_config_{config_key}'
|
if not isinstance(config_value, dict):
|
||||||
if hasattr(form, field_name):
|
continue
|
||||||
getattr(form, field_name).data = config_value
|
# Try exact API-named field first (e.g., processor_config_restock_diff)
|
||||||
logger.debug(f"Loaded processor config from {config_filename}: {field_name} = {config_value}")
|
target_field = getattr(form, f'processor_config_{config_key}', None)
|
||||||
|
# Fallback: find any FormField sub-form whose fields cover config_value keys
|
||||||
|
if target_field is None:
|
||||||
|
for form_field in form:
|
||||||
|
if isinstance(form_field, FormField) and all(k in form_field.form._fields for k in config_value):
|
||||||
|
target_field = form_field
|
||||||
|
break
|
||||||
|
if target_field is not None:
|
||||||
|
for sub_key, sub_value in config_value.items():
|
||||||
|
sub_field = target_field.form._fields.get(sub_key)
|
||||||
|
if sub_field is not None:
|
||||||
|
sub_field.data = sub_value
|
||||||
|
logger.debug(f"Loaded processor config from {config_filename}: {sub_key} = {sub_value}")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning(f"Failed to load processor config: {e}")
|
logger.warning(f"Failed to load processor config: {e}")
|
||||||
|
|
||||||
@@ -327,7 +340,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
|||||||
|
|
||||||
return output
|
return output
|
||||||
|
|
||||||
@edit_blueprint.route("/edit/<string:uuid>/get-html", methods=['GET'])
|
@edit_blueprint.route("/edit/<uuid_str:uuid>/get-html", methods=['GET'])
|
||||||
@login_optionally_required
|
@login_optionally_required
|
||||||
def watch_get_latest_html(uuid):
|
def watch_get_latest_html(uuid):
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
@@ -354,8 +367,58 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
|||||||
# Return a 500 error
|
# Return a 500 error
|
||||||
abort(500)
|
abort(500)
|
||||||
|
|
||||||
|
@edit_blueprint.route("/edit/<uuid_str:uuid>/get-data-package", methods=['GET'])
|
||||||
|
@login_optionally_required
|
||||||
|
def watch_get_data_package(uuid):
|
||||||
|
"""Download all data for a single watch as a zip file"""
|
||||||
|
from io import BytesIO
|
||||||
|
from flask import send_file
|
||||||
|
import zipfile
|
||||||
|
from pathlib import Path
|
||||||
|
import datetime
|
||||||
|
|
||||||
|
watch = datastore.data['watching'].get(uuid)
|
||||||
|
if not watch:
|
||||||
|
abort(404)
|
||||||
|
|
||||||
|
# Create zip in memory
|
||||||
|
memory_file = BytesIO()
|
||||||
|
|
||||||
|
with zipfile.ZipFile(memory_file, 'w',
|
||||||
|
compression=zipfile.ZIP_DEFLATED,
|
||||||
|
compresslevel=8) as zipObj:
|
||||||
|
|
||||||
|
# Add the watch's JSON file if it exists
|
||||||
|
watch_json_path = os.path.join(watch.data_dir, 'watch.json')
|
||||||
|
if os.path.isfile(watch_json_path):
|
||||||
|
zipObj.write(watch_json_path,
|
||||||
|
arcname=os.path.join(uuid, 'watch.json'),
|
||||||
|
compress_type=zipfile.ZIP_DEFLATED,
|
||||||
|
compresslevel=8)
|
||||||
|
|
||||||
|
# Add all files in the watch data directory
|
||||||
|
if os.path.isdir(watch.data_dir):
|
||||||
|
for f in Path(watch.data_dir).glob('*'):
|
||||||
|
if f.is_file() and f.name != 'watch.json': # Skip watch.json since we already added it
|
||||||
|
zipObj.write(f,
|
||||||
|
arcname=os.path.join(uuid, f.name),
|
||||||
|
compress_type=zipfile.ZIP_DEFLATED,
|
||||||
|
compresslevel=8)
|
||||||
|
|
||||||
|
# Seek to beginning of file
|
||||||
|
memory_file.seek(0)
|
||||||
|
|
||||||
|
# Generate filename with timestamp
|
||||||
|
timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
|
||||||
|
filename = f"watch-data-{uuid[:8]}-{timestamp}.zip"
|
||||||
|
|
||||||
|
return send_file(memory_file,
|
||||||
|
as_attachment=True,
|
||||||
|
download_name=filename,
|
||||||
|
mimetype='application/zip')
|
||||||
|
|
||||||
# Ajax callback
|
# Ajax callback
|
||||||
@edit_blueprint.route("/edit/<string:uuid>/preview-rendered", methods=['POST'])
|
@edit_blueprint.route("/edit/<uuid_str:uuid>/preview-rendered", methods=['POST'])
|
||||||
@login_optionally_required
|
@login_optionally_required
|
||||||
def watch_get_preview_rendered(uuid):
|
def watch_get_preview_rendered(uuid):
|
||||||
'''For when viewing the "preview" of the rendered text from inside of Edit'''
|
'''For when viewing the "preview" of the rendered text from inside of Edit'''
|
||||||
|
|||||||
@@ -10,7 +10,8 @@ from changedetectionio import html_tools
|
|||||||
def construct_blueprint(datastore: ChangeDetectionStore):
|
def construct_blueprint(datastore: ChangeDetectionStore):
|
||||||
preview_blueprint = Blueprint('ui_preview', __name__, template_folder="../ui/templates")
|
preview_blueprint = Blueprint('ui_preview', __name__, template_folder="../ui/templates")
|
||||||
|
|
||||||
@preview_blueprint.route("/preview/<string:uuid>", methods=['GET'])
|
|
||||||
|
@preview_blueprint.route("/preview/<uuid_str:uuid>", methods=['GET', 'POST'])
|
||||||
@login_optionally_required
|
@login_optionally_required
|
||||||
def preview_page(uuid):
|
def preview_page(uuid):
|
||||||
"""
|
"""
|
||||||
@@ -59,12 +60,8 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
|||||||
versions = []
|
versions = []
|
||||||
timestamp = None
|
timestamp = None
|
||||||
|
|
||||||
system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver'
|
|
||||||
extra_stylesheets = [url_for('static_content', group='styles', filename='diff.css')]
|
extra_stylesheets = [url_for('static_content', group='styles', filename='diff.css')]
|
||||||
|
is_html_webdriver = watch.fetcher_supports_screenshots
|
||||||
is_html_webdriver = False
|
|
||||||
if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver' or watch.get('fetch_backend', '').startswith('extra_browser_'):
|
|
||||||
is_html_webdriver = True
|
|
||||||
|
|
||||||
triggered_line_numbers = []
|
triggered_line_numbers = []
|
||||||
ignored_line_numbers = []
|
ignored_line_numbers = []
|
||||||
@@ -74,7 +71,9 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
|||||||
flash(gettext("Preview unavailable - No fetch/check completed or triggers not reached"), "error")
|
flash(gettext("Preview unavailable - No fetch/check completed or triggers not reached"), "error")
|
||||||
else:
|
else:
|
||||||
# So prepare the latest preview or not
|
# So prepare the latest preview or not
|
||||||
preferred_version = request.args.get('version')
|
preferred_version = request.values.get('version') if request.method == 'POST' else request.args.get('version')
|
||||||
|
|
||||||
|
|
||||||
versions = list(watch.history.keys())
|
versions = list(watch.history.keys())
|
||||||
timestamp = versions[-1]
|
timestamp = versions[-1]
|
||||||
if preferred_version and preferred_version in versions:
|
if preferred_version and preferred_version in versions:
|
||||||
@@ -125,7 +124,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
|||||||
|
|
||||||
return output
|
return output
|
||||||
|
|
||||||
@preview_blueprint.route("/preview/<string:uuid>/processor-asset/<string:asset_name>", methods=['GET'])
|
@preview_blueprint.route("/preview/<uuid_str:uuid>/processor-asset/<string:asset_name>", methods=['GET'])
|
||||||
@login_optionally_required
|
@login_optionally_required
|
||||||
def processor_asset(uuid, asset_name):
|
def processor_asset(uuid, asset_name):
|
||||||
"""
|
"""
|
||||||
|
|||||||
@@ -488,6 +488,7 @@ Math: {{ 1 + 1 }}") }}
|
|||||||
{% if watch.history_n %}
|
{% if watch.history_n %}
|
||||||
<p>
|
<p>
|
||||||
<a href="{{url_for('ui.ui_edit.watch_get_latest_html', uuid=uuid)}}" class="pure-button button-small">{{ _('Download latest HTML snapshot') }}</a>
|
<a href="{{url_for('ui.ui_edit.watch_get_latest_html', uuid=uuid)}}" class="pure-button button-small">{{ _('Download latest HTML snapshot') }}</a>
|
||||||
|
<a href="{{url_for('ui.ui_edit.watch_get_data_package', uuid=uuid)}}" class="pure-button button-small">{{ _('Download watch data package') }}</a>
|
||||||
</p>
|
</p>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
|
|
||||||
|
|||||||
@@ -17,7 +17,7 @@
|
|||||||
<script src="{{ url_for('static_content', group='js', filename='tabs.js') }}" defer></script>
|
<script src="{{ url_for('static_content', group='js', filename='tabs.js') }}" defer></script>
|
||||||
{% if versions|length >= 2 %}
|
{% if versions|length >= 2 %}
|
||||||
<div id="diff-form" style="text-align: center;">
|
<div id="diff-form" style="text-align: center;">
|
||||||
<form class="pure-form " action="" method="POST">
|
<form class="pure-form " action="{{url_for('ui.ui_preview.preview_page', uuid=uuid)}}" method="POST">
|
||||||
<fieldset>
|
<fieldset>
|
||||||
<label for="preview-version">{{ _('Select timestamp') }}</label> <select id="preview-version"
|
<label for="preview-version">{{ _('Select timestamp') }}</label> <select id="preview-version"
|
||||||
name="from_version"
|
name="from_version"
|
||||||
@@ -28,6 +28,7 @@
|
|||||||
</option>
|
</option>
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
</select>
|
</select>
|
||||||
|
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}">
|
||||||
<button type="submit" class="pure-button pure-button-primary">{{ _('Go') }}</button>
|
<button type="submit" class="pure-button pure-button-primary">{{ _('Go') }}</button>
|
||||||
|
|
||||||
</fieldset>
|
</fieldset>
|
||||||
|
|||||||
@@ -81,6 +81,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
|||||||
|
|
||||||
sorted_tags = sorted(datastore.data['settings']['application'].get('tags').items(), key=lambda x: x[1]['title'])
|
sorted_tags = sorted(datastore.data['settings']['application'].get('tags').items(), key=lambda x: x[1]['title'])
|
||||||
|
|
||||||
|
proxy_list = datastore.proxy_list
|
||||||
output = render_template(
|
output = render_template(
|
||||||
"watch-overview.html",
|
"watch-overview.html",
|
||||||
active_tag=active_tag,
|
active_tag=active_tag,
|
||||||
@@ -92,7 +93,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
|||||||
form=form,
|
form=form,
|
||||||
generate_tag_colors=processors.generate_processor_badge_colors,
|
generate_tag_colors=processors.generate_processor_badge_colors,
|
||||||
guid=datastore.data['app_guid'],
|
guid=datastore.data['app_guid'],
|
||||||
has_proxies=datastore.proxy_list,
|
has_proxies=proxy_list,
|
||||||
hosted_sticky=os.getenv("SALTED_PASS", False) == False,
|
hosted_sticky=os.getenv("SALTED_PASS", False) == False,
|
||||||
now_time_server=round(time.time()),
|
now_time_server=round(time.time()),
|
||||||
pagination=pagination,
|
pagination=pagination,
|
||||||
@@ -110,6 +111,16 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
|||||||
watches=sorted_watches
|
watches=sorted_watches
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Return freed template-building memory to the OS immediately.
|
||||||
|
# render_template allocates ~20MB of intermediate strings that are freed on return,
|
||||||
|
# but glibc keeps those pages mapped in its arenas as RSS. malloc_trim() forces
|
||||||
|
# glibc to release them, preventing RSS growth from concurrent Chrome connections.
|
||||||
|
try:
|
||||||
|
import ctypes
|
||||||
|
ctypes.CDLL('libc.so.6').malloc_trim(0)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
if session.get('share-link'):
|
if session.get('share-link'):
|
||||||
del (session['share-link'])
|
del (session['share-link'])
|
||||||
|
|
||||||
|
|||||||
@@ -213,12 +213,13 @@ html[data-darkmode="true"] .watch-tag-list.tag-{{ class_name }} {
|
|||||||
{%- set checking_now = is_checking_now(watch) -%}
|
{%- set checking_now = is_checking_now(watch) -%}
|
||||||
{%- set history_n = watch.history_n -%}
|
{%- set history_n = watch.history_n -%}
|
||||||
{%- set favicon = watch.get_favicon_filename() -%}
|
{%- set favicon = watch.get_favicon_filename() -%}
|
||||||
|
{%- set error_texts = watch.compile_error_texts(has_proxies=has_proxies) -%}
|
||||||
{%- set system_use_url_watchlist = datastore.data['settings']['application']['ui'].get('use_page_title_in_list') -%}
|
{%- set system_use_url_watchlist = datastore.data['settings']['application']['ui'].get('use_page_title_in_list') -%}
|
||||||
{# Class settings mirrored in changedetectionio/static/js/realtime.js for the frontend #}
|
{# Class settings mirrored in changedetectionio/static/js/realtime.js for the frontend #}
|
||||||
{%- set row_classes = [
|
{%- set row_classes = [
|
||||||
loop.cycle('pure-table-odd', 'pure-table-even'),
|
loop.cycle('pure-table-odd', 'pure-table-even'),
|
||||||
'processor-' ~ watch['processor'],
|
'processor-' ~ watch['processor'],
|
||||||
'has-error' if watch.compile_error_texts()|length > 2 else '',
|
'has-error' if error_texts|length > 2 else '',
|
||||||
'paused' if watch.paused is defined and watch.paused != False else '',
|
'paused' if watch.paused is defined and watch.paused != False else '',
|
||||||
'unviewed' if watch.has_unviewed else '',
|
'unviewed' if watch.has_unviewed else '',
|
||||||
'has-restock-info' if watch.has_restock_info else 'no-restock-info',
|
'has-restock-info' if watch.has_restock_info else 'no-restock-info',
|
||||||
@@ -271,7 +272,7 @@ html[data-darkmode="true"] .watch-tag-list.tag-{{ class_name }} {
|
|||||||
{% endif %}
|
{% endif %}
|
||||||
<a class="external" target="_blank" rel="noopener" href="{{ watch.link.replace('source:','') }}"> </a>
|
<a class="external" target="_blank" rel="noopener" href="{{ watch.link.replace('source:','') }}"> </a>
|
||||||
</span>
|
</span>
|
||||||
<div class="error-text" style="display:none;">{{ watch.compile_error_texts(has_proxies=datastore.proxy_list)|safe }}</div>
|
<div class="error-text" style="display:none;">{{ error_texts|safe }}</div>
|
||||||
{%- if watch['processor'] == 'text_json_diff' -%}
|
{%- if watch['processor'] == 'text_json_diff' -%}
|
||||||
{%- if watch['has_ldjson_price_data'] and not watch['track_ldjson_price_data'] -%}
|
{%- if watch['has_ldjson_price_data'] and not watch['track_ldjson_price_data'] -%}
|
||||||
<div class="ldjson-price-track-offer">Switch to Restock & Price watch mode? <a href="{{url_for('price_data_follower.accept', uuid=watch.uuid)}}" class="pure-button button-xsmall">Yes</a> <a href="{{url_for('price_data_follower.reject', uuid=watch.uuid)}}" class="">No</a></div>
|
<div class="ldjson-price-track-offer">Switch to Restock & Price watch mode? <a href="{{url_for('price_data_follower.accept', uuid=watch.uuid)}}" class="pure-button button-xsmall">Yes</a> <a href="{{url_for('price_data_follower.reject', uuid=watch.uuid)}}" class="">No</a></div>
|
||||||
@@ -304,11 +305,20 @@ html[data-darkmode="true"] .watch-tag-list.tag-{{ class_name }} {
|
|||||||
</span>
|
</span>
|
||||||
{%- endif -%}
|
{%- endif -%}
|
||||||
|
|
||||||
{%- if watch.get('restock') and watch['restock']['price'] != None -%}
|
{%- if watch.get('restock') and watch['restock'].get('price') -%}
|
||||||
{%- if watch['restock']['price'] != None -%}
|
{%- set restock = watch['restock'] -%}
|
||||||
<span class="restock-label price" title="{{ _('Price') }}">
|
{%- set price = restock.get('price') -%}
|
||||||
{{ watch['restock']['price']|format_number_locale if watch['restock'].get('price') else '' }} {{ watch['restock'].get('currency','') }}
|
{%- set cur = restock.get('currency','') -%}
|
||||||
</span>
|
|
||||||
|
{%- if price is not none and (price|string)|regex_search('\d') -%}
|
||||||
|
<span class="restock-label price" title="{{ _('Price') }}">
|
||||||
|
{# @todo: make parse_currency/parse_decimal aware of the locale of the actual web page and use that instead changedetectionio/processors/restock_diff/__init__.py #}
|
||||||
|
{%- if price is number -%}{# It's a number so we can convert it to their locale' #}
|
||||||
|
{{ price|format_number_locale }} {{ cur }}<!-- as number -->
|
||||||
|
{%- else -%}{# It's totally fine if it arrives as something else, the website might be something weird in this field #}
|
||||||
|
{{ price }} {{ cur }}<!-- as string -->
|
||||||
|
{%- endif -%}
|
||||||
|
</span>
|
||||||
{%- endif -%}
|
{%- endif -%}
|
||||||
{%- elif not watch.has_restock_info -%}
|
{%- elif not watch.has_restock_info -%}
|
||||||
<span class="restock-label error">{{ _('No information') }}</span>
|
<span class="restock-label error">{{ _('No information') }}</span>
|
||||||
|
|||||||
+11
@@ -8,6 +8,17 @@ from changedetectionio.content_fetchers import SCREENSHOT_MAX_HEIGHT_DEFAULT
|
|||||||
from changedetectionio.content_fetchers.base import manage_user_agent
|
from changedetectionio.content_fetchers.base import manage_user_agent
|
||||||
from changedetectionio.jinja2_custom import render as jinja_render
|
from changedetectionio.jinja2_custom import render as jinja_render
|
||||||
|
|
||||||
|
def browser_steps_get_valid_steps(browser_steps: list):
    """Return the browser steps that name a real operation to run.

    A step is kept when its 'operation' value is non-empty and is not
    'Choose one'. A step dict that has no 'operation' key is treated the
    same as an empty operation and dropped (instead of raising KeyError).

    Args:
        browser_steps: List of step dicts, or None.

    Returns:
        A new list containing the valid step dicts (the list passed in is
        not modified). An empty list when `browser_steps` is None/empty or
        nothing in it is valid.
    """
    if not browser_steps:
        return []

    valid_steps = [
        step for step in browser_steps
        if step.get('operation') and step['operation'] != 'Choose one'
    ]

    # Just in case they selected Goto site by accident with older JS
    if valid_steps and valid_steps[0]['operation'] == 'Goto site':
        del valid_steps[0]

    return valid_steps
|
||||||
|
|
||||||
|
|
||||||
# Two flags, tell the JS which of the "Selector" or "Value" field should be enabled in the front end
|
# Two flags, tell the JS which of the "Selector" or "Value" field should be enabled in the front end
|
||||||
@@ -38,7 +38,6 @@ def manage_user_agent(headers, current_ua=''):
|
|||||||
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
class Fetcher():
|
class Fetcher():
|
||||||
browser_connection_is_custom = None
|
browser_connection_is_custom = None
|
||||||
browser_connection_url = None
|
browser_connection_url = None
|
||||||
@@ -163,30 +162,16 @@ class Fetcher():
|
|||||||
"""
|
"""
|
||||||
return {k.lower(): v for k, v in self.headers.items()}
|
return {k.lower(): v for k, v in self.headers.items()}
|
||||||
|
|
||||||
def browser_steps_get_valid_steps(self):
|
|
||||||
if self.browser_steps is not None and len(self.browser_steps):
|
|
||||||
valid_steps = list(filter(
|
|
||||||
lambda s: (s['operation'] and len(s['operation']) and s['operation'] != 'Choose one'),
|
|
||||||
self.browser_steps))
|
|
||||||
|
|
||||||
# Just incase they selected Goto site by accident with older JS
|
|
||||||
if valid_steps and valid_steps[0]['operation'] == 'Goto site':
|
|
||||||
del(valid_steps[0])
|
|
||||||
|
|
||||||
return valid_steps
|
|
||||||
|
|
||||||
return None
|
|
||||||
|
|
||||||
async def iterate_browser_steps(self, start_url=None):
|
async def iterate_browser_steps(self, start_url=None):
|
||||||
from changedetectionio.blueprint.browser_steps.browser_steps import steppable_browser_interface
|
from changedetectionio.browser_steps.browser_steps import steppable_browser_interface, browser_steps_get_valid_steps
|
||||||
from playwright._impl._errors import TimeoutError, Error
|
from playwright._impl._errors import TimeoutError, Error
|
||||||
from changedetectionio.jinja2_custom import render as jinja_render
|
from changedetectionio.jinja2_custom import render as jinja_render
|
||||||
step_n = 0
|
step_n = 0
|
||||||
|
|
||||||
if self.browser_steps is not None and len(self.browser_steps):
|
if self.browser_steps:
|
||||||
interface = steppable_browser_interface(start_url=start_url)
|
interface = steppable_browser_interface(start_url=start_url)
|
||||||
interface.page = self.page
|
interface.page = self.page
|
||||||
valid_steps = self.browser_steps_get_valid_steps()
|
valid_steps = browser_steps_get_valid_steps(self.browser_steps)
|
||||||
|
|
||||||
for step in valid_steps:
|
for step in valid_steps:
|
||||||
step_n += 1
|
step_n += 1
|
||||||
|
|||||||
@@ -295,7 +295,7 @@ class fetcher(Fetcher):
|
|||||||
self.page.on("console", lambda msg: logger.debug(f"Playwright console: Watch URL: {url} {msg.type}: {msg.text} {msg.args}"))
|
self.page.on("console", lambda msg: logger.debug(f"Playwright console: Watch URL: {url} {msg.type}: {msg.text} {msg.args}"))
|
||||||
|
|
||||||
# Re-use as much code from browser steps as possible so its the same
|
# Re-use as much code from browser steps as possible so its the same
|
||||||
from changedetectionio.blueprint.browser_steps.browser_steps import steppable_browser_interface
|
from changedetectionio.browser_steps.browser_steps import steppable_browser_interface
|
||||||
browsersteps_interface = steppable_browser_interface(start_url=url)
|
browsersteps_interface = steppable_browser_interface(start_url=url)
|
||||||
browsersteps_interface.page = self.page
|
browsersteps_interface.page = self.page
|
||||||
|
|
||||||
@@ -362,7 +362,7 @@ class fetcher(Fetcher):
|
|||||||
# Wrap remaining operations in try/finally to ensure cleanup
|
# Wrap remaining operations in try/finally to ensure cleanup
|
||||||
try:
|
try:
|
||||||
# Run Browser Steps here
|
# Run Browser Steps here
|
||||||
if self.browser_steps_get_valid_steps():
|
if self.browser_steps:
|
||||||
try:
|
try:
|
||||||
await self.iterate_browser_steps(start_url=url)
|
await self.iterate_browser_steps(start_url=url)
|
||||||
except BrowserStepsStepException:
|
except BrowserStepsStepException:
|
||||||
|
|||||||
@@ -86,8 +86,8 @@ async def capture_full_page(page, screenshot_format='JPEG', watch_uuid=None, loc
|
|||||||
# better than scrollTo incase they override it in the page
|
# better than scrollTo incase they override it in the page
|
||||||
await page.evaluate(
|
await page.evaluate(
|
||||||
"""(y) => {
|
"""(y) => {
|
||||||
document.documentElement.scrollTop = y;
|
const el = document.scrollingElement;
|
||||||
document.body.scrollTop = y;
|
if (el) el.scrollTop = y;
|
||||||
}""",
|
}""",
|
||||||
y
|
y
|
||||||
)
|
)
|
||||||
@@ -305,6 +305,8 @@ class fetcher(Fetcher):
|
|||||||
await asyncio.wait_for(self.browser.close(), timeout=3.0)
|
await asyncio.wait_for(self.browser.close(), timeout=3.0)
|
||||||
except Exception as cleanup_error:
|
except Exception as cleanup_error:
|
||||||
logger.error(f"[{watch_uuid}] Failed to cleanup browser after page creation failure: {cleanup_error}")
|
logger.error(f"[{watch_uuid}] Failed to cleanup browser after page creation failure: {cleanup_error}")
|
||||||
|
finally:
|
||||||
|
self.browser = None
|
||||||
raise
|
raise
|
||||||
|
|
||||||
# Add console handler to capture console.log from favicon fetcher
|
# Add console handler to capture console.log from favicon fetcher
|
||||||
@@ -456,7 +458,7 @@ class fetcher(Fetcher):
|
|||||||
|
|
||||||
# Run Browser Steps here
|
# Run Browser Steps here
|
||||||
# @todo not yet supported, we switch to playwright in this case
|
# @todo not yet supported, we switch to playwright in this case
|
||||||
# if self.browser_steps_get_valid_steps():
|
# if self.browser_steps:
|
||||||
# self.iterate_browser_steps()
|
# self.iterate_browser_steps()
|
||||||
|
|
||||||
|
|
||||||
@@ -532,6 +534,14 @@ class fetcher(Fetcher):
|
|||||||
)
|
)
|
||||||
except asyncio.TimeoutError:
|
except asyncio.TimeoutError:
|
||||||
raise (BrowserFetchTimedOut(msg=f"Browser connected but was unable to process the page in {max_time} seconds."))
|
raise (BrowserFetchTimedOut(msg=f"Browser connected but was unable to process the page in {max_time} seconds."))
|
||||||
|
finally:
|
||||||
|
# Internal cleanup on any exception/timeout - call quit() immediately
|
||||||
|
# This prevents connection leaks during exception bursts
|
||||||
|
# Worker.py's quit() call becomes a redundant safety net (idempotent)
|
||||||
|
try:
|
||||||
|
await self.quit(watch={'uuid': watch_uuid} if watch_uuid else None)
|
||||||
|
except Exception as cleanup_error:
|
||||||
|
logger.error(f"[{watch_uuid}] Error during internal quit() cleanup: {cleanup_error}")
|
||||||
|
|
||||||
|
|
||||||
# Plugin registration for built-in fetcher
|
# Plugin registration for built-in fetcher
|
||||||
|
|||||||
@@ -1,12 +1,14 @@
|
|||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
from urllib.parse import urljoin, urlparse
|
||||||
import hashlib
|
import hashlib
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import asyncio
|
import asyncio
|
||||||
from functools import partial
|
|
||||||
from changedetectionio import strtobool
|
from changedetectionio import strtobool
|
||||||
from changedetectionio.content_fetchers.exceptions import BrowserStepsInUnsupportedFetcher, EmptyReply, Non200ErrorCodeReceived
|
from changedetectionio.content_fetchers.exceptions import BrowserStepsInUnsupportedFetcher, EmptyReply, Non200ErrorCodeReceived
|
||||||
from changedetectionio.content_fetchers.base import Fetcher
|
from changedetectionio.content_fetchers.base import Fetcher
|
||||||
|
from changedetectionio.validate_url import is_private_hostname
|
||||||
|
|
||||||
|
|
||||||
# "html_requests" is listed as the default fetcher in store.py!
|
# "html_requests" is listed as the default fetcher in store.py!
|
||||||
@@ -36,7 +38,7 @@ class fetcher(Fetcher):
|
|||||||
import requests
|
import requests
|
||||||
from requests.exceptions import ProxyError, ConnectionError, RequestException
|
from requests.exceptions import ProxyError, ConnectionError, RequestException
|
||||||
|
|
||||||
if self.browser_steps_get_valid_steps():
|
if self.browser_steps:
|
||||||
raise BrowserStepsInUnsupportedFetcher(url=url)
|
raise BrowserStepsInUnsupportedFetcher(url=url)
|
||||||
|
|
||||||
proxies = {}
|
proxies = {}
|
||||||
@@ -79,14 +81,48 @@ class fetcher(Fetcher):
|
|||||||
if strtobool(os.getenv('ALLOW_FILE_URI', 'false')) and url.startswith('file://'):
|
if strtobool(os.getenv('ALLOW_FILE_URI', 'false')) and url.startswith('file://'):
|
||||||
from requests_file import FileAdapter
|
from requests_file import FileAdapter
|
||||||
session.mount('file://', FileAdapter())
|
session.mount('file://', FileAdapter())
|
||||||
|
|
||||||
|
allow_iana_restricted = strtobool(os.getenv('ALLOW_IANA_RESTRICTED_ADDRESSES', 'false'))
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
# Fresh DNS check at fetch time — catches DNS rebinding regardless of add-time cache.
|
||||||
|
if not allow_iana_restricted:
|
||||||
|
parsed_initial = urlparse(url)
|
||||||
|
if parsed_initial.hostname and is_private_hostname(parsed_initial.hostname):
|
||||||
|
raise Exception(f"Fetch blocked: '{url}' resolves to a private/reserved IP address. "
|
||||||
|
f"Set ALLOW_IANA_RESTRICTED_ADDRESSES=true to allow.")
|
||||||
|
|
||||||
r = session.request(method=request_method,
|
r = session.request(method=request_method,
|
||||||
data=request_body.encode('utf-8') if type(request_body) is str else request_body,
|
data=request_body.encode('utf-8') if type(request_body) is str else request_body,
|
||||||
url=url,
|
url=url,
|
||||||
headers=request_headers,
|
headers=request_headers,
|
||||||
timeout=timeout,
|
timeout=timeout,
|
||||||
proxies=proxies,
|
proxies=proxies,
|
||||||
verify=False)
|
verify=False,
|
||||||
|
allow_redirects=False)
|
||||||
|
|
||||||
|
# Manually follow redirects so each hop's resolved IP can be validated,
|
||||||
|
# preventing SSRF via an open redirect on a public host.
|
||||||
|
current_url = url
|
||||||
|
for _ in range(10):
|
||||||
|
if not r.is_redirect:
|
||||||
|
break
|
||||||
|
location = r.headers.get('Location', '')
|
||||||
|
redirect_url = urljoin(current_url, location)
|
||||||
|
if not allow_iana_restricted:
|
||||||
|
parsed_redirect = urlparse(redirect_url)
|
||||||
|
if parsed_redirect.hostname and is_private_hostname(parsed_redirect.hostname):
|
||||||
|
raise Exception(f"Redirect blocked: '{redirect_url}' resolves to a private/reserved IP address.")
|
||||||
|
current_url = redirect_url
|
||||||
|
r = session.request('GET', redirect_url,
|
||||||
|
headers=request_headers,
|
||||||
|
timeout=timeout,
|
||||||
|
proxies=proxies,
|
||||||
|
verify=False,
|
||||||
|
allow_redirects=False)
|
||||||
|
else:
|
||||||
|
raise Exception("Too many redirects")
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
msg = str(e)
|
msg = str(e)
|
||||||
if proxies and 'SOCKSHTTPSConnectionPool' in msg:
|
if proxies and 'SOCKSHTTPSConnectionPool' in msg:
|
||||||
@@ -112,10 +148,32 @@ class fetcher(Fetcher):
|
|||||||
# Default to UTF-8 for XML if no encoding found
|
# Default to UTF-8 for XML if no encoding found
|
||||||
r.encoding = 'utf-8'
|
r.encoding = 'utf-8'
|
||||||
else:
|
else:
|
||||||
# For other content types, use chardet
|
# No charset in HTTP header - sniff encoding in priority order matching browsers
|
||||||
encoding = chardet.detect(r.content)['encoding']
|
# (WHATWG encoding sniffing algorithm):
|
||||||
if encoding:
|
# 1. BOM - highest confidence, check before anything else
|
||||||
r.encoding = encoding
|
# 2. <meta charset> in first 2kb
|
||||||
|
# 3. chardet statistical detection - last resort
|
||||||
|
# See: https://github.com/dgtlmoon/changedetection.io/issues/3952
|
||||||
|
boms = [
|
||||||
|
(b'\xef\xbb\xbf', 'utf-8-sig'),
|
||||||
|
(b'\xff\xfe', 'utf-16-le'),
|
||||||
|
(b'\xfe\xff', 'utf-16-be'),
|
||||||
|
]
|
||||||
|
bom_encoding = next((enc for bom, enc in boms if r.content.startswith(bom)), None)
|
||||||
|
if bom_encoding:
|
||||||
|
logger.info(f"URL: {url} Using encoding '{bom_encoding}' detected from BOM")
|
||||||
|
r.encoding = bom_encoding
|
||||||
|
else:
|
||||||
|
meta_charset_match = re.search(rb'<meta[^>]+charset\s*=\s*["\']?\s*([^"\'\s;>]+)', r.content[:2000], re.IGNORECASE)
|
||||||
|
if meta_charset_match:
|
||||||
|
encoding = meta_charset_match.group(1).decode('ascii', errors='ignore')
|
||||||
|
logger.info(f"URL: {url} No content-type encoding in HTTP headers - Using encoding '{encoding}' from HTML meta charset tag")
|
||||||
|
r.encoding = encoding
|
||||||
|
else:
|
||||||
|
encoding = chardet.detect(r.content)['encoding']
|
||||||
|
logger.warning(f"URL: {url} No charset in headers or meta tag, guessed encoding as '{encoding}' via chardet")
|
||||||
|
if encoding:
|
||||||
|
r.encoding = encoding
|
||||||
|
|
||||||
self.headers = r.headers
|
self.headers = r.headers
|
||||||
|
|
||||||
@@ -184,7 +242,6 @@ class fetcher(Fetcher):
|
|||||||
)
|
)
|
||||||
|
|
||||||
async def quit(self, watch=None):
|
async def quit(self, watch=None):
|
||||||
|
|
||||||
# In case they switched to `requests` fetcher from something else
|
# In case they switched to `requests` fetcher from something else
|
||||||
# Then the screenshot could be old, in any case, it's not used here.
|
# Then the screenshot could be old, in any case, it's not used here.
|
||||||
# REMOVE_REQUESTS_OLD_SCREENSHOTS - Mainly used for testing
|
# REMOVE_REQUESTS_OLD_SCREENSHOTS - Mainly used for testing
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ import flask_login
|
|||||||
import locale
|
import locale
|
||||||
import os
|
import os
|
||||||
import queue
|
import queue
|
||||||
|
import re
|
||||||
import sys
|
import sys
|
||||||
import threading
|
import threading
|
||||||
import time
|
import time
|
||||||
@@ -27,7 +28,6 @@ from flask import (
|
|||||||
session,
|
session,
|
||||||
url_for,
|
url_for,
|
||||||
)
|
)
|
||||||
from flask_compress import Compress as FlaskCompress
|
|
||||||
from flask_restful import abort, Api
|
from flask_restful import abort, Api
|
||||||
from flask_cors import CORS
|
from flask_cors import CORS
|
||||||
|
|
||||||
@@ -40,7 +40,7 @@ from loguru import logger
|
|||||||
|
|
||||||
from changedetectionio import __version__
|
from changedetectionio import __version__
|
||||||
from changedetectionio import queuedWatchMetaData
|
from changedetectionio import queuedWatchMetaData
|
||||||
from changedetectionio.api import Watch, WatchHistory, WatchSingleHistory, WatchHistoryDiff, CreateWatch, Import, SystemInfo, Tag, Tags, Notifications, WatchFavicon
|
from changedetectionio.api import Watch, WatchHistory, WatchSingleHistory, WatchHistoryDiff, CreateWatch, Import, SystemInfo, Tag, Tags, Notifications, WatchFavicon, Spec
|
||||||
from changedetectionio.api.Search import Search
|
from changedetectionio.api.Search import Search
|
||||||
from .time_handler import is_within_schedule
|
from .time_handler import is_within_schedule
|
||||||
from changedetectionio.languages import get_available_languages, get_language_codes, get_flag_for_locale, get_timeago_locale
|
from changedetectionio.languages import get_available_languages, get_language_codes, get_flag_for_locale, get_timeago_locale
|
||||||
@@ -69,15 +69,43 @@ socketio_server = None
|
|||||||
|
|
||||||
# Enable CORS, especially useful for the Chrome extension to operate from anywhere
|
# Enable CORS, especially useful for the Chrome extension to operate from anywhere
|
||||||
CORS(app)
|
CORS(app)
|
||||||
|
from werkzeug.routing import BaseConverter, ValidationError
|
||||||
|
from uuid import UUID
|
||||||
|
|
||||||
|
class StrictUUIDConverter(BaseConverter):
    """URL converter that only matches canonically formatted UUID strings.

    `to_python` returns the lower-case hyphenated form of the UUID, or the
    value unchanged when it is one of `_ALLOWED_SENTINELS`. Anything else
    raises `ValidationError`.
    """

    # Special sentinel values allowed in addition to strict UUIDs
    _ALLOWED_SENTINELS = frozenset({'first'})

    def to_python(self, value: str) -> str:
        """Validate the URL segment and return it in canonical form."""
        if value in self._ALLOWED_SENTINELS:
            return value

        try:
            canonical = str(UUID(value))
        except ValueError as e:
            raise ValidationError() from e

        # Reject non-standard formats (braces, URNs, no-hyphens): UUID() parses
        # them, but their canonical rendering no longer equals the input.
        if canonical != value.lower():
            raise ValidationError()

        return canonical

    def to_url(self, value) -> str:
        """Render the value for URL building by converting it to a string."""
        return str(value)
|
||||||
|
|
||||||
|
# app setup (once)
|
||||||
|
app.url_map.converters["uuid_str"] = StrictUUIDConverter
|
||||||
|
|
||||||
|
# Flask-Compress handles HTTP compression, Socket.IO compression disabled to prevent memory leak.
|
||||||
|
# There's also a bug between flask compress and socketio that causes some kind of slow memory leak
|
||||||
|
# It's better to use compression on your reverse proxy (nginx etc) instead.
|
||||||
|
if strtobool(os.getenv("FLASK_ENABLE_COMPRESSION")):
|
||||||
|
from flask_compress import Compress as FlaskCompress
|
||||||
|
app.config['COMPRESS_MIN_SIZE'] = 2096
|
||||||
|
app.config['COMPRESS_MIMETYPES'] = ['text/html', 'text/css', 'text/javascript', 'application/json', 'application/javascript', 'image/svg+xml']
|
||||||
|
# Use gzip only - smaller memory footprint than zstd/brotli (4-8KB vs 200-500KB contexts)
|
||||||
|
app.config['COMPRESS_ALGORITHM'] = ['gzip']
|
||||||
|
compress = FlaskCompress()
|
||||||
|
compress.init_app(app)
|
||||||
|
|
||||||
# Super handy for compressing large BrowserSteps responses and others
|
|
||||||
# Flask-Compress handles HTTP compression, Socket.IO compression disabled to prevent memory leak
|
|
||||||
compress = FlaskCompress()
|
|
||||||
app.config['COMPRESS_MIN_SIZE'] = 2096
|
|
||||||
app.config['COMPRESS_MIMETYPES'] = ['text/html', 'text/css', 'text/javascript', 'application/json', 'application/javascript', 'image/svg+xml']
|
|
||||||
# Use gzip only - smaller memory footprint than zstd/brotli (4-8KB vs 200-500KB contexts)
|
|
||||||
app.config['COMPRESS_ALGORITHM'] = ['gzip']
|
|
||||||
compress.init_app(app)
|
|
||||||
app.config['TEMPLATES_AUTO_RELOAD'] = False
|
app.config['TEMPLATES_AUTO_RELOAD'] = False
|
||||||
|
|
||||||
|
|
||||||
@@ -190,9 +218,13 @@ def _jinja2_filter_format_number_locale(value: float) -> str:
|
|||||||
"Formats for example 4000.10 to the local locale default of 4,000.10"
|
"Formats for example 4000.10 to the local locale default of 4,000.10"
|
||||||
# Format the number with two decimal places (locale format string will return 6 decimal)
|
# Format the number with two decimal places (locale format string will return 6 decimal)
|
||||||
formatted_value = locale.format_string("%.2f", value, grouping=True)
|
formatted_value = locale.format_string("%.2f", value, grouping=True)
|
||||||
|
|
||||||
return formatted_value
|
return formatted_value
|
||||||
|
|
||||||
|
@app.template_filter('regex_search')
def _jinja2_filter_regex_search(value, pattern):
    """Jinja2 filter: True when `pattern` matches anywhere in `str(value)`."""
    import re

    found = re.search(pattern, str(value))
    return found is not None
|
||||||
|
|
||||||
@app.template_global('is_checking_now')
|
@app.template_global('is_checking_now')
|
||||||
def _watch_is_checking_now(watch_obj, format="%Y-%m-%d %H:%M:%S"):
|
def _watch_is_checking_now(watch_obj, format="%Y-%m-%d %H:%M:%S"):
|
||||||
return worker_pool.is_watch_running(watch_obj['uuid'])
|
return worker_pool.is_watch_running(watch_obj['uuid'])
|
||||||
@@ -356,6 +388,8 @@ def _jinja2_filter_fetcher_status_icons(fetcher_name):
|
|||||||
|
|
||||||
return ''
|
return ''
|
||||||
|
|
||||||
|
_RE_SANITIZE_TAG = re.compile(r'[^a-zA-Z0-9]')
|
||||||
|
|
||||||
@app.template_filter('sanitize_tag_class')
|
@app.template_filter('sanitize_tag_class')
|
||||||
def _jinja2_filter_sanitize_tag_class(tag_title):
|
def _jinja2_filter_sanitize_tag_class(tag_title):
|
||||||
"""Sanitize a tag title to create a valid CSS class name.
|
"""Sanitize a tag title to create a valid CSS class name.
|
||||||
@@ -367,9 +401,8 @@ def _jinja2_filter_sanitize_tag_class(tag_title):
|
|||||||
Returns:
|
Returns:
|
||||||
str: A sanitized string suitable for use as a CSS class name
|
str: A sanitized string suitable for use as a CSS class name
|
||||||
"""
|
"""
|
||||||
import re
|
|
||||||
# Remove all non-alphanumeric characters and convert to lowercase
|
# Remove all non-alphanumeric characters and convert to lowercase
|
||||||
sanitized = re.sub(r'[^a-zA-Z0-9]', '', tag_title).lower()
|
sanitized = _RE_SANITIZE_TAG.sub('', tag_title).lower()
|
||||||
# Ensure it starts with a letter (CSS requirement)
|
# Ensure it starts with a letter (CSS requirement)
|
||||||
if sanitized and not sanitized[0].isalpha():
|
if sanitized and not sanitized[0].isalpha():
|
||||||
sanitized = 'tag' + sanitized
|
sanitized = 'tag' + sanitized
|
||||||
@@ -457,28 +490,21 @@ def changedetection_app(config=None, datastore_o=None):
|
|||||||
available_languages = get_available_languages()
|
available_languages = get_available_languages()
|
||||||
language_codes = get_language_codes()
|
language_codes = get_language_codes()
|
||||||
|
|
||||||
def get_locale():
|
_locale_aliases = {
|
||||||
# Locale aliases: map browser language codes to translation directory names
|
'zh-TW': 'zh_Hant_TW', # Traditional Chinese: browser sends zh-TW, we use zh_Hant_TW
|
||||||
# This handles cases where browsers send standard codes (e.g., zh-TW)
|
'zh_TW': 'zh_Hant_TW', # Also handle underscore variant
|
||||||
# but our translations use more specific codes (e.g., zh_Hant_TW)
|
}
|
||||||
locale_aliases = {
|
_locale_match_list = language_codes + list(_locale_aliases.keys())
|
||||||
'zh-TW': 'zh_Hant_TW', # Traditional Chinese: browser sends zh-TW, we use zh_Hant_TW
|
|
||||||
'zh_TW': 'zh_Hant_TW', # Also handle underscore variant
|
|
||||||
}
|
|
||||||
|
|
||||||
|
def get_locale():
|
||||||
# 1. Try to get locale from session (user explicitly selected)
|
# 1. Try to get locale from session (user explicitly selected)
|
||||||
if 'locale' in session:
|
if 'locale' in session:
|
||||||
return session['locale']
|
return session['locale']
|
||||||
|
|
||||||
# 2. Fall back to Accept-Language header
|
# 2. Fall back to Accept-Language header
|
||||||
# Get the best match from browser's Accept-Language header
|
browser_locale = request.accept_languages.best_match(_locale_match_list)
|
||||||
browser_locale = request.accept_languages.best_match(language_codes + list(locale_aliases.keys()))
|
# 3. Map browser locale to our internal locale if needed
|
||||||
|
return _locale_aliases.get(browser_locale, browser_locale)
|
||||||
# 3. Check if we need to map the browser locale to our internal locale
|
|
||||||
if browser_locale in locale_aliases:
|
|
||||||
return locale_aliases[browser_locale]
|
|
||||||
|
|
||||||
return browser_locale
|
|
||||||
|
|
||||||
# Initialize Babel with locale selector
|
# Initialize Babel with locale selector
|
||||||
babel = Babel(app, locale_selector=get_locale)
|
babel = Babel(app, locale_selector=get_locale)
|
||||||
@@ -530,22 +556,22 @@ def changedetection_app(config=None, datastore_o=None):
|
|||||||
|
|
||||||
|
|
||||||
watch_api.add_resource(WatchHistoryDiff,
|
watch_api.add_resource(WatchHistoryDiff,
|
||||||
'/api/v1/watch/<string:uuid>/difference/<string:from_timestamp>/<string:to_timestamp>',
|
'/api/v1/watch/<uuid_str:uuid>/difference/<string:from_timestamp>/<string:to_timestamp>',
|
||||||
resource_class_kwargs={'datastore': datastore})
|
resource_class_kwargs={'datastore': datastore})
|
||||||
watch_api.add_resource(WatchSingleHistory,
|
watch_api.add_resource(WatchSingleHistory,
|
||||||
'/api/v1/watch/<string:uuid>/history/<string:timestamp>',
|
'/api/v1/watch/<uuid_str:uuid>/history/<string:timestamp>',
|
||||||
resource_class_kwargs={'datastore': datastore, 'update_q': update_q})
|
resource_class_kwargs={'datastore': datastore, 'update_q': update_q})
|
||||||
watch_api.add_resource(WatchFavicon,
|
watch_api.add_resource(WatchFavicon,
|
||||||
'/api/v1/watch/<string:uuid>/favicon',
|
'/api/v1/watch/<uuid_str:uuid>/favicon',
|
||||||
resource_class_kwargs={'datastore': datastore})
|
resource_class_kwargs={'datastore': datastore})
|
||||||
watch_api.add_resource(WatchHistory,
|
watch_api.add_resource(WatchHistory,
|
||||||
'/api/v1/watch/<string:uuid>/history',
|
'/api/v1/watch/<uuid_str:uuid>/history',
|
||||||
resource_class_kwargs={'datastore': datastore})
|
resource_class_kwargs={'datastore': datastore})
|
||||||
|
|
||||||
watch_api.add_resource(CreateWatch, '/api/v1/watch',
|
watch_api.add_resource(CreateWatch, '/api/v1/watch',
|
||||||
resource_class_kwargs={'datastore': datastore, 'update_q': update_q})
|
resource_class_kwargs={'datastore': datastore, 'update_q': update_q})
|
||||||
|
|
||||||
watch_api.add_resource(Watch, '/api/v1/watch/<string:uuid>',
|
watch_api.add_resource(Watch, '/api/v1/watch/<uuid_str:uuid>',
|
||||||
resource_class_kwargs={'datastore': datastore, 'update_q': update_q})
|
resource_class_kwargs={'datastore': datastore, 'update_q': update_q})
|
||||||
|
|
||||||
watch_api.add_resource(SystemInfo, '/api/v1/systeminfo',
|
watch_api.add_resource(SystemInfo, '/api/v1/systeminfo',
|
||||||
@@ -558,7 +584,7 @@ def changedetection_app(config=None, datastore_o=None):
|
|||||||
watch_api.add_resource(Tags, '/api/v1/tags',
|
watch_api.add_resource(Tags, '/api/v1/tags',
|
||||||
resource_class_kwargs={'datastore': datastore})
|
resource_class_kwargs={'datastore': datastore})
|
||||||
|
|
||||||
watch_api.add_resource(Tag, '/api/v1/tag', '/api/v1/tag/<string:uuid>',
|
watch_api.add_resource(Tag, '/api/v1/tag', '/api/v1/tag/<uuid_str:uuid>',
|
||||||
resource_class_kwargs={'datastore': datastore, 'update_q': update_q})
|
resource_class_kwargs={'datastore': datastore, 'update_q': update_q})
|
||||||
|
|
||||||
watch_api.add_resource(Search, '/api/v1/search',
|
watch_api.add_resource(Search, '/api/v1/search',
|
||||||
@@ -567,6 +593,8 @@ def changedetection_app(config=None, datastore_o=None):
|
|||||||
watch_api.add_resource(Notifications, '/api/v1/notifications',
|
watch_api.add_resource(Notifications, '/api/v1/notifications',
|
||||||
resource_class_kwargs={'datastore': datastore})
|
resource_class_kwargs={'datastore': datastore})
|
||||||
|
|
||||||
|
watch_api.add_resource(Spec, '/api/v1/full-spec')
|
||||||
|
|
||||||
@login_manager.user_loader
|
@login_manager.user_loader
|
||||||
def user_loader(email):
|
def user_loader(email):
|
||||||
user = User()
|
user = User()
|
||||||
@@ -708,8 +736,14 @@ def changedetection_app(config=None, datastore_o=None):
|
|||||||
def static_content(group, filename):
|
def static_content(group, filename):
|
||||||
from flask import make_response
|
from flask import make_response
|
||||||
import re
|
import re
|
||||||
group = re.sub(r'[^\w.-]+', '', group.lower())
|
|
||||||
filename = re.sub(r'[^\w.-]+', '', filename.lower())
|
# Strict sanitization: only allow a-z, 0-9, and underscore (blocks .. and other traversal)
|
||||||
|
group = re.sub(r'[^a-z0-9_-]+', '', group.lower())
|
||||||
|
filename = filename
|
||||||
|
|
||||||
|
# Additional safety: reject if sanitization resulted in empty strings
|
||||||
|
if not group or not filename:
|
||||||
|
abort(404)
|
||||||
|
|
||||||
if group == 'screenshot':
|
if group == 'screenshot':
|
||||||
# Could be sensitive, follow password requirements
|
# Could be sensitive, follow password requirements
|
||||||
@@ -983,15 +1017,16 @@ def check_for_new_version():
|
|||||||
import urllib3
|
import urllib3
|
||||||
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
||||||
|
|
||||||
|
session = requests.Session()
|
||||||
|
session.verify = False
|
||||||
|
|
||||||
while not app.config.exit.is_set():
|
while not app.config.exit.is_set():
|
||||||
try:
|
try:
|
||||||
r = requests.post("https://changedetection.io/check-ver.php",
|
r = session.post("https://changedetection.io/check-ver.php",
|
||||||
data={'version': __version__,
|
data={'version': __version__,
|
||||||
'app_guid': datastore.data['app_guid'],
|
'app_guid': datastore.data['app_guid'],
|
||||||
'watch_count': len(datastore.data['watching'])
|
'watch_count': len(datastore.data['watching'])
|
||||||
},
|
})
|
||||||
|
|
||||||
verify=False)
|
|
||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|||||||
@@ -7,8 +7,6 @@ from flask_babel import lazy_gettext as _l, gettext
|
|||||||
from changedetectionio.blueprint.rss import RSS_FORMAT_TYPES, RSS_TEMPLATE_TYPE_OPTIONS, RSS_TEMPLATE_HTML_DEFAULT
|
from changedetectionio.blueprint.rss import RSS_FORMAT_TYPES, RSS_TEMPLATE_TYPE_OPTIONS, RSS_TEMPLATE_HTML_DEFAULT
|
||||||
from changedetectionio.conditions.form import ConditionFormRow
|
from changedetectionio.conditions.form import ConditionFormRow
|
||||||
from changedetectionio.notification_service import NotificationContextData
|
from changedetectionio.notification_service import NotificationContextData
|
||||||
from changedetectionio.processors.image_ssim_diff import SCREENSHOT_COMPARISON_THRESHOLD_OPTIONS, \
|
|
||||||
SCREENSHOT_COMPARISON_THRESHOLD_OPTIONS_DEFAULT
|
|
||||||
from changedetectionio.strtobool import strtobool
|
from changedetectionio.strtobool import strtobool
|
||||||
from changedetectionio import processors
|
from changedetectionio import processors
|
||||||
|
|
||||||
@@ -37,7 +35,7 @@ from changedetectionio.widgets import TernaryNoneBooleanField
|
|||||||
|
|
||||||
# default
|
# default
|
||||||
# each select <option data-enabled="enabled-0-0"
|
# each select <option data-enabled="enabled-0-0"
|
||||||
from changedetectionio.blueprint.browser_steps.browser_steps import browser_step_ui_config
|
from changedetectionio.browser_steps.browser_steps import browser_step_ui_config
|
||||||
|
|
||||||
from changedetectionio import html_tools, content_fetchers
|
from changedetectionio import html_tools, content_fetchers
|
||||||
|
|
||||||
@@ -494,7 +492,6 @@ class ValidateJinja2Template(object):
|
|||||||
Validates that a {token} is from a valid set
|
Validates that a {token} is from a valid set
|
||||||
"""
|
"""
|
||||||
def __call__(self, form, field):
|
def __call__(self, form, field):
|
||||||
from changedetectionio import notification
|
|
||||||
from changedetectionio.jinja2_custom import create_jinja_env
|
from changedetectionio.jinja2_custom import create_jinja_env
|
||||||
from jinja2 import BaseLoader, TemplateSyntaxError, UndefinedError
|
from jinja2 import BaseLoader, TemplateSyntaxError, UndefinedError
|
||||||
from jinja2.meta import find_undeclared_variables
|
from jinja2.meta import find_undeclared_variables
|
||||||
@@ -611,13 +608,12 @@ class ValidateCSSJSONXPATHInput(object):
|
|||||||
raise ValidationError("XPath not permitted in this field!")
|
raise ValidationError("XPath not permitted in this field!")
|
||||||
from lxml import etree, html
|
from lxml import etree, html
|
||||||
import elementpath
|
import elementpath
|
||||||
# xpath 2.0-3.1
|
from changedetectionio.html_tools import SafeXPath3Parser
|
||||||
from elementpath.xpath3 import XPath3Parser
|
|
||||||
tree = html.fromstring("<html></html>")
|
tree = html.fromstring("<html></html>")
|
||||||
line = line.replace('xpath:', '')
|
line = line.replace('xpath:', '')
|
||||||
|
|
||||||
try:
|
try:
|
||||||
elementpath.select(tree, line.strip(), parser=XPath3Parser)
|
elementpath.select(tree, line.strip(), parser=SafeXPath3Parser)
|
||||||
except elementpath.ElementPathError as e:
|
except elementpath.ElementPathError as e:
|
||||||
message = field.gettext('\'%s\' is not a valid XPath expression. (%s)')
|
message = field.gettext('\'%s\' is not a valid XPath expression. (%s)')
|
||||||
raise ValidationError(message % (line, str(e)))
|
raise ValidationError(message % (line, str(e)))
|
||||||
@@ -820,8 +816,7 @@ class processor_text_json_diff_form(commonSettingsForm):
|
|||||||
filter_text_removed = BooleanField(_l('Removed lines'), default=True)
|
filter_text_removed = BooleanField(_l('Removed lines'), default=True)
|
||||||
|
|
||||||
trigger_text = StringListField(_l('Keyword triggers - Trigger/wait for text'), [validators.Optional(), ValidateListRegex()])
|
trigger_text = StringListField(_l('Keyword triggers - Trigger/wait for text'), [validators.Optional(), ValidateListRegex()])
|
||||||
if os.getenv("PLAYWRIGHT_DRIVER_URL"):
|
browser_steps = FieldList(FormField(SingleBrowserStep), min_entries=10)
|
||||||
browser_steps = FieldList(FormField(SingleBrowserStep), min_entries=10)
|
|
||||||
text_should_not_be_present = StringListField(_l('Block change-detection while text matches'), [validators.Optional(), ValidateListRegex()])
|
text_should_not_be_present = StringListField(_l('Block change-detection while text matches'), [validators.Optional(), ValidateListRegex()])
|
||||||
webdriver_js_execute_code = TextAreaField(_l('Execute JavaScript before change detection'), render_kw={"rows": "5"}, validators=[validators.Optional()])
|
webdriver_js_execute_code = TextAreaField(_l('Execute JavaScript before change detection'), render_kw={"rows": "5"}, validators=[validators.Optional()])
|
||||||
|
|
||||||
|
|||||||
@@ -23,6 +23,53 @@ class JSONNotFound(ValueError):
|
|||||||
def __init__(self, msg):
|
def __init__(self, msg):
|
||||||
ValueError.__init__(self, msg)
|
ValueError.__init__(self, msg)
|
||||||
|
|
||||||
|
|
||||||
|
_DEFAULT_UNSAFE_XPATH3_FUNCTIONS = [
|
||||||
|
'unparsed-text',
|
||||||
|
'unparsed-text-lines',
|
||||||
|
'unparsed-text-available',
|
||||||
|
'doc',
|
||||||
|
'doc-available',
|
||||||
|
'environment-variable',
|
||||||
|
'available-environment-variables',
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def _build_safe_xpath3_parser():
    """Build an ``XPath3Parser`` subclass with the blocked functions removed.

    The functions blocked by default (``_DEFAULT_UNSAFE_XPATH3_FUNCTIONS``) are
    the XPath 3 functions that read files, fetch documents from a URI, or
    expose environment variables:

      - unparsed-text / unparsed-text-lines / unparsed-text-available
      - doc / doc-available
      - environment-variable / available-environment-variables

    The block list can be replaced through the ``XPATH_BLOCKED_FUNCTIONS``
    environment variable (comma-separated, e.g.
    "unparsed-text,doc,environment-variable"). Whenever the variable is set,
    its value is used instead of the defaults; entries are whitespace-trimmed
    and empty entries are dropped.

    Returns:
        type: The ``SafeXPath3Parser`` class, with every blocked name popped
        from its ``symbol_table``. Names that are not present are ignored.
    """
    import os
    from elementpath.xpath3 import XPath3Parser

    class SafeXPath3Parser(XPath3Parser):
        # NOTE(review): this relies on the subclass receiving its own
        # symbol_table copy, so that the pops below leave XPath3Parser itself
        # untouched - confirm against the installed elementpath version.
        pass

    configured = os.getenv('XPATH_BLOCKED_FUNCTIONS')
    if configured is None:
        names_to_remove = _DEFAULT_UNSAFE_XPATH3_FUNCTIONS
    else:
        trimmed = (part.strip() for part in configured.split(','))
        names_to_remove = [name for name in trimmed if name]

    for fn_name in names_to_remove:
        # pop() with a default: an unknown function name is not an error.
        SafeXPath3Parser.symbol_table.pop(fn_name, None)

    return SafeXPath3Parser
|
||||||
|
|
||||||
|
|
||||||
|
# Module-level singleton — built once, reused everywhere.
|
||||||
|
SafeXPath3Parser = _build_safe_xpath3_parser()
|
||||||
|
|
||||||
# Doesn't look like python supports forward slash auto enclosure in re.findall
|
# Doesn't look like python supports forward slash auto enclosure in re.findall
|
||||||
# So convert it to inline flag "(?i)foobar" type configuration
|
# So convert it to inline flag "(?i)foobar" type configuration
|
||||||
@lru_cache(maxsize=100)
|
@lru_cache(maxsize=100)
|
||||||
@@ -183,8 +230,6 @@ def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False
|
|||||||
"""
|
"""
|
||||||
from lxml import etree, html
|
from lxml import etree, html
|
||||||
import elementpath
|
import elementpath
|
||||||
# xpath 2.0-3.1
|
|
||||||
from elementpath.xpath3 import XPath3Parser
|
|
||||||
|
|
||||||
parser = etree.HTMLParser()
|
parser = etree.HTMLParser()
|
||||||
tree = None
|
tree = None
|
||||||
@@ -210,7 +255,7 @@ def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False
|
|||||||
# This allows //title to match elements in the default namespace
|
# This allows //title to match elements in the default namespace
|
||||||
namespaces[''] = tree.nsmap[None]
|
namespaces[''] = tree.nsmap[None]
|
||||||
|
|
||||||
r = elementpath.select(tree, xpath_filter.strip(), namespaces=namespaces, parser=XPath3Parser)
|
r = elementpath.select(tree, xpath_filter.strip(), namespaces=namespaces, parser=SafeXPath3Parser)
|
||||||
#@note: //title/text() now works with default namespaces (fixed by registering '' prefix)
|
#@note: //title/text() now works with default namespaces (fixed by registering '' prefix)
|
||||||
#@note: //title/text() wont work where <title>CDATA.. (use cdata_in_document_to_text first)
|
#@note: //title/text() wont work where <title>CDATA.. (use cdata_in_document_to_text first)
|
||||||
|
|
||||||
@@ -235,6 +280,9 @@ def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False
|
|||||||
else:
|
else:
|
||||||
html_block += elementpath_tostring(element)
|
html_block += elementpath_tostring(element)
|
||||||
|
|
||||||
|
# Drop element references before the finally block so tree.clear() can release
|
||||||
|
# the libxml2 document immediately (elements pin the C-level doc via refcount).
|
||||||
|
del r
|
||||||
return html_block
|
return html_block
|
||||||
finally:
|
finally:
|
||||||
# Explicitly clear the tree to free memory
|
# Explicitly clear the tree to free memory
|
||||||
@@ -439,13 +487,25 @@ def extract_json_as_string(content, json_filter, ensure_is_ldjson_info_type=None
|
|||||||
except json.JSONDecodeError as e:
|
except json.JSONDecodeError as e:
|
||||||
logger.warning(f"Error processing JSON {content[:20]}...{str(e)})")
|
logger.warning(f"Error processing JSON {content[:20]}...{str(e)})")
|
||||||
else:
|
else:
|
||||||
# Probably something else, go fish inside for it
|
# Check for JSONP wrapper: someCallback({...}) or some.namespace({...})
|
||||||
try:
|
# Server may claim application/json but actually return JSONP
|
||||||
stripped_text_from_html = extract_json_blob_from_html(content=content,
|
jsonp_match = re.match(r'^\w[\w.]*\s*\((.+)\)\s*;?\s*$', content.lstrip("\ufeff").strip(), re.DOTALL)
|
||||||
ensure_is_ldjson_info_type=ensure_is_ldjson_info_type,
|
if jsonp_match:
|
||||||
json_filter=json_filter )
|
try:
|
||||||
except json.JSONDecodeError as e:
|
inner = jsonp_match.group(1).strip()
|
||||||
logger.warning(f"Error processing JSON while extracting JSON from HTML blob {content[:20]}...{str(e)})")
|
logger.warning(f"Content looks like JSONP, attempting to extract inner JSON for filter '{json_filter}'")
|
||||||
|
stripped_text_from_html = _parse_json(json.loads(inner), json_filter)
|
||||||
|
except json.JSONDecodeError as e:
|
||||||
|
logger.warning(f"Error processing JSONP inner content {content[:20]}...{str(e)})")
|
||||||
|
|
||||||
|
if not stripped_text_from_html:
|
||||||
|
# Probably something else, go fish inside for it
|
||||||
|
try:
|
||||||
|
stripped_text_from_html = extract_json_blob_from_html(content=content,
|
||||||
|
ensure_is_ldjson_info_type=ensure_is_ldjson_info_type,
|
||||||
|
json_filter=json_filter)
|
||||||
|
except json.JSONDecodeError as e:
|
||||||
|
logger.warning(f"Error processing JSON while extracting JSON from HTML blob {content[:20]}...{str(e)})")
|
||||||
|
|
||||||
if not stripped_text_from_html:
|
if not stripped_text_from_html:
|
||||||
# Re 265 - Just return an empty string when filter not found
|
# Re 265 - Just return an empty string when filter not found
|
||||||
@@ -561,10 +621,33 @@ def html_to_text(html_content: str, render_anchor_tag_content=False, is_rss=Fals
|
|||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
parser_config = None
|
parser_config = None
|
||||||
|
|
||||||
if is_rss:
|
if is_rss:
|
||||||
html_content = re.sub(r'<title([\s>])', r'<h1\1', html_content)
|
html_content = re.sub(r'<title([\s>])', r'<h1\1', html_content)
|
||||||
html_content = re.sub(r'</title>', r'</h1>', html_content)
|
html_content = re.sub(r'</title>', r'</h1>', html_content)
|
||||||
|
else:
|
||||||
|
# Use BS4 html.parser to strip bloat — SPA's often dump 10MB+ of CSS/JS into <head>,
|
||||||
|
# causing inscriptis to silently give up. Regex-based stripping is unsafe because tags
|
||||||
|
# can appear inside JSON data attributes with JS-escaped closing tags (e.g. <\/script>),
|
||||||
|
# causing the regex to scan past the intended close and eat real page content.
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
soup = BeautifulSoup(html_content, 'html.parser')
|
||||||
|
# Strip tags that inscriptis cannot render as meaningful text and which can be very large.
|
||||||
|
# svg/math: produce path-data/MathML garbage; canvas/iframe/template: no inscriptis handlers.
|
||||||
|
# video/audio/picture are kept — they may contain meaningful fallback text or captions.
|
||||||
|
for tag in soup.find_all(['head', 'script', 'style', 'noscript', 'svg',
|
||||||
|
'math', 'canvas', 'iframe', 'template']):
|
||||||
|
tag.decompose()
|
||||||
|
|
||||||
|
# SPAs often use <body style="display:none"> to hide content until JS loads.
|
||||||
|
# inscriptis respects CSS display rules, so strip hiding styles from the body tag.
|
||||||
|
body_tag = soup.find('body')
|
||||||
|
if body_tag and body_tag.get('style'):
|
||||||
|
style = body_tag['style']
|
||||||
|
if re.search(r'\b(?:display\s*:\s*none|visibility\s*:\s*hidden)\b', style, re.IGNORECASE):
|
||||||
|
logger.debug(f"html_to_text: Removing hiding styles from body tag (found: '{style}')")
|
||||||
|
del body_tag['style']
|
||||||
|
|
||||||
|
html_content = str(soup)
|
||||||
|
|
||||||
text_content = get_text(html_content, config=parser_config)
|
text_content = get_text(html_content, config=parser_config)
|
||||||
return text_content
|
return text_content
|
||||||
|
|||||||
@@ -37,6 +37,7 @@ def get_timeago_locale(flask_locale):
|
|||||||
'no': 'nb_NO', # Norwegian Bokmål
|
'no': 'nb_NO', # Norwegian Bokmål
|
||||||
'hi': 'in_HI', # Hindi
|
'hi': 'in_HI', # Hindi
|
||||||
'cs': 'en', # Czech not supported by timeago, fallback to English
|
'cs': 'en', # Czech not supported by timeago, fallback to English
|
||||||
|
'uk': 'uk', # Ukrainian
|
||||||
'en_GB': 'en', # British English - timeago uses 'en'
|
'en_GB': 'en', # British English - timeago uses 'en'
|
||||||
'en_US': 'en', # American English - timeago uses 'en'
|
'en_US': 'en', # American English - timeago uses 'en'
|
||||||
}
|
}
|
||||||
@@ -67,6 +68,7 @@ LANGUAGE_DATA = {
|
|||||||
'tr': {'flag': 'fi fi-tr fis', 'name': 'Türkçe'},
|
'tr': {'flag': 'fi fi-tr fis', 'name': 'Türkçe'},
|
||||||
'ar': {'flag': 'fi fi-sa fis', 'name': 'العربية'},
|
'ar': {'flag': 'fi fi-sa fis', 'name': 'العربية'},
|
||||||
'hi': {'flag': 'fi fi-in fis', 'name': 'हिन्दी'},
|
'hi': {'flag': 'fi fi-in fis', 'name': 'हिन्दी'},
|
||||||
|
'uk': {'flag': 'fi fi-ua fis', 'name': 'Українська'},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -2,6 +2,7 @@ from os import getenv
|
|||||||
from copy import deepcopy
|
from copy import deepcopy
|
||||||
|
|
||||||
from changedetectionio.blueprint.rss import RSS_FORMAT_TYPES, RSS_CONTENT_FORMAT_DEFAULT
|
from changedetectionio.blueprint.rss import RSS_FORMAT_TYPES, RSS_CONTENT_FORMAT_DEFAULT
|
||||||
|
from changedetectionio.model.Tags import TagsDict
|
||||||
|
|
||||||
from changedetectionio.notification import (
|
from changedetectionio.notification import (
|
||||||
default_notification_body,
|
default_notification_body,
|
||||||
@@ -68,7 +69,7 @@ class model(dict):
|
|||||||
'schema_version' : 0,
|
'schema_version' : 0,
|
||||||
'shared_diff_access': False,
|
'shared_diff_access': False,
|
||||||
'strip_ignored_lines': False,
|
'strip_ignored_lines': False,
|
||||||
'tags': {}, #@todo use Tag.model initialisers
|
'tags': None, # Initialized in __init__ with real datastore_path
|
||||||
'webdriver_delay': None , # Extra delay in seconds before extracting text
|
'webdriver_delay': None , # Extra delay in seconds before extracting text
|
||||||
'ui': {
|
'ui': {
|
||||||
'use_page_title_in_list': True,
|
'use_page_title_in_list': True,
|
||||||
@@ -80,10 +81,16 @@ class model(dict):
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
def __init__(self, *arg, **kw):
|
def __init__(self, *arg, datastore_path=None, **kw):
|
||||||
super(model, self).__init__(*arg, **kw)
|
super(model, self).__init__(*arg, **kw)
|
||||||
|
# Capture any tags data passed in before base_config overwrites the structure
|
||||||
|
existing_tags = self.get('settings', {}).get('application', {}).get('tags') or {}
|
||||||
# CRITICAL: deepcopy to avoid sharing mutable objects between instances
|
# CRITICAL: deepcopy to avoid sharing mutable objects between instances
|
||||||
self.update(deepcopy(self.base_config))
|
self.update(deepcopy(self.base_config))
|
||||||
|
# TagsDict requires the real datastore_path at runtime (cannot be set at class-definition time)
|
||||||
|
if datastore_path is None:
|
||||||
|
raise ValueError("App.model() requires 'datastore_path' keyword argument")
|
||||||
|
self['settings']['application']['tags'] = TagsDict(existing_tags, datastore_path=datastore_path)
|
||||||
|
|
||||||
|
|
||||||
def parse_headers_from_text_file(filepath):
|
def parse_headers_from_text_file(filepath):
|
||||||
|
|||||||
@@ -0,0 +1,39 @@
|
|||||||
|
import os
|
||||||
|
import shutil
|
||||||
|
from pathlib import Path
|
||||||
|
from loguru import logger
|
||||||
|
|
||||||
|
_SENTINEL = object()
|
||||||
|
|
||||||
|
|
||||||
|
class TagsDict(dict):
    """Dict of tags that also removes a tag's on-disk directory on deletion.

    Deleting a key (``del tags[key]`` or ``tags.pop(key)``) removes the entry
    and then deletes the directory ``<datastore_path>/<key>`` - but only when
    that directory contains a ``tag.json`` file.
    """

    def __init__(self, *args, datastore_path: str | os.PathLike, **kwargs) -> None:
        """Create the mapping.

        Args:
            *args: Forwarded to ``dict``.
            datastore_path: Directory under which each tag's directory is
                looked up; required, keyword-only.
            **kwargs: Forwarded to ``dict``.
        """
        self._datastore_path = Path(datastore_path)
        super().__init__(*args, **kwargs)

    def __delitem__(self, key: str) -> None:
        """Remove ``key`` from the mapping, then delete its tag directory.

        Raises:
            KeyError: If ``key`` is not in the mapping (nothing on disk is
                touched in that case).
        """
        # The in-memory entry goes first; a missing key raises before any
        # filesystem access happens.
        super().__delitem__(key)

        tag_dir = self._datastore_path / key
        tag_json_file = tag_dir / "tag.json"

        if os.path.exists(tag_json_file):
            try:
                shutil.rmtree(tag_dir)
                logger.info(f"Deleted tag directory for tag {key!r}")
            except FileNotFoundError:
                # Directory vanished between the check and the removal.
                pass
            except OSError as e:
                logger.error(f"Failed to delete tag directory for tag {key!r}: {e}")
            return

        # Without a tag.json the directory is not removed.
        logger.critical(f"Aborting deletion of directory '{tag_dir}' because '{tag_json_file}' does not exist.")

    def pop(self, key: str, default=_SENTINEL):
        """Remove ``key`` and return its value, deleting the tag directory too.

        Args:
            key: The tag key to remove.
            default: Value returned when ``key`` is absent.

        Returns:
            The stored value, or ``default`` when the key is absent and a
            default was supplied.

        Raises:
            KeyError: If ``key`` is absent and no default was given.
        """
        if key not in self:
            if default is _SENTINEL:
                raise KeyError(key)
            return default

        value = self[key]
        # Route through __delitem__ so the on-disk cleanup runs as well.
        del self[key]
        return value
|
||||||
@@ -43,6 +43,11 @@ from ..html_tools import TRANSLATE_WHITESPACE_TABLE
|
|||||||
FAVICON_RESAVE_THRESHOLD_SECONDS=86400
|
FAVICON_RESAVE_THRESHOLD_SECONDS=86400
|
||||||
BROTLI_COMPRESS_SIZE_THRESHOLD = int(os.getenv('SNAPSHOT_BROTLI_COMPRESSION_THRESHOLD', 1024*20))
|
BROTLI_COMPRESS_SIZE_THRESHOLD = int(os.getenv('SNAPSHOT_BROTLI_COMPRESSION_THRESHOLD', 1024*20))
|
||||||
|
|
||||||
|
# Module-level favicon filename cache: data_dir → basename (or None)
|
||||||
|
# Keyed by data_dir so it survives Watch object recreation, deepcopy, and concurrent requests.
|
||||||
|
# Invalidated explicitly in bump_favicon() when a new favicon is saved.
|
||||||
|
_FAVICON_FILENAME_CACHE: dict = {}
|
||||||
|
|
||||||
minimum_seconds_recheck_time = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 3))
|
minimum_seconds_recheck_time = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 3))
|
||||||
mtable = {'seconds': 1, 'minutes': 60, 'hours': 3600, 'days': 86400, 'weeks': 86400 * 7}
|
mtable = {'seconds': 1, 'minutes': 60, 'hours': 3600, 'days': 86400, 'weeks': 86400 * 7}
|
||||||
|
|
||||||
@@ -335,7 +340,6 @@ class model(EntityPersistenceMixin, watch_base):
|
|||||||
'last_notification_error': False,
|
'last_notification_error': False,
|
||||||
'last_viewed': 0,
|
'last_viewed': 0,
|
||||||
'previous_md5': False,
|
'previous_md5': False,
|
||||||
'previous_md5_before_filters': False,
|
|
||||||
'remote_server_reply': None,
|
'remote_server_reply': None,
|
||||||
'track_ldjson_price_data': None
|
'track_ldjson_price_data': None
|
||||||
})
|
})
|
||||||
@@ -384,12 +388,37 @@ class model(EntityPersistenceMixin, watch_base):
|
|||||||
|
|
||||||
return self.get('fetch_backend')
|
return self.get('fetch_backend')
|
||||||
|
|
||||||
|
@property
def fetcher_supports_screenshots(self):
    """Whether the fetcher configured for this watch supports screenshots.

    The fetcher name comes from ``self.get_fetch_backend``. When that is empty
    or ``'system'``, the application-wide ``fetch_backend`` setting from
    ``self._datastore`` is used instead, defaulting to ``'html_requests'``.
    The name is then looked up as an attribute of
    ``changedetectionio.content_fetchers``.

    Returns:
        bool: False when no fetcher with that name exists; otherwise the
        truthiness of the fetcher class's ``supports_screenshots`` attribute
        (False when the attribute is missing).
    """
    from changedetectionio import content_fetchers

    # NOTE(review): the original comment says get_fetch_backend already maps
    # PDF watches to html_requests - confirm against that property.
    configured = self.get_fetch_backend
    uses_system_default = not configured or configured == 'system'
    if uses_system_default:
        app_settings = self._datastore['settings']['application']
        configured = app_settings.get('fetch_backend', 'html_requests')

    fetcher_cls = getattr(content_fetchers, configured, None)
    return fetcher_cls is not None and bool(getattr(fetcher_cls, 'supports_screenshots', False))
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def is_pdf(self):
|
def is_pdf(self):
|
||||||
# content_type field is set in the future
|
url = str(self.get("url") or "").lower()
|
||||||
# https://github.com/dgtlmoon/changedetection.io/issues/1392
|
content_type = str(self.get("content-type") or "").lower()
|
||||||
# Not sure the best logic here
|
|
||||||
return self.get('url', '').lower().endswith('.pdf') or 'pdf' in self.get('content_type', '').lower()
|
if content_type in ("none", "null", ""):
|
||||||
|
content_type = ""
|
||||||
|
|
||||||
|
return (
|
||||||
|
url.endswith(".pdf")
|
||||||
|
or content_type.split(";")[0].strip() == "application/pdf"
|
||||||
|
)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def label(self):
|
def label(self):
|
||||||
@@ -801,9 +830,8 @@ class model(EntityPersistenceMixin, watch_base):
|
|||||||
with open(fname, 'wb') as f:
|
with open(fname, 'wb') as f:
|
||||||
f.write(decoded)
|
f.write(decoded)
|
||||||
|
|
||||||
# Invalidate favicon filename cache
|
# Invalidate module-level favicon filename cache for this watch
|
||||||
if hasattr(self, '_favicon_filename_cache'):
|
_FAVICON_FILENAME_CACHE.pop(self.data_dir, None)
|
||||||
delattr(self, '_favicon_filename_cache')
|
|
||||||
|
|
||||||
# A signal that could trigger the socket server to update the browser also
|
# A signal that could trigger the socket server to update the browser also
|
||||||
watch_check_update = signal('watch_favicon_bump')
|
watch_check_update = signal('watch_favicon_bump')
|
||||||
@@ -818,35 +846,23 @@ class model(EntityPersistenceMixin, watch_base):
|
|||||||
|
|
||||||
def get_favicon_filename(self) -> str | None:
|
def get_favicon_filename(self) -> str | None:
|
||||||
"""
|
"""
|
||||||
Find any favicon.* file in the current working directory
|
Find any favicon.* file in the watch data directory.
|
||||||
and return the contents of the newest one.
|
|
||||||
|
|
||||||
MEMORY LEAK FIX: Cache the result to avoid repeated glob.glob() operations.
|
Uses a module-level cache keyed by data_dir to survive Watch object recreation,
|
||||||
glob.glob() causes millions of fnmatch allocations when called for every watch on page load.
|
deepcopy (which drops instance attrs), and concurrent request races.
|
||||||
|
Invalidated by bump_favicon() when a new favicon is saved.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
str: Basename of the newest favicon file, or None if not found.
|
str: Basename of the favicon file, or None if not found.
|
||||||
"""
|
"""
|
||||||
# Check cache first (prevents 26M+ allocations from repeated glob operations)
|
if self.data_dir in _FAVICON_FILENAME_CACHE:
|
||||||
cache_key = '_favicon_filename_cache'
|
return _FAVICON_FILENAME_CACHE[self.data_dir]
|
||||||
if hasattr(self, cache_key):
|
|
||||||
return getattr(self, cache_key)
|
|
||||||
|
|
||||||
import glob
|
import glob
|
||||||
|
|
||||||
# Search for all favicon.* files
|
|
||||||
files = glob.glob(os.path.join(self.data_dir, "favicon.*"))
|
files = glob.glob(os.path.join(self.data_dir, "favicon.*"))
|
||||||
|
fname = os.path.basename(files[0]) if files else None
|
||||||
if not files:
|
_FAVICON_FILENAME_CACHE[self.data_dir] = fname
|
||||||
result = None
|
return fname
|
||||||
else:
|
|
||||||
# Find the newest by modification time
|
|
||||||
newest_file = max(files, key=os.path.getmtime)
|
|
||||||
result = os.path.basename(newest_file)
|
|
||||||
|
|
||||||
# Cache the result
|
|
||||||
setattr(self, cache_key, result)
|
|
||||||
return result
|
|
||||||
|
|
||||||
def get_screenshot_as_thumbnail(self, max_age=3200):
|
def get_screenshot_as_thumbnail(self, max_age=3200):
|
||||||
"""Return path to a square thumbnail of the most recent screenshot.
|
"""Return path to a square thumbnail of the most recent screenshot.
|
||||||
@@ -1177,18 +1193,13 @@ class model(EntityPersistenceMixin, watch_base):
|
|||||||
def compile_error_texts(self, has_proxies=None):
|
def compile_error_texts(self, has_proxies=None):
|
||||||
"""Compile error texts for this watch.
|
"""Compile error texts for this watch.
|
||||||
Accepts has_proxies parameter to ensure it works even outside app context"""
|
Accepts has_proxies parameter to ensure it works even outside app context"""
|
||||||
from flask import url_for
|
from flask import url_for, has_request_context
|
||||||
from markupsafe import Markup
|
from markupsafe import Markup
|
||||||
|
|
||||||
output = [] # Initialize as list since we're using append
|
output = [] # Initialize as list since we're using append
|
||||||
last_error = self.get('last_error','')
|
last_error = self.get('last_error','')
|
||||||
|
|
||||||
try:
|
has_app_context = has_request_context()
|
||||||
url_for('settings.settings_page')
|
|
||||||
except Exception as e:
|
|
||||||
has_app_context = False
|
|
||||||
else:
|
|
||||||
has_app_context = True
|
|
||||||
|
|
||||||
# has app+request context, we can use url_for()
|
# has app+request context, we can use url_for()
|
||||||
if has_app_context:
|
if has_app_context:
|
||||||
|
|||||||
@@ -6,6 +6,8 @@ from .persistence import EntityPersistenceMixin, _determine_entity_type
|
|||||||
|
|
||||||
__all__ = ['EntityPersistenceMixin', 'watch_base']
|
__all__ = ['EntityPersistenceMixin', 'watch_base']
|
||||||
|
|
||||||
|
from ..browser_steps.browser_steps import browser_steps_get_valid_steps
|
||||||
|
|
||||||
USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH = 'System default'
|
USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH = 'System default'
|
||||||
CONDITIONS_MATCH_LOGIC_DEFAULT = 'ALL'
|
CONDITIONS_MATCH_LOGIC_DEFAULT = 'ALL'
|
||||||
|
|
||||||
@@ -129,7 +131,6 @@ class watch_base(dict):
|
|||||||
fetch_time (float): Duration of last fetch in seconds
|
fetch_time (float): Duration of last fetch in seconds
|
||||||
consecutive_filter_failures (int): Counter for consecutive filter match failures
|
consecutive_filter_failures (int): Counter for consecutive filter match failures
|
||||||
previous_md5 (str|bool): MD5 hash of previous content
|
previous_md5 (str|bool): MD5 hash of previous content
|
||||||
previous_md5_before_filters (str|bool): MD5 hash before filters applied
|
|
||||||
history_snapshot_max_length (int|None): Max history snapshots to keep (None = use global)
|
history_snapshot_max_length (int|None): Max history snapshots to keep (None = use global)
|
||||||
|
|
||||||
Conditions:
|
Conditions:
|
||||||
@@ -166,6 +167,10 @@ class watch_base(dict):
|
|||||||
if kw.get('datastore_path'):
|
if kw.get('datastore_path'):
|
||||||
del kw['datastore_path']
|
del kw['datastore_path']
|
||||||
|
|
||||||
|
# IMPORTANT: Don't initialize __watch_was_edited yet!
|
||||||
|
# We'll initialize it AFTER the initial update() call below
|
||||||
|
# This prevents marking the watch as edited during initialization
|
||||||
|
|
||||||
self.update({
|
self.update({
|
||||||
# Custom notification content
|
# Custom notification content
|
||||||
# Re #110, so then if this is set to None, we know to use the default value instead
|
# Re #110, so then if this is set to None, we know to use the default value instead
|
||||||
@@ -211,7 +216,6 @@ class watch_base(dict):
|
|||||||
'page_title': None, # <title> from the page
|
'page_title': None, # <title> from the page
|
||||||
'paused': False,
|
'paused': False,
|
||||||
'previous_md5': False,
|
'previous_md5': False,
|
||||||
'previous_md5_before_filters': False, # Used for skipping changedetection entirely
|
|
||||||
'processor': 'text_json_diff', # could be restock_diff or others from .processors
|
'processor': 'text_json_diff', # could be restock_diff or others from .processors
|
||||||
'price_change_threshold_percent': None,
|
'price_change_threshold_percent': None,
|
||||||
'proxy': None, # Preferred proxy connection
|
'proxy': None, # Preferred proxy connection
|
||||||
@@ -297,9 +301,121 @@ class watch_base(dict):
|
|||||||
|
|
||||||
super(watch_base, self).__init__(*arg, **kw)
|
super(watch_base, self).__init__(*arg, **kw)
|
||||||
|
|
||||||
|
# Check if we're being initialized from an existing watch object
|
||||||
|
# that has was_edited=True, so we can preserve the flag
|
||||||
|
preserve_edited_flag = False
|
||||||
if self.get('default'):
|
if self.get('default'):
|
||||||
|
# When creating a new watch object from an existing one (e.g., changing processor),
|
||||||
|
# preserve the was_edited flag if it was True
|
||||||
|
default_watch = self.get('default')
|
||||||
|
if hasattr(default_watch, 'was_edited') and default_watch.was_edited:
|
||||||
|
preserve_edited_flag = True
|
||||||
del self['default']
|
del self['default']
|
||||||
|
|
||||||
|
# NOW initialize the edited flag after all initial setup is complete
|
||||||
|
# This ensures initialization doesn't trigger the edited flag
|
||||||
|
# But preserve it if the source watch had it set to True
|
||||||
|
self.__watch_was_edited = preserve_edited_flag
|
||||||
|
|
||||||
|
def _mark_field_as_edited(self, key):
    """
    Raise the "watch was edited" flag when `key` is a user-writable field.

    Shared by __setitem__, __delitem__, update(), pop() and setdefault().

    Args:
        key: Name of the dict key that was just written or removed.
    """
    # The flag attribute is looked up by its name-mangled form; while it does not
    # exist yet (initial population of the dict) nothing is tracked.
    if not hasattr(self, '_watch_base__watch_was_edited'):
        return

    # Nothing more to record once the flag is already up
    if self.__watch_was_edited:
        return

    # Imported here from the shared schema utilities (no circular dependency)
    from .schema_utils import get_readonly_watch_fields

    # System-managed fields not in the OpenAPI spec (yet); writes to these
    # come from processors/workers and must not count as a user edit.
    system_managed = {
        'last_check_status',  # Set by processors
        'restock',            # Set by restock processor
        'last_viewed',        # Set by mark_all_viewed endpoint
    }

    if key in get_readonly_watch_fields() or key in system_managed:
        return

    self.__watch_was_edited = True
|
||||||
|
|
||||||
|
def __setitem__(self, key, value):
    """
    Store `value` under `key`, then record the write for edit tracking.

    The edited flag lets processing be skipped when both hold:
      1. HTML content is unchanged (checksumFromPreviousCheckWasTheSame)
      2. the watch configuration was not edited

    Whether this particular key counts as an edit is decided by
    _mark_field_as_edited().
    """
    # The value is always stored, whatever the tracking decision turns out to be
    super().__setitem__(key, value)
    self._mark_field_as_edited(key)
|
||||||
|
|
||||||
|
def __delitem__(self, key):
    """Delete `key` from the dict, then record the removal for edit tracking."""
    # A missing key raises KeyError here, before any tracking happens
    super().__delitem__(key)
    self._mark_field_as_edited(key)
|
||||||
|
|
||||||
|
def update(self, *args, **kwargs):
    """
    Override dict.update() to track modifications to writable fields.

    Accepts the same forms as dict.update(): a mapping, an iterable of
    (key, value) pairs, and/or keyword arguments. Any truthy 'browser_steps'
    value is passed through browser_steps_get_valid_steps() before being stored.

    Note: when a mapping is passed positionally, its 'browser_steps' entry is
    replaced in place with the sanitised value (same as before this docstring
    was moved to the top of the method).
    """
    # dict.update() also accepts an iterable of pairs; normalise it to a dict so
    # the .get()/.keys() calls below do not raise AttributeError.
    if args and not hasattr(args[0], 'keys'):
        args = (dict(args[0]),) + tuple(args[1:])

    if args and args[0].get('browser_steps'):
        args[0]['browser_steps'] = browser_steps_get_valid_steps(args[0].get('browser_steps'))

    # Same sanitising for the update(browser_steps=...) keyword form
    if kwargs.get('browser_steps'):
        kwargs['browser_steps'] = browser_steps_get_valid_steps(kwargs['browser_steps'])

    # Call parent update first
    super().update(*args, **kwargs)

    # Mark as edited for any writable fields that were updated,
    # covering both update(dict) and update(key=value) forms
    if args:
        for key in args[0].keys():
            self._mark_field_as_edited(key)
    for key in kwargs:
        self._mark_field_as_edited(key)
|
||||||
|
|
||||||
|
|
||||||
|
def pop(self, key, *args):
    """
    Override dict.pop() to track removal of writable fields.

    The edit is only recorded when `key` was actually present; popping a
    missing key with a default leaves the dict untouched and therefore must
    not flag the watch as edited (mirrors the check done in setdefault()).

    Args:
        key: Key to remove.
        *args: Optional single default, as accepted by dict.pop().

    Returns:
        The removed value, or the default when the key was absent.

    Raises:
        KeyError: If the key is absent and no default was supplied.
    """
    existed = key in self
    result = super().pop(key, *args)
    if existed:
        self._mark_field_as_edited(key)
    return result
|
||||||
|
|
||||||
|
def setdefault(self, key, default=None):
    """
    Override dict.setdefault() to track modifications to writable fields.

    An edit is recorded only when the key was absent beforehand, i.e. when
    the call really inserted `default`.
    """
    was_missing = key not in self
    value = super().setdefault(key, default)
    if was_missing:
        self._mark_field_as_edited(key)
    return value
|
||||||
|
|
||||||
|
@property
def was_edited(self):
    """
    Tell whether the watch configuration was edited since the flag was last reset.

    Returns:
        bool: True if a writable field was modified; False otherwise, including
        when the flag attribute has not been created yet.
    """
    # Read through the name-mangled attribute so a missing flag reads as False
    return getattr(self, '_watch_base__watch_was_edited', False)
|
||||||
|
|
||||||
|
def reset_watch_edited_flag(self):
    """
    Clear the "watch was edited" flag.

    Intended to be called once processing has completed successfully, so that
    later runs can again distinguish content-only changes from config edits.
    """
    self.__watch_was_edited = False
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def get_property_names(cls):
|
def get_property_names(cls):
|
||||||
"""
|
"""
|
||||||
|
|||||||
@@ -0,0 +1,92 @@
|
|||||||
|
"""
|
||||||
|
Schema utilities for Watch and Tag models.
|
||||||
|
|
||||||
|
Provides functions to extract readonly fields and properties from OpenAPI spec.
|
||||||
|
Shared by both the model layer and API layer to avoid circular dependencies.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import functools
|
||||||
|
|
||||||
|
|
||||||
|
@functools.cache
def get_openapi_schema_dict():
    """
    Load docs/api-spec.yaml and return it as a plain dictionary.

    The parsed YAML dict is returned directly (not an OpenAPI object). The
    result is memoised by functools.cache, so every caller shares one dict.

    Raises:
        OSError: If the spec file cannot be opened at either candidate location.
    """
    import os
    import yaml

    here = os.path.dirname(__file__)

    # Try two levels up first, then fall back to one level up
    spec_path = os.path.join(here, '../../docs/api-spec.yaml')
    if not os.path.exists(spec_path):
        spec_path = os.path.join(here, '../docs/api-spec.yaml')

    with open(spec_path, 'r', encoding='utf-8') as spec_file:
        return yaml.safe_load(spec_file)
|
||||||
|
|
||||||
|
|
||||||
|
@functools.cache
def _resolve_readonly_fields(schema_name):
    """
    Collect the names of all `readOnly: true` properties of a schema.

    For a schema declared with `allOf`, each entry contributes the properties
    of the schema its `$ref` points at (one level, looked up by the last path
    segment) plus any properties declared inline on the entry. A schema
    without `allOf` contributes its own direct properties.

    Args:
        schema_name: Name of the schema (e.g., 'Watch', 'Tag')

    Returns:
        frozenset: readOnly field names; empty when the schema is unknown.
    """
    all_schemas = get_openapi_schema_dict()['components']['schemas']
    schema = all_schemas.get(schema_name, {})

    def _flagged(container):
        # Property names in `container` whose definition has readOnly set to exactly True
        if 'properties' not in container:
            return set()
        return {
            name
            for name, definition in container['properties'].items()
            if definition.get('readOnly') is True
        }

    # Direct properties (no inheritance)
    if 'allOf' not in schema:
        return frozenset(_flagged(schema))

    # allOf (schema inheritance): merge referenced parents and inline parts
    collected = set()
    for part in schema['allOf']:
        if '$ref' in part:
            parent_name = part['$ref'].split('/')[-1]
            collected |= _flagged(all_schemas.get(parent_name, {}))
        collected |= _flagged(part)

    return frozenset(collected)
|
||||||
|
|
||||||
|
|
||||||
|
@functools.cache
def get_readonly_watch_fields():
    """
    Return the readOnly field names of the 'Watch' schema in the OpenAPI spec.

    Thin cached wrapper around _resolve_readonly_fields('Watch'), so fields
    inherited through `allOf` are included alongside Watch-specific ones.

    Used by:
    - model/watch_base.py: Track when writable fields are edited
    - api/Watch.py: Filter readonly fields from PUT requests
    """
    return _resolve_readonly_fields('Watch')
|
||||||
|
|
||||||
|
|
||||||
|
@functools.cache
def get_readonly_tag_fields():
    """
    Return the readOnly field names of the 'Tag' schema in the OpenAPI spec.

    Thin cached wrapper around _resolve_readonly_fields('Tag'), so fields
    inherited through `allOf` are included alongside Tag-specific ones.
    """
    return _resolve_readonly_fields('Tag')
|
||||||
@@ -54,34 +54,128 @@ def _check_cascading_vars(datastore, var_name, watch):
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
class FormattableTimestamp(str):
    """
    A str subclass representing a formatted datetime. As a plain string it renders
    with the default format, but can also be called with a custom format argument
    in Jinja2 templates:

        {{ change_datetime }}                    → '2024-01-15 10:30:00 UTC'
        {{ change_datetime(format='%Y') }}       → '2024'
        {{ change_datetime(format='%A') }}       → 'Monday'
        {{ change_datetime(format='%Y-%m-%d') }} → '2024-01-15'

    Being a str subclass means it is natively JSON serializable.

    Instances can be copied and pickled: __reduce__ rebuilds them from the epoch
    value rather than from the rendered text.
    """
    _DEFAULT_FORMAT = '%Y-%m-%d %H:%M:%S %Z'

    def __new__(cls, timestamp):
        """
        Build the string from an epoch timestamp.

        Args:
            timestamp: Seconds since the epoch (anything int() accepts; fractions are dropped).
        """
        # Read the epoch value as UTC, then shift into the host's local timezone
        dt = datetime.datetime.fromtimestamp(int(timestamp), tz=datetime.timezone.utc)
        local_tz = datetime.datetime.now().astimezone().tzinfo
        dt_local = dt.astimezone(local_tz)
        try:
            formatted = dt_local.strftime(cls._DEFAULT_FORMAT)
        except Exception:
            # Best-effort fallback if strftime rejects the format on this platform
            formatted = dt_local.isoformat()
        instance = super().__new__(cls, formatted)
        instance._dt = dt_local
        return instance

    def __reduce__(self):
        """
        Support copy.copy(), copy.deepcopy() and pickle.

        The default str reconstruction would call __new__ with the rendered text,
        which int() cannot parse; hand back the epoch value instead.
        """
        return (type(self), (self._dt.timestamp(),))

    def __call__(self, format=_DEFAULT_FORMAT):
        """
        Render the stored datetime with a custom strftime format.

        Args:
            format: strftime pattern; defaults to the class default format.

        Returns:
            str: The formatted datetime, or its ISO-8601 form if formatting fails.
        """
        try:
            return self._dt.strftime(format)
        except Exception:
            # A bad user-supplied format must not break template rendering
            return self._dt.isoformat()
|
||||||
|
|
||||||
|
|
||||||
|
class FormattableDiff(str):
    """
    A str subclass representing a rendered diff. As a plain string it renders
    with the default options for that variant, but can be called with custom
    arguments in Jinja2 templates:

        {{ diff }}                            → default diff output
        {{ diff(lines=5) }}                   → truncate to 5 lines
        {{ diff(added_only=true) }}           → only show added lines
        {{ diff(removed_only=true) }}         → only show removed lines
        {{ diff(context=3) }}                 → 3 lines of context around changes
        {{ diff(word_diff=false) }}           → line-level diff instead of word-level
        {{ diff(lines=10, added_only=true) }} → combine args
        {{ diff_added(lines=5) }}             → works on any diff_* variant too

    Being a str subclass means it is natively JSON serializable.

    Instances can be copied and pickled: __reduce__ restores the rendered text
    and the stored snapshots without rendering the diff again.
    """

    def __new__(cls, prev_snapshot, current_snapshot, **base_kwargs):
        """
        Render the diff once and remember the inputs for later re-rendering.

        Args:
            prev_snapshot: Previous text (may be empty/None).
            current_snapshot: Current text (may be empty/None).
            **base_kwargs: Default keyword arguments forwarded to diff.render_diff().
        """
        if prev_snapshot or current_snapshot:
            from changedetectionio import diff as diff_module
            rendered = diff_module.render_diff(prev_snapshot, current_snapshot, **base_kwargs)
        else:
            # Nothing to compare: skip the render (and the import) entirely
            rendered = ''
        instance = super().__new__(cls, rendered)
        instance._prev = prev_snapshot
        instance._current = current_snapshot
        instance._base_kwargs = base_kwargs
        return instance

    @classmethod
    def _restore(cls, rendered, prev_snapshot, current_snapshot, base_kwargs):
        """Rebuild an instance from already-rendered text (used by __reduce__)."""
        instance = str.__new__(cls, rendered)
        instance._prev = prev_snapshot
        instance._current = current_snapshot
        instance._base_kwargs = base_kwargs
        return instance

    def __reduce__(self):
        """
        Support copy.copy(), copy.deepcopy() and pickle.

        The default str reconstruction calls __new__ with only the rendered text,
        which does not match this class's (prev, current, **kwargs) signature.
        """
        return (
            type(self)._restore,
            (str(self), self._prev, self._current, dict(self._base_kwargs)),
        )

    def __call__(self, lines=None, added_only=False, removed_only=False, context=0,
                 word_diff=None, case_insensitive=False, ignore_junk=False):
        """
        Re-render the diff with per-call overrides on top of the stored defaults.

        Args:
            lines: Keep only the first N lines of the output (None = no limit).
            added_only: Leave out removed lines.
            removed_only: Leave out added lines.
            context: Number of context lines around changes (0 keeps the default).
            word_diff: True/False to force word- or line-level diff; None keeps the default.
            case_insensitive: Forwarded as case_insensitive=True when set.
            ignore_junk: Forwarded as ignore_junk=True when set.

        Returns:
            str: The rendered diff text.
        """
        from changedetectionio import diff as diff_module
        # Start from a copy so the stored defaults are never modified
        kwargs = dict(self._base_kwargs)

        if added_only:
            kwargs['include_removed'] = False
        if removed_only:
            kwargs['include_added'] = False
        if context:
            kwargs['context_lines'] = int(context)
        if word_diff is not None:
            kwargs['word_diff'] = bool(word_diff)
        if case_insensitive:
            kwargs['case_insensitive'] = True
        if ignore_junk:
            kwargs['ignore_junk'] = True

        result = diff_module.render_diff(self._prev or '', self._current or '', **kwargs)

        if lines is not None:
            result = '\n'.join(result.splitlines()[:int(lines)])

        return result
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# What is passed around as notification context, also used as the complete list of valid {{ tokens }}
|
# What is passed around as notification context, also used as the complete list of valid {{ tokens }}
|
||||||
class NotificationContextData(dict):
|
class NotificationContextData(dict):
|
||||||
def __init__(self, initial_data=None, **kwargs):
|
def __init__(self, initial_data=None, **kwargs):
|
||||||
|
# ValidateJinja2Template() validates against the keynames of this dict to check for valid tokens in the body (user submission)
|
||||||
super().__init__({
|
super().__init__({
|
||||||
'base_url': None,
|
'base_url': None,
|
||||||
|
'change_datetime': FormattableTimestamp(time.time()),
|
||||||
'current_snapshot': None,
|
'current_snapshot': None,
|
||||||
'diff': None,
|
'diff': FormattableDiff('', ''),
|
||||||
'diff_clean': None,
|
'diff_clean': FormattableDiff('', '', include_change_type_prefix=False),
|
||||||
'diff_added': None,
|
'diff_added': FormattableDiff('', '', include_removed=False),
|
||||||
'diff_added_clean': None,
|
'diff_added_clean': FormattableDiff('', '', include_removed=False, include_change_type_prefix=False),
|
||||||
'diff_full': None,
|
'diff_full': FormattableDiff('', '', include_equal=True),
|
||||||
'diff_full_clean': None,
|
'diff_full_clean': FormattableDiff('', '', include_equal=True, include_change_type_prefix=False),
|
||||||
'diff_patch': None,
|
'diff_patch': FormattableDiff('', '', patch_format=True),
|
||||||
'diff_removed': None,
|
'diff_removed': FormattableDiff('', '', include_added=False),
|
||||||
'diff_removed_clean': None,
|
'diff_removed_clean': FormattableDiff('', '', include_added=False, include_change_type_prefix=False),
|
||||||
'diff_url': None,
|
'diff_url': None,
|
||||||
'markup_text_links_to_html_links': False, # If automatic conversion of plaintext to HTML should happen
|
'markup_text_links_to_html_links': False, # If automatic conversion of plaintext to HTML should happen
|
||||||
'notification_timestamp': time.time(),
|
'notification_timestamp': time.time(),
|
||||||
|
'prev_snapshot': None,
|
||||||
'preview_url': None,
|
'preview_url': None,
|
||||||
'screenshot': None,
|
'screenshot': None,
|
||||||
'triggered_text': None,
|
|
||||||
'timestamp_from': None,
|
'timestamp_from': None,
|
||||||
'timestamp_to': None,
|
'timestamp_to': None,
|
||||||
|
'triggered_text': None,
|
||||||
'uuid': 'XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX', # Converted to 'watch_uuid' in create_notification_parameters
|
'uuid': 'XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX', # Converted to 'watch_uuid' in create_notification_parameters
|
||||||
'watch_mime_type': None,
|
'watch_mime_type': None,
|
||||||
'watch_tag': None,
|
'watch_tag': None,
|
||||||
'watch_title': None,
|
'watch_title': None,
|
||||||
'watch_url': 'https://WATCH-PLACE-HOLDER/',
|
'watch_url': 'https://WATCH-PLACE-HOLDER/',
|
||||||
|
'watch_uuid': 'XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX', # Converted to 'watch_uuid' in create_notification_parameters
|
||||||
})
|
})
|
||||||
|
|
||||||
# Apply any initial data passed in
|
# Apply any initial data passed in
|
||||||
@@ -103,7 +197,7 @@ class NotificationContextData(dict):
|
|||||||
So we can test the output in the notification body
|
So we can test the output in the notification body
|
||||||
"""
|
"""
|
||||||
for key in self.keys():
|
for key in self.keys():
|
||||||
if key in ['uuid', 'time', 'watch_uuid']:
|
if key in ['uuid', 'time', 'watch_uuid', 'change_datetime'] or key.startswith('diff'):
|
||||||
continue
|
continue
|
||||||
rand_str = 'RANDOM-PLACEHOLDER-'+''.join(random.choices(string.ascii_letters + string.digits, k=12))
|
rand_str = 'RANDOM-PLACEHOLDER-'+''.join(random.choices(string.ascii_letters + string.digits, k=12))
|
||||||
self[key] = rand_str
|
self[key] = rand_str
|
||||||
@@ -115,24 +209,6 @@ class NotificationContextData(dict):
|
|||||||
|
|
||||||
super().__setitem__(key, value)
|
super().__setitem__(key, value)
|
||||||
|
|
||||||
def timestamp_to_localtime(timestamp):
|
|
||||||
# Format the date using locale-aware formatting with timezone
|
|
||||||
dt = datetime.datetime.fromtimestamp(int(timestamp))
|
|
||||||
dt = dt.replace(tzinfo=pytz.UTC)
|
|
||||||
|
|
||||||
# Get local timezone-aware datetime
|
|
||||||
local_tz = datetime.datetime.now().astimezone().tzinfo
|
|
||||||
local_dt = dt.astimezone(local_tz)
|
|
||||||
|
|
||||||
# Format date with timezone - using strftime for locale awareness
|
|
||||||
try:
|
|
||||||
formatted_date = local_dt.strftime('%Y-%m-%d %H:%M:%S %Z')
|
|
||||||
except:
|
|
||||||
# Fallback if locale issues
|
|
||||||
formatted_date = local_dt.isoformat()
|
|
||||||
|
|
||||||
return formatted_date
|
|
||||||
|
|
||||||
def add_rendered_diff_to_notification_vars(notification_scan_text:str, prev_snapshot:str, current_snapshot:str, word_diff:bool):
|
def add_rendered_diff_to_notification_vars(notification_scan_text:str, prev_snapshot:str, current_snapshot:str, word_diff:bool):
|
||||||
"""
|
"""
|
||||||
Efficiently renders only the diff placeholders that are actually used in the notification text.
|
Efficiently renders only the diff placeholders that are actually used in the notification text.
|
||||||
@@ -150,13 +226,12 @@ def add_rendered_diff_to_notification_vars(notification_scan_text:str, prev_snap
|
|||||||
Returns:
|
Returns:
|
||||||
dict: Only the diff placeholders that were found in notification_scan_text, with rendered content
|
dict: Only the diff placeholders that were found in notification_scan_text, with rendered content
|
||||||
"""
|
"""
|
||||||
from changedetectionio import diff
|
|
||||||
import re
|
import re
|
||||||
from functools import lru_cache
|
|
||||||
|
|
||||||
now = time.time()
|
now = time.time()
|
||||||
|
|
||||||
# Define specifications for each diff variant
|
# Define base kwargs for each diff variant — these become the stored defaults
|
||||||
|
# on the FormattableDiff object, so {{ diff(lines=5) }} overrides on top of them
|
||||||
diff_specs = {
|
diff_specs = {
|
||||||
'diff': {'word_diff': word_diff},
|
'diff': {'word_diff': word_diff},
|
||||||
'diff_clean': {'word_diff': word_diff, 'include_change_type_prefix': False},
|
'diff_clean': {'word_diff': word_diff, 'include_change_type_prefix': False},
|
||||||
@@ -169,22 +244,15 @@ def add_rendered_diff_to_notification_vars(notification_scan_text:str, prev_snap
|
|||||||
'diff_removed_clean': {'word_diff': word_diff, 'include_added': False, 'include_change_type_prefix': False},
|
'diff_removed_clean': {'word_diff': word_diff, 'include_added': False, 'include_change_type_prefix': False},
|
||||||
}
|
}
|
||||||
|
|
||||||
# Memoize render_diff to avoid duplicate renders with same kwargs
|
|
||||||
@lru_cache(maxsize=4)
|
|
||||||
def cached_render(kwargs_tuple):
|
|
||||||
return diff.render_diff(prev_snapshot, current_snapshot, **dict(kwargs_tuple))
|
|
||||||
|
|
||||||
ret = {}
|
ret = {}
|
||||||
rendered_count = 0
|
rendered_count = 0
|
||||||
# Only check and render diff keys that exist in NotificationContextData
|
# Only create FormattableDiff objects for diff keys actually used in the notification text
|
||||||
for key in NotificationContextData().keys():
|
for key in NotificationContextData().keys():
|
||||||
if key.startswith('diff') and key in diff_specs:
|
if key.startswith('diff') and key in diff_specs:
|
||||||
# Check if this placeholder is actually used in the notification text
|
# Check if this placeholder is actually used in the notification text
|
||||||
pattern = rf"(?<![A-Za-z0-9_]){re.escape(key)}(?![A-Za-z0-9_])"
|
pattern = rf"(?<![A-Za-z0-9_]){re.escape(key)}(?![A-Za-z0-9_])"
|
||||||
if re.search(pattern, notification_scan_text, re.IGNORECASE):
|
if re.search(pattern, notification_scan_text, re.IGNORECASE):
|
||||||
kwargs = diff_specs[key]
|
ret[key] = FormattableDiff(prev_snapshot, current_snapshot, **diff_specs[key])
|
||||||
# Convert dict to sorted tuple for cache key (handles duplicate kwarg combinations)
|
|
||||||
ret[key] = cached_render(tuple(sorted(kwargs.items())))
|
|
||||||
rendered_count += 1
|
rendered_count += 1
|
||||||
|
|
||||||
if rendered_count:
|
if rendered_count:
|
||||||
@@ -198,7 +266,7 @@ def set_basic_notification_vars(current_snapshot, prev_snapshot, watch, triggere
|
|||||||
'current_snapshot': current_snapshot,
|
'current_snapshot': current_snapshot,
|
||||||
'prev_snapshot': prev_snapshot,
|
'prev_snapshot': prev_snapshot,
|
||||||
'screenshot': watch.get_screenshot() if watch and watch.get('notification_screenshot') else None,
|
'screenshot': watch.get_screenshot() if watch and watch.get('notification_screenshot') else None,
|
||||||
'change_datetime': timestamp_to_localtime(timestamp_changed) if timestamp_changed else None,
|
'change_datetime': FormattableTimestamp(timestamp_changed) if timestamp_changed else None,
|
||||||
'triggered_text': triggered_text,
|
'triggered_text': triggered_text,
|
||||||
'uuid': watch.get('uuid') if watch else None,
|
'uuid': watch.get('uuid') if watch else None,
|
||||||
'watch_url': watch.get('url') if watch else None,
|
'watch_url': watch.get('url') if watch else None,
|
||||||
|
|||||||
@@ -129,6 +129,51 @@ class ChangeDetectionSpec:
|
|||||||
"""
|
"""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
@hookspec
def update_handler_alter(update_handler, watch, datastore):
    """Let a plugin modify or wrap the update_handler before a watch is processed.

    Called once the update_handler (perform_site_check instance) exists but
    before call_browser() and run_changedetection() are invoked on it.
    Typical uses:
    - Wrap the handler to add logging/metrics
    - Modify handler configuration
    - Add custom preprocessing logic

    Args:
        update_handler: The perform_site_check instance that will process the watch
        watch: The watch dict being processed
        datastore: The application datastore

    Returns:
        object or None: A modified/wrapped handler, or None to keep the original.
            NOTE(review): how the results of several plugins are combined is
            decided by the code invoking this hook - confirm there before
            relying on any chaining order.
    """
    pass
|
||||||
|
|
||||||
|
@hookspec
def update_finalize(update_handler, watch, datastore, processing_exception):
    """Notify plugins that processing of a watch has ended (success or failure).

    Invoked from the finally block once all processing is complete, so plugins
    can perform cleanup, update metrics, or log final status.

    A plugin may read state it stored on the handler during
    update_handler_alter (for example update_handler.last_logging_insert_id)
    and inspect processing_exception to tell success from failure.

    Args:
        update_handler: The perform_site_check instance (may be None if creation failed)
        watch: The watch dict that was processed (may be None if not loaded)
        datastore: The application datastore
        processing_exception: The exception from the main processing block, or None
            if successful. Cleanup exceptions are NOT included - only exceptions
            from the actual watch processing (fetch, diff, etc).

    Returns:
        None: This hook doesn't return a value
    """
    pass
|
||||||
|
|
||||||
|
|
||||||
# Set up Plugin Manager
|
# Set up Plugin Manager
|
||||||
plugin_manager = pluggy.PluginManager(PLUGIN_NAMESPACE)
|
plugin_manager = pluggy.PluginManager(PLUGIN_NAMESPACE)
|
||||||
@@ -499,4 +544,66 @@ def get_plugin_template_paths():
|
|||||||
template_paths.append(templates_dir)
|
template_paths.append(templates_dir)
|
||||||
logger.debug(f"Added plugin template path: {templates_dir}")
|
logger.debug(f"Added plugin template path: {templates_dir}")
|
||||||
|
|
||||||
return template_paths
|
return template_paths
|
||||||
|
|
||||||
|
|
||||||
|
def apply_update_handler_alter(update_handler, watch, datastore):
    """Apply update_handler_alter hooks from all plugins.

    Allows plugins to wrap or modify the update_handler before it processes a watch.

    The hook is invoked ONCE, so every plugin receives the ORIGINAL
    update_handler - results are not fed from one plugin into the next. When
    several plugins return a handler, the last non-None entry of the result
    list is used and the others are discarded.
    NOTE(review): the result ordering comes from pluggy (documented as
    last-registered-first) - confirm which plugin should win, or switch to
    per-plugin calls if true chaining is required.

    Args:
        update_handler: The perform_site_check instance to potentially modify
        watch: The watch dict being processed
        datastore: The application datastore

    Returns:
        object: The handler returned by a plugin, or the original update_handler
            when no plugin returned one.
    """
    # Single call: every registered implementation sees the same arguments
    results = plugin_manager.hook.update_handler_alter(
        update_handler=update_handler,
        watch=watch,
        datastore=datastore
    )

    # Last non-None result wins; an empty/None result list keeps the original
    current_handler = update_handler
    for result in results or ():
        if result is not None:
            logger.debug(f"Plugin modified update_handler for watch {watch.get('uuid')}")
            current_handler = result

    return current_handler
|
||||||
|
|
||||||
|
|
||||||
|
def apply_update_finalize(update_handler, watch, datastore, processing_exception):
    """Run the update_finalize hook of every plugin.

    Meant to be called from the finally block after watch processing, giving
    plugins the chance to perform cleanup, update metrics, or log final status.
    Any exception raised while calling the hook is logged and swallowed.

    Args:
        update_handler: The perform_site_check instance (may be None)
        watch: The watch dict that was processed (may be None)
        datastore: The application datastore
        processing_exception: The exception from processing, or None if successful

    Returns:
        None
    """
    hook_arguments = {
        'update_handler': update_handler,
        'watch': watch,
        'datastore': datastore,
        'processing_exception': processing_exception,
    }
    try:
        # Call all plugins that implement the update_finalize hook
        plugin_manager.hook.update_finalize(**hook_arguments)
    except Exception as e:
        # Don't let plugin errors crash the worker
        logger.error(f"Error in update_finalize hook: {e}")
        logger.exception("update_finalize hook exception details:")
|
||||||
@@ -9,6 +9,15 @@ Some suggestions for the future
|
|||||||
|
|
||||||
- `graphical`
|
- `graphical`
|
||||||
|
|
||||||
|
## API schema extension (`api.yaml`)
|
||||||
|
|
||||||
|
A processor can extend the Watch/Tag API schema by placing an `api.yaml` alongside its `__init__.py`.
|
||||||
|
Define a `components.schemas.processor_config_<name>` entry and it will be merged into `WatchBase` at startup,
|
||||||
|
making `processor_config_<name>` a valid field on all watch create/update API calls.
|
||||||
|
The fully merged spec is served live at `/api/v1/full-spec`.
|
||||||
|
|
||||||
|
See `restock_diff/api.yaml` for a working example.
|
||||||
|
|
||||||
## Todo
|
## Todo
|
||||||
|
|
||||||
- Make each processor return an extra list of sub-processors (so you could configure a single processor in different ways)
|
- Make each processor return an extra list of sub-processors (so you could configure a single processor in different ways)
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
from functools import lru_cache
|
from functools import lru_cache
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
from flask_babel import gettext
|
from flask_babel import gettext, get_locale
|
||||||
import importlib
|
import importlib
|
||||||
import inspect
|
import inspect
|
||||||
import os
|
import os
|
||||||
@@ -190,14 +190,15 @@ def get_plugin_processor_metadata():
|
|||||||
logger.warning(f"Error getting plugin processor metadata: {e}")
|
logger.warning(f"Error getting plugin processor metadata: {e}")
|
||||||
return metadata
|
return metadata
|
||||||
|
|
||||||
|
@lru_cache(maxsize=32)
|
||||||
def available_processors():
|
def _available_processors_cached(locale_str):
|
||||||
"""
|
|
||||||
Get a list of processors by name and description for the UI elements.
|
|
||||||
Can be filtered via DISABLED_PROCESSORS environment variable (comma-separated list).
|
|
||||||
:return: A list :)
|
|
||||||
"""
|
"""
|
||||||
|
Internal cached function that includes locale in cache key.
|
||||||
|
This ensures translations are cached per-language instead of globally.
|
||||||
|
|
||||||
|
:param locale_str: The locale string (e.g., 'en', 'it', 'zh')
|
||||||
|
:return: A list of tuples (processor_name, translated_description, weight)
|
||||||
|
"""
|
||||||
processor_classes = find_processors()
|
processor_classes = find_processors()
|
||||||
|
|
||||||
# Check if DISABLED_PROCESSORS env var is set
|
# Check if DISABLED_PROCESSORS env var is set
|
||||||
@@ -256,6 +257,22 @@ def available_processors():
|
|||||||
# Return as tuples without weight (for backwards compatibility)
|
# Return as tuples without weight (for backwards compatibility)
|
||||||
return [(name, desc) for name, desc, weight in available]
|
return [(name, desc) for name, desc, weight in available]
|
||||||
|
|
||||||
|
def available_processors():
    """
    List the processors (name and description) offered to the UI.

    The list can be filtered via the DISABLED_PROCESSORS environment variable
    (comma-separated list).

    The work is delegated to a cached helper keyed by the active locale, so the
    translated descriptions are cached per language rather than once globally.

    :return: A list of tuples (processor_name, translated_description)
    """
    active_locale = get_locale()

    # The Babel Locale object is turned into a plain string so it can serve
    # as the cache key; fall back to English when no locale is available.
    if active_locale:
        cache_key = str(active_locale)
    else:
        cache_key = 'en'

    return _available_processors_cached(cache_key)
|
||||||
|
|
||||||
|
|
||||||
def get_default_processor():
|
def get_default_processor():
|
||||||
"""
|
"""
|
||||||
|
|||||||
@@ -1,10 +1,15 @@
|
|||||||
|
import asyncio
|
||||||
import re
|
import re
|
||||||
import hashlib
|
import hashlib
|
||||||
|
|
||||||
|
from changedetectionio.browser_steps.browser_steps import browser_steps_get_valid_steps
|
||||||
from changedetectionio.content_fetchers.base import Fetcher
|
from changedetectionio.content_fetchers.base import Fetcher
|
||||||
from changedetectionio.strtobool import strtobool
|
from changedetectionio.strtobool import strtobool
|
||||||
|
from changedetectionio.validate_url import is_private_hostname
|
||||||
from copy import deepcopy
|
from copy import deepcopy
|
||||||
from abc import abstractmethod
|
from abc import abstractmethod
|
||||||
import os
|
import os
|
||||||
|
from urllib.parse import urlparse
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
SCREENSHOT_FORMAT_JPEG = 'JPEG'
|
SCREENSHOT_FORMAT_JPEG = 'JPEG'
|
||||||
@@ -19,6 +24,7 @@ class difference_detection_processor():
|
|||||||
xpath_data = None
|
xpath_data = None
|
||||||
preferred_proxy = None
|
preferred_proxy = None
|
||||||
screenshot_format = SCREENSHOT_FORMAT_JPEG
|
screenshot_format = SCREENSHOT_FORMAT_JPEG
|
||||||
|
last_raw_content_checksum = None
|
||||||
|
|
||||||
def __init__(self, datastore, watch_uuid):
|
def __init__(self, datastore, watch_uuid):
|
||||||
self.datastore = datastore
|
self.datastore = datastore
|
||||||
@@ -34,6 +40,81 @@ class difference_detection_processor():
|
|||||||
# Generic fetcher that should be extended (requests, playwright etc)
|
# Generic fetcher that should be extended (requests, playwright etc)
|
||||||
self.fetcher = Fetcher()
|
self.fetcher = Fetcher()
|
||||||
|
|
||||||
|
# Load the last raw content checksum from file
|
||||||
|
self.read_last_raw_content_checksum()
|
||||||
|
|
||||||
|
def update_last_raw_content_checksum(self, checksum):
    """
    Persist the raw content MD5 checksum to ``last-checksum.txt`` in the watch's data dir.

    The stored value drives the skip logic: when the raw HTML is unchanged
    there is no need to reprocess it. Nothing is written when the checksum is
    empty, the watch cannot be found, or the watch has no data directory.
    The in-memory ``last_raw_content_checksum`` is only updated after a
    successful write.
    """
    if not checksum:
        return

    watch = self.datastore.data['watching'].get(self.watch_uuid)
    target_dir = watch.data_dir if watch else None
    if not target_dir:
        return

    watch.ensure_data_dir_exists()
    target_path = os.path.join(target_dir, 'last-checksum.txt')

    try:
        with open(target_path, 'w', encoding='utf-8') as handle:
            handle.write(checksum)
    except IOError as err:
        # Best effort only - a failed write just means the next check reprocesses.
        logger.warning(f"Failed to write checksum file for {self.watch_uuid}: {err}")
    else:
        self.last_raw_content_checksum = checksum
|
||||||
|
|
||||||
|
def read_last_raw_content_checksum(self):
    """
    Load the last raw content MD5 checksum from ``last-checksum.txt`` into
    ``self.last_raw_content_checksum``.

    The attribute is set to None when the watch is unknown, has no data
    directory, the file does not exist yet (first run) or cannot be read.
    The method itself returns nothing.
    """
    stored = None

    watch = self.datastore.data['watching'].get(self.watch_uuid)
    source_dir = watch.data_dir if watch else None

    if source_dir:
        source_path = os.path.join(source_dir, 'last-checksum.txt')
        if os.path.isfile(source_path):
            try:
                with open(source_path, 'r', encoding='utf-8') as handle:
                    stored = handle.read().strip()
            except IOError as err:
                # Unreadable file is treated the same as "no previous checksum".
                logger.warning(f"Failed to read checksum file for {self.watch_uuid}: {err}")

    self.last_raw_content_checksum = stored
|
||||||
|
|
||||||
|
|
||||||
|
async def validate_iana_url(self):
    """Pre-flight SSRF guard for the watch URL.

    The hostname check (which does the DNS lookup) is pushed to the default
    executor so the event loop is not blocked. Because every fetch goes through
    call_browser(), this covers all fetchers (requests, playwright, puppeteer,
    plugins).

    The check is skipped entirely when ALLOW_IANA_RESTRICTED_ADDRESSES is
    truthy, and for URLs without a hostname.

    Raises:
        Exception: when the hostname is reported as private/reserved.
    """
    restricted_allowed = strtobool(os.getenv('ALLOW_IANA_RESTRICTED_ADDRESSES', 'false'))
    if restricted_allowed:
        return

    hostname = urlparse(self.watch.link).hostname
    if not hostname:
        return

    event_loop = asyncio.get_running_loop()
    is_private = await event_loop.run_in_executor(None, is_private_hostname, hostname)
    if not is_private:
        return

    raise Exception(
        f"Fetch blocked: '{self.watch.link}' resolves to a private/reserved IP address. "
        "Set ALLOW_IANA_RESTRICTED_ADDRESSES=true to allow."
    )
|
||||||
|
|
||||||
async def call_browser(self, preferred_proxy_id=None):
|
async def call_browser(self, preferred_proxy_id=None):
|
||||||
|
|
||||||
from requests.structures import CaseInsensitiveDict
|
from requests.structures import CaseInsensitiveDict
|
||||||
@@ -47,6 +128,8 @@ class difference_detection_processor():
|
|||||||
"file:// type access is denied for security reasons."
|
"file:// type access is denied for security reasons."
|
||||||
)
|
)
|
||||||
|
|
||||||
|
await self.validate_iana_url()
|
||||||
|
|
||||||
# Requests, playwright, other browser via wss:// etc, fetch_extra_something
|
# Requests, playwright, other browser via wss:// etc, fetch_extra_something
|
||||||
prefer_fetch_backend = self.watch.get('fetch_backend', 'system')
|
prefer_fetch_backend = self.watch.get('fetch_backend', 'system')
|
||||||
|
|
||||||
@@ -110,7 +193,7 @@ class difference_detection_processor():
|
|||||||
)
|
)
|
||||||
|
|
||||||
if self.watch.has_browser_steps:
|
if self.watch.has_browser_steps:
|
||||||
self.fetcher.browser_steps = self.watch.get('browser_steps', [])
|
self.fetcher.browser_steps = browser_steps_get_valid_steps(self.watch.get('browser_steps', []))
|
||||||
self.fetcher.browser_steps_screenshot_path = os.path.join(self.datastore.datastore_path, self.watch.get('uuid'))
|
self.fetcher.browser_steps_screenshot_path = os.path.join(self.datastore.datastore_path, self.watch.get('uuid'))
|
||||||
|
|
||||||
# Tweak the base config with the per-watch ones
|
# Tweak the base config with the per-watch ones
|
||||||
@@ -177,6 +260,16 @@ class difference_detection_processor():
|
|||||||
# @todo .quit here could go on close object, so we can run JS if change-detected
|
# @todo .quit here could go on close object, so we can run JS if change-detected
|
||||||
await self.fetcher.quit(watch=self.watch)
|
await self.fetcher.quit(watch=self.watch)
|
||||||
|
|
||||||
|
# Sanitize lone surrogates - these can appear when servers return malformed/mixed-encoding
|
||||||
|
# content that gets decoded into surrogate characters (e.g. \udcad). Without this,
|
||||||
|
# encode('utf-8') raises UnicodeEncodeError downstream in checksums, diffs, file writes, etc.
|
||||||
|
# Covers all fetchers (requests, playwright, puppeteer, selenium) in one place.
|
||||||
|
# Also note: By this point we SHOULD know the original encoding so it can safely convert to utf-8 for the rest of the app.
|
||||||
|
# See: https://github.com/dgtlmoon/changedetection.io/issues/3952
|
||||||
|
|
||||||
|
if self.fetcher.content and isinstance(self.fetcher.content, str):
|
||||||
|
self.fetcher.content = self.fetcher.content.encode('utf-8', errors='replace').decode('utf-8')
|
||||||
|
|
||||||
# After init, call run_changedetection() which will do the actual change-detection
|
# After init, call run_changedetection() which will do the actual change-detection
|
||||||
|
|
||||||
def get_extra_watch_config(self, filename):
|
def get_extra_watch_config(self, filename):
|
||||||
@@ -257,8 +350,16 @@ class difference_detection_processor():
|
|||||||
except IOError as e:
|
except IOError as e:
|
||||||
logger.error(f"Failed to write extra watch config {filename}: {e}")
|
logger.error(f"Failed to write extra watch config {filename}: {e}")
|
||||||
|
|
||||||
|
def get_raw_document_checksum(self):
    """Return the MD5 hex digest of the fetched raw content.

    Used for change-skip logic only (not for anything security related).

    Returns:
        str | None: 32-character hex digest, or None when the fetcher has no
        content (None, empty string or empty bytes).
    """
    content = self.fetcher.content
    if not content:
        return None

    if isinstance(content, str):
        # errors='replace' mirrors the lone-surrogate sanitising applied to
        # str content after fetching, so malformed text yields the same
        # checksum as its sanitised form instead of raising UnicodeEncodeError.
        content = content.encode('utf-8', errors='replace')

    # Non-str content (e.g. bytes) is hashed as-is rather than failing on a
    # missing .encode() method.
    return hashlib.md5(content).hexdigest()
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def run_changedetection(self, watch):
|
def run_changedetection(self, watch, force_reprocess=False):
|
||||||
update_obj = {'last_notification_error': False, 'last_error': False}
|
update_obj = {'last_notification_error': False, 'last_error': False}
|
||||||
some_data = 'xxxxx'
|
some_data = 'xxxxx'
|
||||||
update_obj["previous_md5"] = hashlib.md5(some_data.encode('utf-8')).hexdigest()
|
update_obj["previous_md5"] = hashlib.md5(some_data.encode('utf-8')).hexdigest()
|
||||||
|
|||||||
@@ -42,10 +42,7 @@ def render_form(watch, datastore, request, url_for, render_template, flash, redi
|
|||||||
# Get error information for the template
|
# Get error information for the template
|
||||||
screenshot_url = watch.get_screenshot()
|
screenshot_url = watch.get_screenshot()
|
||||||
|
|
||||||
system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver'
|
is_html_webdriver = watch.fetcher_supports_screenshots
|
||||||
is_html_webdriver = False
|
|
||||||
if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver' or watch.get('fetch_backend', '').startswith('extra_browser_'):
|
|
||||||
is_html_webdriver = True
|
|
||||||
|
|
||||||
password_enabled_and_share_is_off = False
|
password_enabled_and_share_is_off = False
|
||||||
if datastore.data['settings']['application'].get('password') or os.getenv("SALTED_PASS", False):
|
if datastore.data['settings']['application'].get('password') or os.getenv("SALTED_PASS", False):
|
||||||
|
|||||||
@@ -30,7 +30,7 @@ class perform_site_check(difference_detection_processor):
|
|||||||
# Override to use PNG format for better image comparison (JPEG compression creates noise)
|
# Override to use PNG format for better image comparison (JPEG compression creates noise)
|
||||||
screenshot_format = SCREENSHOT_FORMAT_PNG
|
screenshot_format = SCREENSHOT_FORMAT_PNG
|
||||||
|
|
||||||
def run_changedetection(self, watch):
|
def run_changedetection(self, watch, force_reprocess=False):
|
||||||
"""
|
"""
|
||||||
Perform screenshot comparison using OpenCV subprocess handler.
|
Perform screenshot comparison using OpenCV subprocess handler.
|
||||||
|
|
||||||
|
|||||||
@@ -100,7 +100,13 @@ class guess_stream_type():
|
|||||||
if any(s in http_content_header for s in RSS_XML_CONTENT_TYPES):
|
if any(s in http_content_header for s in RSS_XML_CONTENT_TYPES):
|
||||||
self.is_rss = True
|
self.is_rss = True
|
||||||
elif any(s in http_content_header for s in JSON_CONTENT_TYPES):
|
elif any(s in http_content_header for s in JSON_CONTENT_TYPES):
|
||||||
self.is_json = True
|
# JSONP detection: server claims application/json but content is actually JSONP (e.g. cb({...}))
|
||||||
|
# A JSONP response starts with an identifier followed by '(' - not valid JSON
|
||||||
|
if re.match(r'^\w[\w.]*\s*\(', test_content):
|
||||||
|
logger.warning(f"Content-Type header claims JSON but content looks like JSONP (starts with identifier+parenthesis) - treating as plaintext")
|
||||||
|
self.is_plaintext = True
|
||||||
|
else:
|
||||||
|
self.is_json = True
|
||||||
elif 'pdf' in magic_content_header:
|
elif 'pdf' in magic_content_header:
|
||||||
self.is_pdf = True
|
self.is_pdf = True
|
||||||
# magic will call a rss document 'xml'
|
# magic will call a rss document 'xml'
|
||||||
|
|||||||
@@ -31,6 +31,7 @@ class Restock(dict):
|
|||||||
|
|
||||||
if standardized_value:
|
if standardized_value:
|
||||||
# Convert to float
|
# Convert to float
|
||||||
|
# @todo locale needs to be the locale of the webpage
|
||||||
return float(parse_decimal(standardized_value, locale='en'))
|
return float(parse_decimal(standardized_value, locale='en'))
|
||||||
|
|
||||||
return None
|
return None
|
||||||
@@ -67,10 +68,6 @@ class Watch(BaseWatch):
|
|||||||
super().__init__(*arg, **kw)
|
super().__init__(*arg, **kw)
|
||||||
self['restock'] = Restock(kw['default']['restock']) if kw.get('default') and kw['default'].get('restock') else Restock()
|
self['restock'] = Restock(kw['default']['restock']) if kw.get('default') and kw['default'].get('restock') else Restock()
|
||||||
|
|
||||||
self['restock_settings'] = kw['default']['restock_settings'] if kw.get('default',{}).get('restock_settings') else {
|
|
||||||
'follow_price_changes': True,
|
|
||||||
'in_stock_processing' : 'in_stock_only'
|
|
||||||
} #@todo update
|
|
||||||
|
|
||||||
def clear_watch(self):
|
def clear_watch(self):
|
||||||
super().clear_watch()
|
super().clear_watch()
|
||||||
|
|||||||
@@ -0,0 +1,149 @@
|
|||||||
|
components:
|
||||||
|
schemas:
|
||||||
|
processor_config_restock_diff:
|
||||||
|
type: object
|
||||||
|
description: Configuration for the restock_diff processor (restock and price tracking)
|
||||||
|
properties:
|
||||||
|
in_stock_processing:
|
||||||
|
type: string
|
||||||
|
enum: [in_stock_only, all_changes, 'off']
|
||||||
|
default: in_stock_only
|
||||||
|
description: |
|
||||||
|
When to trigger on stock changes:
|
||||||
|
- `in_stock_only`: Only trigger on Out Of Stock -> In Stock transitions
|
||||||
|
- `all_changes`: Trigger on any availability change
|
||||||
|
- `off`: Disable stock/availability tracking
|
||||||
|
follow_price_changes:
|
||||||
|
type: boolean
|
||||||
|
default: true
|
||||||
|
description: Monitor and track price changes
|
||||||
|
price_change_min:
|
||||||
|
type: [number, 'null']
|
||||||
|
description: Trigger a notification when the price drops below this value
|
||||||
|
price_change_max:
|
||||||
|
type: [number, 'null']
|
||||||
|
description: Trigger a notification when the price rises above this value
|
||||||
|
price_change_threshold_percent:
|
||||||
|
type: [number, 'null']
|
||||||
|
minimum: 0
|
||||||
|
maximum: 100
|
||||||
|
description: Minimum price change percentage since the original price to trigger a notification
|
||||||
|
|
||||||
|
paths:
|
||||||
|
/watch:
|
||||||
|
post:
|
||||||
|
x-code-samples:
|
||||||
|
- lang: 'curl'
|
||||||
|
label: 'Restock & price tracking'
|
||||||
|
source: |
|
||||||
|
curl -X POST "http://localhost:5000/api/v1/watch" \
|
||||||
|
-H "x-api-key: YOUR_API_KEY" \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{
|
||||||
|
"url": "https://example.com/product",
|
||||||
|
"processor": "restock_diff",
|
||||||
|
"processor_config_restock_diff": {
|
||||||
|
"in_stock_processing": "in_stock_only",
|
||||||
|
"follow_price_changes": true,
|
||||||
|
"price_change_threshold_percent": 5
|
||||||
|
}
|
||||||
|
}'
|
||||||
|
- lang: 'Python'
|
||||||
|
label: 'Restock & price tracking'
|
||||||
|
source: |
|
||||||
|
import requests
|
||||||
|
|
||||||
|
headers = {
|
||||||
|
'x-api-key': 'YOUR_API_KEY',
|
||||||
|
'Content-Type': 'application/json'
|
||||||
|
}
|
||||||
|
data = {
|
||||||
|
'url': 'https://example.com/product',
|
||||||
|
'processor': 'restock_diff',
|
||||||
|
'processor_config_restock_diff': {
|
||||||
|
'in_stock_processing': 'in_stock_only',
|
||||||
|
'follow_price_changes': True,
|
||||||
|
'price_change_threshold_percent': 5,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
response = requests.post('http://localhost:5000/api/v1/watch',
|
||||||
|
headers=headers, json=data)
|
||||||
|
print(response.json())
|
||||||
|
|
||||||
|
/watch/{uuid}:
|
||||||
|
put:
|
||||||
|
x-code-samples:
|
||||||
|
- lang: 'curl'
|
||||||
|
label: 'Update restock config'
|
||||||
|
source: |
|
||||||
|
curl -X PUT "http://localhost:5000/api/v1/watch/YOUR-UUID" \
|
||||||
|
-H "x-api-key: YOUR_API_KEY" \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{
|
||||||
|
"processor_config_restock_diff": {
|
||||||
|
"in_stock_processing": "all_changes",
|
||||||
|
"follow_price_changes": true,
|
||||||
|
"price_change_min": 10.00,
|
||||||
|
"price_change_max": 500.00
|
||||||
|
}
|
||||||
|
}'
|
||||||
|
- lang: 'Python'
|
||||||
|
label: 'Update restock config'
|
||||||
|
source: |
|
||||||
|
import requests
|
||||||
|
|
||||||
|
headers = {
|
||||||
|
'x-api-key': 'YOUR_API_KEY',
|
||||||
|
'Content-Type': 'application/json'
|
||||||
|
}
|
||||||
|
uuid = 'YOUR-UUID'
|
||||||
|
data = {
|
||||||
|
'processor_config_restock_diff': {
|
||||||
|
'in_stock_processing': 'all_changes',
|
||||||
|
'follow_price_changes': True,
|
||||||
|
'price_change_min': 10.00,
|
||||||
|
'price_change_max': 500.00,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
response = requests.put(f'http://localhost:5000/api/v1/watch/{uuid}',
|
||||||
|
headers=headers, json=data)
|
||||||
|
print(response.text)
|
||||||
|
|
||||||
|
/tag/{uuid}:
|
||||||
|
put:
|
||||||
|
x-code-samples:
|
||||||
|
- lang: 'curl'
|
||||||
|
label: 'Set restock config on group/tag'
|
||||||
|
source: |
|
||||||
|
curl -X PUT "http://localhost:5000/api/v1/tag/YOUR-TAG-UUID" \
|
||||||
|
-H "x-api-key: YOUR_API_KEY" \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{
|
||||||
|
"overrides_watch": true,
|
||||||
|
"processor_config_restock_diff": {
|
||||||
|
"in_stock_processing": "in_stock_only",
|
||||||
|
"follow_price_changes": true,
|
||||||
|
"price_change_threshold_percent": 10
|
||||||
|
}
|
||||||
|
}'
|
||||||
|
- lang: 'Python'
|
||||||
|
label: 'Set restock config on group/tag'
|
||||||
|
source: |
|
||||||
|
import requests
|
||||||
|
|
||||||
|
headers = {
|
||||||
|
'x-api-key': 'YOUR_API_KEY',
|
||||||
|
'Content-Type': 'application/json'
|
||||||
|
}
|
||||||
|
tag_uuid = 'YOUR-TAG-UUID'
|
||||||
|
data = {
|
||||||
|
'overrides_watch': True,
|
||||||
|
'processor_config_restock_diff': {
|
||||||
|
'in_stock_processing': 'in_stock_only',
|
||||||
|
'follow_price_changes': True,
|
||||||
|
'price_change_threshold_percent': 10,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
response = requests.put(f'http://localhost:5000/api/v1/tag/{tag_uuid}',
|
||||||
|
headers=headers, json=data)
|
||||||
|
print(response.text)
|
||||||
@@ -31,7 +31,7 @@ class RestockSettingsForm(Form):
|
|||||||
follow_price_changes = BooleanField(_l('Follow price changes'), default=True)
|
follow_price_changes = BooleanField(_l('Follow price changes'), default=True)
|
||||||
|
|
||||||
class processor_settings_form(processor_text_json_diff_form):
|
class processor_settings_form(processor_text_json_diff_form):
|
||||||
restock_settings = FormField(RestockSettingsForm)
|
processor_config_restock_diff = FormField(RestockSettingsForm)
|
||||||
|
|
||||||
def extra_tab_content(self):
|
def extra_tab_content(self):
|
||||||
return _l('Restock & Price Detection')
|
return _l('Restock & Price Detection')
|
||||||
@@ -48,34 +48,34 @@ class processor_settings_form(processor_text_json_diff_form):
|
|||||||
|
|
||||||
output += """
|
output += """
|
||||||
{% from '_helpers.html' import render_field, render_checkbox_field, render_button %}
|
{% from '_helpers.html' import render_field, render_checkbox_field, render_button %}
|
||||||
<script>
|
<script>
|
||||||
$(document).ready(function () {
|
$(document).ready(function () {
|
||||||
toggleOpacity('#restock_settings-follow_price_changes', '.price-change-minmax', true);
|
toggleOpacity('#processor_config_restock_diff-follow_price_changes', '.price-change-minmax', true);
|
||||||
});
|
});
|
||||||
</script>
|
</script>
|
||||||
|
|
||||||
<fieldset id="restock-fieldset-price-group">
|
<fieldset id="restock-fieldset-price-group">
|
||||||
<div class="pure-control-group">
|
<div class="pure-control-group">
|
||||||
<fieldset class="pure-group inline-radio">
|
<fieldset class="pure-group inline-radio">
|
||||||
{{ render_field(form.restock_settings.in_stock_processing) }}
|
{{ render_field(form.processor_config_restock_diff.in_stock_processing) }}
|
||||||
</fieldset>
|
</fieldset>
|
||||||
<fieldset class="pure-group">
|
<fieldset class="pure-group">
|
||||||
{{ render_checkbox_field(form.restock_settings.follow_price_changes) }}
|
{{ render_checkbox_field(form.processor_config_restock_diff.follow_price_changes) }}
|
||||||
<span class="pure-form-message-inline">Changes in price should trigger a notification</span>
|
<span class="pure-form-message-inline">Changes in price should trigger a notification</span>
|
||||||
</fieldset>
|
</fieldset>
|
||||||
<fieldset class="pure-group price-change-minmax">
|
<fieldset class="pure-group price-change-minmax">
|
||||||
{{ render_field(form.restock_settings.price_change_min, placeholder=watch.get('restock', {}).get('price')) }}
|
{{ render_field(form.processor_config_restock_diff.price_change_min, placeholder=watch.get('restock', {}).get('price')) }}
|
||||||
<span class="pure-form-message-inline">Minimum amount, Trigger a change/notification when the price drops <i>below</i> this value.</span>
|
<span class="pure-form-message-inline">Minimum amount, Trigger a change/notification when the price drops <i>below</i> this value.</span>
|
||||||
</fieldset>
|
</fieldset>
|
||||||
<fieldset class="pure-group price-change-minmax">
|
<fieldset class="pure-group price-change-minmax">
|
||||||
{{ render_field(form.restock_settings.price_change_max, placeholder=watch.get('restock', {}).get('price')) }}
|
{{ render_field(form.processor_config_restock_diff.price_change_max, placeholder=watch.get('restock', {}).get('price')) }}
|
||||||
<span class="pure-form-message-inline">Maximum amount, Trigger a change/notification when the price rises <i>above</i> this value.</span>
|
<span class="pure-form-message-inline">Maximum amount, Trigger a change/notification when the price rises <i>above</i> this value.</span>
|
||||||
</fieldset>
|
</fieldset>
|
||||||
<fieldset class="pure-group price-change-minmax">
|
<fieldset class="pure-group price-change-minmax">
|
||||||
{{ render_field(form.restock_settings.price_change_threshold_percent) }}
|
{{ render_field(form.processor_config_restock_diff.price_change_threshold_percent) }}
|
||||||
<span class="pure-form-message-inline">Price must change more than this % to trigger a change since the first check.</span><br>
|
<span class="pure-form-message-inline">Price must change more than this % to trigger a change since the first check.</span><br>
|
||||||
<span class="pure-form-message-inline">For example, If the product is $1,000 USD originally, <strong>2%</strong> would mean it has to change more than $20 since the first check.</span><br>
|
<span class="pure-form-message-inline">For example, If the product is $1,000 USD originally, <strong>2%</strong> would mean it has to change more than $20 since the first check.</span><br>
|
||||||
</fieldset>
|
</fieldset>
|
||||||
</div>
|
</div>
|
||||||
</fieldset>
|
</fieldset>
|
||||||
"""
|
"""
|
||||||
|
|||||||
@@ -2,6 +2,7 @@ from ..base import difference_detection_processor
|
|||||||
from ..exceptions import ProcessorException
|
from ..exceptions import ProcessorException
|
||||||
from . import Restock
|
from . import Restock
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
from changedetectionio.content_fetchers.exceptions import checksumFromPreviousCheckWasTheSame
|
||||||
|
|
||||||
import urllib3
|
import urllib3
|
||||||
import time
|
import time
|
||||||
@@ -403,44 +404,65 @@ class perform_site_check(difference_detection_processor):
|
|||||||
screenshot = None
|
screenshot = None
|
||||||
xpath_data = None
|
xpath_data = None
|
||||||
|
|
||||||
def run_changedetection(self, watch):
|
def run_changedetection(self, watch, force_reprocess=False):
|
||||||
import hashlib
|
import hashlib
|
||||||
|
|
||||||
if not watch:
|
if not watch:
|
||||||
raise Exception("Watch no longer exists.")
|
raise Exception("Watch no longer exists.")
|
||||||
|
|
||||||
|
current_raw_document_checksum = self.get_raw_document_checksum()
|
||||||
|
# Skip processing only if BOTH conditions are true:
|
||||||
|
# 1. HTML content unchanged (checksum matches last saved checksum)
|
||||||
|
# 2. Watch configuration was not edited (including trigger_text, filters, etc.)
|
||||||
|
# The was_edited flag handles all watch configuration changes, so we don't need
|
||||||
|
# separate checks for trigger_text or other processing rules.
|
||||||
|
if (not force_reprocess and
|
||||||
|
not watch.was_edited and
|
||||||
|
self.last_raw_content_checksum and
|
||||||
|
self.last_raw_content_checksum == current_raw_document_checksum):
|
||||||
|
raise checksumFromPreviousCheckWasTheSame()
|
||||||
|
|
||||||
# Unset any existing notification error
|
# Unset any existing notification error
|
||||||
update_obj = {'last_notification_error': False, 'last_error': False, 'restock': Restock()}
|
update_obj = {'last_notification_error': False, 'last_error': False, 'restock': Restock()}
|
||||||
|
|
||||||
self.screenshot = self.fetcher.screenshot
|
self.screenshot = self.fetcher.screenshot
|
||||||
self.xpath_data = self.fetcher.xpath_data
|
self.xpath_data = self.fetcher.xpath_data
|
||||||
|
|
||||||
# Track the content type
|
# Track the content type (readonly field, doesn't trigger was_edited)
|
||||||
update_obj['content_type'] = self.fetcher.headers.get('Content-Type', '')
|
update_obj['content-type'] = self.fetcher.headers.get('Content-Type', '') # Use hyphen (matches OpenAPI spec)
|
||||||
update_obj["last_check_status"] = self.fetcher.get_last_status_code()
|
update_obj["last_check_status"] = self.fetcher.get_last_status_code()
|
||||||
|
|
||||||
|
# Save the raw content checksum to file (processor implementation detail, not watch config)
|
||||||
|
self.update_last_raw_content_checksum(current_raw_document_checksum)
|
||||||
|
|
||||||
# Only try to process restock information (like scraping for keywords) if the page was actually rendered correctly.
|
# Only try to process restock information (like scraping for keywords) if the page was actually rendered correctly.
|
||||||
# Otherwise it will assume "in stock" because nothing suggesting the opposite was found
|
# Otherwise it will assume "in stock" because nothing suggesting the opposite was found
|
||||||
from ...html_tools import html_to_text
|
#useless
|
||||||
text = html_to_text(self.fetcher.content)
|
# from ...html_tools import html_to_text
|
||||||
logger.debug(f"Length of text after conversion: {len(text)}")
|
# text = html_to_text(self.fetcher.content)
|
||||||
if not len(text):
|
# logger.debug(f"Length of text after conversion: {len(text)}")
|
||||||
from ...content_fetchers.exceptions import ReplyWithContentButNoText
|
# if not len(text):
|
||||||
raise ReplyWithContentButNoText(url=watch.link,
|
# from ...content_fetchers.exceptions import ReplyWithContentButNoText
|
||||||
status_code=self.fetcher.get_last_status_code(),
|
# raise ReplyWithContentButNoText(url=watch.link,
|
||||||
screenshot=self.fetcher.screenshot,
|
# status_code=self.fetcher.get_last_status_code(),
|
||||||
html_content=self.fetcher.content,
|
# screenshot=self.fetcher.screenshot,
|
||||||
xpath_data=self.fetcher.xpath_data
|
# html_content=self.fetcher.content,
|
||||||
)
|
# xpath_data=self.fetcher.xpath_data
|
||||||
|
# )
|
||||||
|
|
||||||
# Which restock settings to compare against?
|
# Which restock settings to compare against?
|
||||||
restock_settings = watch.get('restock_settings', {})
|
# Settings are stored in restock_diff.json (migrated from watch.json by update_30).
|
||||||
|
_extra_config = self.get_extra_watch_config('restock_diff.json')
|
||||||
|
restock_settings = _extra_config.get('restock_diff') or {
|
||||||
|
'follow_price_changes': True,
|
||||||
|
'in_stock_processing': 'in_stock_only',
|
||||||
|
}
|
||||||
|
|
||||||
# See if any tags have 'activate for individual watches in this tag/group?' enabled and use the first we find
|
# See if any tags have 'activate for individual watches in this tag/group?' enabled and use the first we find
|
||||||
for tag_uuid in watch.get('tags'):
|
for tag_uuid in watch.get('tags'):
|
||||||
tag = self.datastore.data['settings']['application']['tags'].get(tag_uuid, {})
|
tag = self.datastore.data['settings']['application']['tags'].get(tag_uuid, {})
|
||||||
if tag.get('overrides_watch'):
|
if tag.get('overrides_watch'):
|
||||||
restock_settings = tag.get('restock_settings', {})
|
restock_settings = tag.get('processor_config_restock_diff') or {}
|
||||||
logger.info(f"Watch {watch.get('uuid')} - Tag '{tag.get('title')}' selected for restock settings override")
|
logger.info(f"Watch {watch.get('uuid')} - Tag '{tag.get('title')}' selected for restock settings override")
|
||||||
break
|
break
|
||||||
|
|
||||||
|
|||||||
@@ -283,4 +283,7 @@ def query_price_availability(extracted_data):
|
|||||||
if not result.get('availability') and 'availability' in microdata:
|
if not result.get('availability') and 'availability' in microdata:
|
||||||
result['availability'] = microdata['availability']
|
result['availability'] = microdata['availability']
|
||||||
|
|
||||||
|
# result['price'] could be float or str here, depending on the website, for example it might contain "1,00" commas, etc.
|
||||||
|
# using something like babel you need to know the locale of the website and even then it can be problematic
|
||||||
|
# we dont really do anything with the price data so far.. so just accept it the way it comes.
|
||||||
return result
|
return result
|
||||||
|
|||||||
@@ -17,7 +17,8 @@ def _task(watch, update_handler):
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
# The slow process (we run 2 of these in parallel)
|
# The slow process (we run 2 of these in parallel)
|
||||||
changed_detected, update_obj, text_after_filter = update_handler.run_changedetection(watch=watch)
|
# Always force reprocess for preview - we want to show the filtered content regardless of checksums
|
||||||
|
changed_detected, update_obj, text_after_filter = update_handler.run_changedetection(watch=watch, force_reprocess=True)
|
||||||
except FilterNotFoundInResponse as e:
|
except FilterNotFoundInResponse as e:
|
||||||
text_after_filter = f"Filter not found in HTML: {str(e)}"
|
text_after_filter = f"Filter not found in HTML: {str(e)}"
|
||||||
except ReplyWithContentButNoText as e:
|
except ReplyWithContentButNoText as e:
|
||||||
@@ -35,7 +36,7 @@ def _task(watch, update_handler):
|
|||||||
|
|
||||||
|
|
||||||
def prepare_filter_prevew(datastore, watch_uuid, form_data):
|
def prepare_filter_prevew(datastore, watch_uuid, form_data):
|
||||||
'''Used by @app.route("/edit/<string:uuid>/preview-rendered", methods=['POST'])'''
|
'''Used by @app.route("/edit/<uuid_str:uuid>/preview-rendered", methods=['POST'])'''
|
||||||
from changedetectionio import forms, html_tools
|
from changedetectionio import forms, html_tools
|
||||||
from changedetectionio.model.Watch import model as watch_model
|
from changedetectionio.model.Watch import model as watch_model
|
||||||
from concurrent.futures import ThreadPoolExecutor
|
from concurrent.futures import ThreadPoolExecutor
|
||||||
|
|||||||
@@ -154,11 +154,7 @@ def render(watch, datastore, request, url_for, render_template, flash, redirect,
|
|||||||
|
|
||||||
screenshot_url = watch.get_screenshot()
|
screenshot_url = watch.get_screenshot()
|
||||||
|
|
||||||
system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver'
|
is_html_webdriver = watch.fetcher_supports_screenshots
|
||||||
|
|
||||||
is_html_webdriver = False
|
|
||||||
if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver' or watch.get('fetch_backend', '').startswith('extra_browser_'):
|
|
||||||
is_html_webdriver = True
|
|
||||||
|
|
||||||
password_enabled_and_share_is_off = False
|
password_enabled_and_share_is_off = False
|
||||||
if datastore.data['settings']['application'].get('password') or os.getenv("SALTED_PASS", False):
|
if datastore.data['settings']['application'].get('password') or os.getenv("SALTED_PASS", False):
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ import re
|
|||||||
import urllib3
|
import urllib3
|
||||||
|
|
||||||
from changedetectionio.conditions import execute_ruleset_against_all_plugins
|
from changedetectionio.conditions import execute_ruleset_against_all_plugins
|
||||||
|
from changedetectionio.content_fetchers.exceptions import checksumFromPreviousCheckWasTheSame
|
||||||
from ..base import difference_detection_processor
|
from ..base import difference_detection_processor
|
||||||
from changedetectionio.html_tools import PERL_STYLE_REGEX, cdata_in_document_to_text, TRANSLATE_WHITESPACE_TABLE
|
from changedetectionio.html_tools import PERL_STYLE_REGEX, cdata_in_document_to_text, TRANSLATE_WHITESPACE_TABLE
|
||||||
from changedetectionio import html_tools, content_fetchers
|
from changedetectionio import html_tools, content_fetchers
|
||||||
@@ -346,6 +347,7 @@ class ContentProcessor:
|
|||||||
def extract_text_from_html(self, html_content, stream_content_type):
|
def extract_text_from_html(self, html_content, stream_content_type):
|
||||||
"""Convert HTML to plain text."""
|
"""Convert HTML to plain text."""
|
||||||
do_anchor = self.datastore.data["settings"]["application"].get("render_anchor_tag_content", False)
|
do_anchor = self.datastore.data["settings"]["application"].get("render_anchor_tag_content", False)
|
||||||
|
|
||||||
return html_tools.html_to_text(
|
return html_tools.html_to_text(
|
||||||
html_content=html_content,
|
html_content=html_content,
|
||||||
render_anchor_tag_content=do_anchor,
|
render_anchor_tag_content=do_anchor,
|
||||||
@@ -368,12 +370,24 @@ class ChecksumCalculator:
|
|||||||
# (set_proxy_from_list)
|
# (set_proxy_from_list)
|
||||||
class perform_site_check(difference_detection_processor):
|
class perform_site_check(difference_detection_processor):
|
||||||
|
|
||||||
def run_changedetection(self, watch):
|
def run_changedetection(self, watch, force_reprocess=False):
|
||||||
changed_detected = False
|
changed_detected = False
|
||||||
|
|
||||||
if not watch:
|
if not watch:
|
||||||
raise Exception("Watch no longer exists.")
|
raise Exception("Watch no longer exists.")
|
||||||
|
|
||||||
|
current_raw_document_checksum = self.get_raw_document_checksum()
|
||||||
|
# Skip processing only if BOTH conditions are true:
|
||||||
|
# 1. HTML content unchanged (checksum matches last saved checksum)
|
||||||
|
# 2. Watch configuration was not edited (including trigger_text, filters, etc.)
|
||||||
|
# The was_edited flag handles all watch configuration changes, so we don't need
|
||||||
|
# separate checks for trigger_text or other processing rules.
|
||||||
|
if (not force_reprocess and
|
||||||
|
not watch.was_edited and
|
||||||
|
self.last_raw_content_checksum and
|
||||||
|
self.last_raw_content_checksum == current_raw_document_checksum):
|
||||||
|
raise checksumFromPreviousCheckWasTheSame()
|
||||||
|
|
||||||
# Initialize components
|
# Initialize components
|
||||||
filter_config = FilterConfig(watch, self.datastore)
|
filter_config = FilterConfig(watch, self.datastore)
|
||||||
content_processor = ContentProcessor(self.fetcher, watch, filter_config, self.datastore)
|
content_processor = ContentProcessor(self.fetcher, watch, filter_config, self.datastore)
|
||||||
@@ -391,9 +405,11 @@ class perform_site_check(difference_detection_processor):
|
|||||||
self.screenshot = self.fetcher.screenshot
|
self.screenshot = self.fetcher.screenshot
|
||||||
self.xpath_data = self.fetcher.xpath_data
|
self.xpath_data = self.fetcher.xpath_data
|
||||||
|
|
||||||
# Track the content type and checksum before filters
|
# Track the content type (readonly field, doesn't trigger was_edited)
|
||||||
update_obj['content_type'] = ctype_header
|
update_obj['content-type'] = ctype_header # Use hyphen (matches OpenAPI spec and watch_base default)
|
||||||
update_obj['previous_md5_before_filters'] = hashlib.md5(self.fetcher.content.encode('utf-8')).hexdigest()
|
|
||||||
|
# Save the raw content checksum to file (processor implementation detail, not watch config)
|
||||||
|
self.update_last_raw_content_checksum(current_raw_document_checksum)
|
||||||
|
|
||||||
# === CONTENT PREPROCESSING ===
|
# === CONTENT PREPROCESSING ===
|
||||||
# Avoid creating unnecessary intermediate string copies by reassigning only when needed
|
# Avoid creating unnecessary intermediate string copies by reassigning only when needed
|
||||||
|
|||||||
@@ -29,9 +29,11 @@ def register_watch_operation_handlers(socketio, datastore):
|
|||||||
# Perform the operation
|
# Perform the operation
|
||||||
if op == 'pause':
|
if op == 'pause':
|
||||||
watch.toggle_pause()
|
watch.toggle_pause()
|
||||||
|
watch.commit()
|
||||||
logger.info(f"Socket.IO: Toggled pause for watch {uuid}")
|
logger.info(f"Socket.IO: Toggled pause for watch {uuid}")
|
||||||
elif op == 'mute':
|
elif op == 'mute':
|
||||||
watch.toggle_mute()
|
watch.toggle_mute()
|
||||||
|
watch.commit()
|
||||||
logger.info(f"Socket.IO: Toggled mute for watch {uuid}")
|
logger.info(f"Socket.IO: Toggled mute for watch {uuid}")
|
||||||
elif op == 'recheck':
|
elif op == 'recheck':
|
||||||
# Import here to avoid circular imports
|
# Import here to avoid circular imports
|
||||||
|
|||||||
@@ -199,8 +199,31 @@ def handle_watch_update(socketio, **kwargs):
|
|||||||
logger.error(f"Socket.IO error in handle_watch_update: {str(e)}")
|
logger.error(f"Socket.IO error in handle_watch_update: {str(e)}")
|
||||||
|
|
||||||
|
|
||||||
|
def _suppress_werkzeug_ws_abrupt_disconnect_noise():
|
||||||
|
"""Patch BaseWSGIServer.log to suppress the AssertionError traceback that fires when
|
||||||
|
a browser closes a WebSocket connection mid-handshake (e.g. closing a tab).
|
||||||
|
The exception is caught inside run_wsgi and routed to self.server.log() — it never
|
||||||
|
propagates out, so wrapping run_wsgi doesn't help. Patching the log method is the
|
||||||
|
only reliable intercept point. The error is cosmetic: Socket.IO already handles the
|
||||||
|
disconnect correctly via its own disconnect handler and timeout logic."""
|
||||||
|
try:
|
||||||
|
from werkzeug.serving import BaseWSGIServer
|
||||||
|
_original_log = BaseWSGIServer.log
|
||||||
|
|
||||||
|
def _filtered_log(self, type, message, *args):
|
||||||
|
if type == 'error' and 'write() before start_response' in message:
|
||||||
|
return
|
||||||
|
_original_log(self, type, message, *args)
|
||||||
|
|
||||||
|
BaseWSGIServer.log = _filtered_log
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
def init_socketio(app, datastore):
|
def init_socketio(app, datastore):
|
||||||
"""Initialize SocketIO with the main Flask app"""
|
"""Initialize SocketIO with the main Flask app"""
|
||||||
|
_suppress_werkzeug_ws_abrupt_disconnect_noise()
|
||||||
|
|
||||||
import platform
|
import platform
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
@@ -345,4 +368,4 @@ def init_socketio(app, datastore):
|
|||||||
|
|
||||||
logger.info("Socket.IO initialized and attached to main Flask app")
|
logger.info("Socket.IO initialized and attached to main Flask app")
|
||||||
logger.info(f"Socket.IO: Registered event handlers: {socketio.handlers if hasattr(socketio, 'handlers') else 'No handlers found'}")
|
logger.info(f"Socket.IO: Registered event handlers: {socketio.handlers if hasattr(socketio, 'handlers') else 'No handlers found'}")
|
||||||
return socketio
|
return socketio
|
||||||
|
|||||||
@@ -44,12 +44,12 @@ data_sanity_test () {
|
|||||||
cd ..
|
cd ..
|
||||||
TMPDIR=$(mktemp -d)
|
TMPDIR=$(mktemp -d)
|
||||||
PORT_N=$((5000 + RANDOM % (6501 - 5000)))
|
PORT_N=$((5000 + RANDOM % (6501 - 5000)))
|
||||||
./changedetection.py -p $PORT_N -d $TMPDIR -u "https://localhost?test-url-is-sanity=1" &
|
ALLOW_IANA_RESTRICTED_ADDRESSES=true ./changedetection.py -p $PORT_N -d $TMPDIR -u "https://localhost?test-url-is-sanity=1" &
|
||||||
PID=$!
|
PID=$!
|
||||||
sleep 5
|
sleep 5
|
||||||
kill $PID
|
kill $PID
|
||||||
sleep 2
|
sleep 2
|
||||||
./changedetection.py -p $PORT_N -d $TMPDIR &
|
ALLOW_IANA_RESTRICTED_ADDRESSES=true ./changedetection.py -p $PORT_N -d $TMPDIR &
|
||||||
PID=$!
|
PID=$!
|
||||||
sleep 5
|
sleep 5
|
||||||
# On a restart the URL should still be there
|
# On a restart the URL should still be there
|
||||||
|
|||||||
@@ -17,8 +17,6 @@ $(document).ready(function () {
|
|||||||
set_scale();
|
set_scale();
|
||||||
});
|
});
|
||||||
// Should always be disabled
|
// Should always be disabled
|
||||||
$('#browser_steps-0-operation option[value="Goto site"]').prop("selected", "selected");
|
|
||||||
$('#browser_steps-0-operation').attr('disabled', 'disabled');
|
|
||||||
|
|
||||||
$('#browsersteps-click-start').click(function () {
|
$('#browsersteps-click-start').click(function () {
|
||||||
$("#browsersteps-click-start").fadeOut();
|
$("#browsersteps-click-start").fadeOut();
|
||||||
@@ -45,12 +43,6 @@ $(document).ready(function () {
|
|||||||
browsersteps_session_id = false;
|
browsersteps_session_id = false;
|
||||||
apply_buttons_disabled = false;
|
apply_buttons_disabled = false;
|
||||||
ctx.clearRect(0, 0, c.width, c.height);
|
ctx.clearRect(0, 0, c.width, c.height);
|
||||||
set_first_gotosite_disabled();
|
|
||||||
}
|
|
||||||
|
|
||||||
function set_first_gotosite_disabled() {
|
|
||||||
$('#browser_steps >li:first-child select').val('Goto site').attr('disabled', 'disabled');
|
|
||||||
$('#browser_steps >li:first-child').css('opacity', '0.5');
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Show seconds remaining until the browser interface needs to restart the session
|
// Show seconds remaining until the browser interface needs to restart the session
|
||||||
@@ -243,14 +235,54 @@ $(document).ready(function () {
|
|||||||
ctx.fill();
|
ctx.fill();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Reusable AJAX function for browser step operations
|
||||||
|
function executeBrowserStep(url, data = {}) {
|
||||||
|
$('#browser-steps-ui .loader .spinner').fadeIn();
|
||||||
|
apply_buttons_disabled = true;
|
||||||
|
$('ul#browser_steps li .control .apply').css('opacity', 0.5);
|
||||||
|
$("#browsersteps-img").css('opacity', 0.65);
|
||||||
|
|
||||||
|
return $.ajax({
|
||||||
|
method: "POST",
|
||||||
|
url: url,
|
||||||
|
data: data,
|
||||||
|
statusCode: {
|
||||||
|
400: function () {
|
||||||
|
alert("There was a problem processing the request, please reload the page.");
|
||||||
|
$("#loading-status-text").hide();
|
||||||
|
$('#browser-steps-ui .loader .spinner').fadeOut();
|
||||||
|
},
|
||||||
|
401: function (data) {
|
||||||
|
alert(data.responseText);
|
||||||
|
$("#loading-status-text").hide();
|
||||||
|
$('#browser-steps-ui .loader .spinner').fadeOut();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}).done(function (data) {
|
||||||
|
xpath_data = data.xpath_data;
|
||||||
|
$('#browsersteps-img').attr('src', data.screenshot);
|
||||||
|
$('#browser-steps-ui .loader .spinner').fadeOut();
|
||||||
|
apply_buttons_disabled = false;
|
||||||
|
$("#browsersteps-img").css('opacity', 1);
|
||||||
|
$('ul#browser_steps li .control .apply').css('opacity', 1);
|
||||||
|
$("#loading-status-text").hide();
|
||||||
|
}).fail(function (data) {
|
||||||
|
console.log(data);
|
||||||
|
if (data.responseText && data.responseText.includes("Browser session expired")) {
|
||||||
|
disable_browsersteps_ui();
|
||||||
|
}
|
||||||
|
apply_buttons_disabled = false;
|
||||||
|
$("#loading-status-text").hide();
|
||||||
|
$('ul#browser_steps li .control .apply').css('opacity', 1);
|
||||||
|
$("#browsersteps-img").css('opacity', 1);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
function start() {
|
function start() {
|
||||||
console.log("Starting browser-steps UI");
|
console.log("Starting browser-steps UI");
|
||||||
browsersteps_session_id = false;
|
browsersteps_session_id = false;
|
||||||
// @todo This setting of the first one should be done at the datalayer but wtforms doesnt wanna play nice
|
|
||||||
$('#browser_steps >li:first-child').removeClass('empty');
|
|
||||||
set_first_gotosite_disabled();
|
|
||||||
$('#browser-steps-ui .loader .spinner').show();
|
$('#browser-steps-ui .loader .spinner').show();
|
||||||
$('.clear,.remove', $('#browser_steps >li:first-child')).hide();
|
// Request a new session
|
||||||
$.ajax({
|
$.ajax({
|
||||||
type: "GET",
|
type: "GET",
|
||||||
url: browser_steps_start_url,
|
url: browser_steps_start_url,
|
||||||
@@ -267,11 +299,12 @@ $(document).ready(function () {
|
|||||||
}).done(function (data) {
|
}).done(function (data) {
|
||||||
$("#loading-status-text").fadeIn();
|
$("#loading-status-text").fadeIn();
|
||||||
browsersteps_session_id = data.browsersteps_session_id;
|
browsersteps_session_id = data.browsersteps_session_id;
|
||||||
// This should trigger 'Goto site'
|
|
||||||
console.log("Got startup response, requesting Goto-Site (first) step fake click");
|
|
||||||
$('#browser_steps >li:first-child .apply').click();
|
|
||||||
browser_interface_seconds_remaining = 500;
|
browser_interface_seconds_remaining = 500;
|
||||||
set_first_gotosite_disabled();
|
// Request goto_site operation
|
||||||
|
executeBrowserStep(
|
||||||
|
browser_steps_sync_url + "&browsersteps_session_id=" + browsersteps_session_id + "&goto_website_url_first_step=true"
|
||||||
|
);
|
||||||
|
|
||||||
}).fail(function (data) {
|
}).fail(function (data) {
|
||||||
console.log(data);
|
console.log(data);
|
||||||
alert('There was an error communicating with the server.');
|
alert('There was an error communicating with the server.');
|
||||||
@@ -280,7 +313,6 @@ $(document).ready(function () {
|
|||||||
}
|
}
|
||||||
|
|
||||||
function disable_browsersteps_ui() {
|
function disable_browsersteps_ui() {
|
||||||
set_first_gotosite_disabled();
|
|
||||||
$("#browser-steps-ui").css('opacity', '0.3');
|
$("#browser-steps-ui").css('opacity', '0.3');
|
||||||
$('#browsersteps-selector-canvas').off("mousemove mousedown click");
|
$('#browsersteps-selector-canvas').off("mousemove mousedown click");
|
||||||
}
|
}
|
||||||
@@ -328,16 +360,13 @@ $(document).ready(function () {
|
|||||||
// Add the extra buttons to the steps
|
// Add the extra buttons to the steps
|
||||||
$('ul#browser_steps li').each(function (i) {
|
$('ul#browser_steps li').each(function (i) {
|
||||||
var s = '<div class="control">' + '<a data-step-index=' + i + ' class="pure-button button-secondary button-green button-xsmall apply" >Apply</a> ';
|
var s = '<div class="control">' + '<a data-step-index=' + i + ' class="pure-button button-secondary button-green button-xsmall apply" >Apply</a> ';
|
||||||
if (i > 0) {
|
s += `<a data-step-index="${i}" class="pure-button button-secondary button-xsmall clear" >Clear</a> ` +
|
||||||
// The first step never gets these (Goto-site)
|
`<a data-step-index="${i}" class="pure-button button-secondary button-red button-xsmall remove" >Remove</a>`;
|
||||||
s += `<a data-step-index="${i}" class="pure-button button-secondary button-xsmall clear" >Clear</a> ` +
|
|
||||||
`<a data-step-index="${i}" class="pure-button button-secondary button-red button-xsmall remove" >Remove</a>`;
|
|
||||||
|
|
||||||
// if a screenshot is available
|
// if a screenshot is available
|
||||||
if (browser_steps_available_screenshots.includes(i.toString())) {
|
if (browser_steps_available_screenshots.includes(i.toString())) {
|
||||||
var d = (browser_steps_last_error_step === i+1) ? 'before' : 'after';
|
var d = (browser_steps_last_error_step === i+1) ? 'before' : 'after';
|
||||||
s += ` <a data-step-index="${i}" class="pure-button button-secondary button-xsmall show-screenshot" title="Show screenshot from last run" data-type="${d}">Pic</a> `;
|
s += ` <a data-step-index="${i}" class="pure-button button-secondary button-xsmall show-screenshot" title="Show screenshot from last run" data-type="${d}">Pic</a> `;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
s += '</div>';
|
s += '</div>';
|
||||||
$(this).append(s)
|
$(this).append(s)
|
||||||
@@ -376,80 +405,35 @@ $(document).ready(function () {
|
|||||||
});
|
});
|
||||||
|
|
||||||
$('ul#browser_steps li .control .apply').click(function (event) {
|
$('ul#browser_steps li .control .apply').click(function (event) {
|
||||||
// sequential requests @todo refactor
|
|
||||||
if (apply_buttons_disabled) {
|
if (apply_buttons_disabled) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
var current_data = $(event.currentTarget).closest('li');
|
var current_data = $(event.currentTarget).closest('li');
|
||||||
$('#browser-steps-ui .loader .spinner').fadeIn();
|
|
||||||
apply_buttons_disabled = true;
|
|
||||||
$('ul#browser_steps li .control .apply').css('opacity', 0.5);
|
|
||||||
$("#browsersteps-img").css('opacity', 0.65);
|
|
||||||
|
|
||||||
var is_last_step = 0;
|
|
||||||
var step_n = $(event.currentTarget).data('step-index');
|
var step_n = $(event.currentTarget).data('step-index');
|
||||||
|
|
||||||
// On the last step, we should also be getting data ready for the visual selector
|
// Determine if this is the last configured step
|
||||||
|
var is_last_step = 0;
|
||||||
$('ul#browser_steps li select').each(function (i) {
|
$('ul#browser_steps li select').each(function (i) {
|
||||||
if ($(this).val() !== 'Choose one') {
|
if ($(this).val() !== 'Choose one') {
|
||||||
is_last_step += 1;
|
is_last_step += 1;
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
is_last_step = (is_last_step == (step_n + 1));
|
||||||
if (is_last_step == (step_n + 1)) {
|
|
||||||
is_last_step = true;
|
|
||||||
} else {
|
|
||||||
is_last_step = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
console.log("Requesting step via POST " + $("select[id$='operation']", current_data).first().val());
|
console.log("Requesting step via POST " + $("select[id$='operation']", current_data).first().val());
|
||||||
// POST the currently clicked step form widget back and await response, redraw
|
|
||||||
$.ajax({
|
// Execute the browser step
|
||||||
method: "POST",
|
executeBrowserStep(
|
||||||
url: browser_steps_sync_url + "&browsersteps_session_id=" + browsersteps_session_id,
|
browser_steps_sync_url + "&browsersteps_session_id=" + browsersteps_session_id,
|
||||||
data: {
|
{
|
||||||
'operation': $("select[id$='operation']", current_data).first().val(),
|
'operation': $("select[id$='operation']", current_data).first().val(),
|
||||||
'selector': $("input[id$='selector']", current_data).first().val(),
|
'selector': $("input[id$='selector']", current_data).first().val(),
|
||||||
'optional_value': $("input[id$='optional_value']", current_data).first().val(),
|
'optional_value': $("input[id$='optional_value']", current_data).first().val(),
|
||||||
'step_n': step_n,
|
'step_n': step_n,
|
||||||
'is_last_step': is_last_step
|
'is_last_step': is_last_step
|
||||||
},
|
|
||||||
statusCode: {
|
|
||||||
400: function () {
|
|
||||||
// More than likely the CSRF token was lost when the server restarted
|
|
||||||
alert("There was a problem processing the request, please reload the page.");
|
|
||||||
$("#loading-status-text").hide();
|
|
||||||
$('#browser-steps-ui .loader .spinner').fadeOut();
|
|
||||||
},
|
|
||||||
401: function (data) {
|
|
||||||
// More than likely the CSRF token was lost when the server restarted
|
|
||||||
alert(data.responseText);
|
|
||||||
$("#loading-status-text").hide();
|
|
||||||
$('#browser-steps-ui .loader .spinner').fadeOut();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}).done(function (data) {
|
);
|
||||||
// it should return the new state (selectors available and screenshot)
|
|
||||||
xpath_data = data.xpath_data;
|
|
||||||
$('#browsersteps-img').attr('src', data.screenshot);
|
|
||||||
$('#browser-steps-ui .loader .spinner').fadeOut();
|
|
||||||
apply_buttons_disabled = false;
|
|
||||||
$("#browsersteps-img").css('opacity', 1);
|
|
||||||
$('ul#browser_steps li .control .apply').css('opacity', 1);
|
|
||||||
$("#loading-status-text").hide();
|
|
||||||
set_first_gotosite_disabled();
|
|
||||||
}).fail(function (data) {
|
|
||||||
console.log(data);
|
|
||||||
if (data.responseText.includes("Browser session expired")) {
|
|
||||||
disable_browsersteps_ui();
|
|
||||||
}
|
|
||||||
apply_buttons_disabled = false;
|
|
||||||
$("#loading-status-text").hide();
|
|
||||||
$('ul#browser_steps li .control .apply').css('opacity', 1);
|
|
||||||
$("#browsersteps-img").css('opacity', 1);
|
|
||||||
});
|
|
||||||
|
|
||||||
});
|
});
|
||||||
|
|
||||||
$('ul#browser_steps li .control .show-screenshot').click(function (element) {
|
$('ul#browser_steps li .control .show-screenshot').click(function (element) {
|
||||||
|
|||||||
@@ -116,6 +116,14 @@ $(document).ready(function () {
|
|||||||
$('#realtime-conn-error').show();
|
$('#realtime-conn-error').show();
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Tell the server we're leaving cleanly so it can release the connection
|
||||||
|
// immediately rather than waiting for a timeout.
|
||||||
|
// Note: this only fires for voluntary closes (tab/window close, navigation away).
|
||||||
|
// Hard kills, crashes and network drops will still timeout normally on the server.
|
||||||
|
window.addEventListener('beforeunload', function () {
|
||||||
|
socket.disconnect();
|
||||||
|
});
|
||||||
|
|
||||||
socket.on('queue_size', function (data) {
|
socket.on('queue_size', function (data) {
|
||||||
console.log(`${data.event_timestamp} - Queue size update: ${data.q_length}`);
|
console.log(`${data.event_timestamp} - Queue size update: ${data.q_length}`);
|
||||||
if(queueSizePagerInfoText) {
|
if(queueSizePagerInfoText) {
|
||||||
|
|||||||
@@ -102,7 +102,9 @@
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Navigate to search results (always redirect to watchlist home)
|
// Navigate to search results (always redirect to watchlist home)
|
||||||
window.location.href = '/?' + params.toString();
|
// Use base_path if available (for sub-path deployments like /enlighten-richerx)
|
||||||
|
const basePath = typeof base_path !== 'undefined' ? base_path : '';
|
||||||
|
window.location.href = basePath + '/?' + params.toString();
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -1 +1 @@
|
|||||||
#diff-form{background:rgba(0,0,0,.05);padding:1em;border-radius:10px;margin-bottom:1em;color:#fff;font-size:.9rem;text-align:center}#diff-form label.from-to-label{width:4rem;text-decoration:none;padding:.5rem}#diff-form label.from-to-label#change-from{color:#b30000;background:#fadad7}#diff-form label.from-to-label#change-to{background:#eaf2c2;color:#406619}#diff-form #diff-style>span{display:inline-block;padding:.3em}#diff-form #diff-style>span label{font-weight:normal}#diff-form *{vertical-align:middle}body.difference-page section.content{padding-top:40px}#diff-ui{background:var(--color-background);padding:1rem;border-radius:5px}@media(min-width: 767px){#diff-ui{min-width:50%}}#diff-ui #text{font-size:11px}#diff-ui pre{white-space:break-spaces}#diff-ui h1{display:inline;font-size:100%}#diff-ui #result{white-space:pre-wrap;word-break:break-word;overflow-wrap:break-word}#diff-ui .source{position:absolute;right:1%;top:.2em}@-moz-document url-prefix(){#diff-ui body{height:99%}}#diff-ui td#diff-col div{text-align:justify;white-space:pre-wrap}#diff-ui .ignored{background-color:#ccc;opacity:.7}#diff-ui .triggered{background-color:#1b98f8}#diff-ui .ignored.triggered{background-color:red}#diff-ui .tab-pane-inner#screenshot{text-align:center}#diff-ui .tab-pane-inner#screenshot img{max-width:99%}#diff-ui .pure-form button.reset-margin{margin:0px}#diff-ui .diff-fieldset{display:flex;align-items:center;gap:4px;flex-wrap:wrap}#diff-ui ul#highlightSnippetActions{list-style-type:none;display:flex;align-items:center;justify-content:center;gap:1.5rem;flex-wrap:wrap;padding:0;margin:0}#diff-ui ul#highlightSnippetActions li{display:flex;flex-direction:column;align-items:center;text-align:center;padding:.5rem;gap:.3rem}#diff-ui ul#highlightSnippetActions li button,#diff-ui ul#highlightSnippetActions li a{white-space:nowrap}#diff-ui ul#highlightSnippetActions span{font-size:.8rem;color:var(--color-text-input-description)}#diff-ui 
#cell-diff-jump-visualiser{display:flex;flex-direction:row;gap:1px;background:var(--color-background);border-radius:3px;overflow-x:hidden;position:sticky;top:0;z-index:10;padding-top:1rem;padding-bottom:1rem;justify-content:center}#diff-ui #cell-diff-jump-visualiser>div{flex:1;min-width:1px;max-width:10px;height:10px;background:var(--color-background-button-cancel);opacity:.3;border-radius:1px;transition:opacity .2s;position:relative}#diff-ui #cell-diff-jump-visualiser>div.deletion{background:#b30000;opacity:1}#diff-ui #cell-diff-jump-visualiser>div.insertion{background:#406619;opacity:1}#diff-ui #cell-diff-jump-visualiser>div.note{background:#406619;opacity:1}#diff-ui #cell-diff-jump-visualiser>div.mixed{background:linear-gradient(to right, #b30000 50%, #406619 50%);opacity:1}#diff-ui #cell-diff-jump-visualiser>div.current-position::after{content:"";position:absolute;bottom:-6px;left:50%;transform:translateX(-50%);width:0;height:0;border-left:4px solid rgba(0,0,0,0);border-right:4px solid rgba(0,0,0,0);border-bottom:4px solid var(--color-text)}#diff-ui #cell-diff-jump-visualiser>div:hover{opacity:.8;cursor:pointer}#text-diff-heading-area .snapshot-age{padding:4px;margin:.5rem 0;background-color:var(--color-background-snapshot-age);border-radius:3px;font-weight:bold;margin-bottom:4px}#text-diff-heading-area .snapshot-age.error{background-color:var(--color-error-background-snapshot-age);color:var(--color-error-text-snapshot-age)}#text-diff-heading-area .snapshot-age>*{padding-right:1rem}
|
#diff-form{background:rgba(0,0,0,.05);padding:1em;border-radius:10px;margin-bottom:1em;color:#fff;font-size:.9rem;text-align:center}#diff-form label.from-to-label{width:4rem;text-decoration:none;padding:.5rem}#diff-form label.from-to-label#change-from{color:#b30000;background:#fadad7}#diff-form label.from-to-label#change-to{background:#eaf2c2;color:#406619}#diff-form #diff-style>span{display:inline-block;padding:.3em}#diff-form #diff-style>span label{font-weight:normal}#diff-form *{vertical-align:middle}body.difference-page section.content{padding-top:40px}#diff-ui{background:var(--color-background);padding:1rem;border-radius:5px}@media(min-width: 767px){#diff-ui{min-width:50%}}#diff-ui #text{font-size:11px}#diff-ui pre{white-space:break-spaces;overflow-wrap:anywhere}#diff-ui h1{display:inline;font-size:100%}#diff-ui #result{white-space:pre-wrap;word-break:break-word;overflow-wrap:break-word}#diff-ui .source{position:absolute;right:1%;top:.2em}@-moz-document url-prefix(){#diff-ui body{height:99%}}#diff-ui td#diff-col div{text-align:justify;white-space:pre-wrap}#diff-ui .ignored{background-color:#ccc;opacity:.7}#diff-ui .triggered{background-color:#1b98f8}#diff-ui .ignored.triggered{background-color:red}#diff-ui .tab-pane-inner#screenshot{text-align:center}#diff-ui .tab-pane-inner#screenshot img{max-width:99%}#diff-ui .pure-form button.reset-margin{margin:0px}#diff-ui .diff-fieldset{display:flex;align-items:center;gap:4px;flex-wrap:wrap}#diff-ui ul#highlightSnippetActions{list-style-type:none;display:flex;align-items:center;justify-content:center;gap:1.5rem;flex-wrap:wrap;padding:0;margin:0}#diff-ui ul#highlightSnippetActions li{display:flex;flex-direction:column;align-items:center;text-align:center;padding:.5rem;gap:.3rem}#diff-ui ul#highlightSnippetActions li button,#diff-ui ul#highlightSnippetActions li a{white-space:nowrap}#diff-ui ul#highlightSnippetActions span{font-size:.8rem;color:var(--color-text-input-description)}#diff-ui 
#cell-diff-jump-visualiser{display:flex;flex-direction:row;gap:1px;background:var(--color-background);border-radius:3px;overflow-x:hidden;position:sticky;top:0;z-index:10;padding-top:1rem;padding-bottom:1rem;justify-content:center}#diff-ui #cell-diff-jump-visualiser>div{flex:1;min-width:1px;max-width:10px;height:10px;background:var(--color-background-button-cancel);opacity:.3;border-radius:1px;transition:opacity .2s;position:relative}#diff-ui #cell-diff-jump-visualiser>div.deletion{background:#b30000;opacity:1}#diff-ui #cell-diff-jump-visualiser>div.insertion{background:#406619;opacity:1}#diff-ui #cell-diff-jump-visualiser>div.note{background:#406619;opacity:1}#diff-ui #cell-diff-jump-visualiser>div.mixed{background:linear-gradient(to right, #b30000 50%, #406619 50%);opacity:1}#diff-ui #cell-diff-jump-visualiser>div.current-position::after{content:"";position:absolute;bottom:-6px;left:50%;transform:translateX(-50%);width:0;height:0;border-left:4px solid rgba(0,0,0,0);border-right:4px solid rgba(0,0,0,0);border-bottom:4px solid var(--color-text)}#diff-ui #cell-diff-jump-visualiser>div:hover{opacity:.8;cursor:pointer}#text-diff-heading-area .snapshot-age{padding:4px;margin:.5rem 0;background-color:var(--color-background-snapshot-age);border-radius:3px;font-weight:bold;margin-bottom:4px}#text-diff-heading-area .snapshot-age.error{background-color:var(--color-error-background-snapshot-age);color:var(--color-error-text-snapshot-age)}#text-diff-heading-area .snapshot-age>*{padding-right:1rem}
|
||||||
|
|||||||
@@ -62,6 +62,7 @@ body.difference-page {
|
|||||||
|
|
||||||
pre {
|
pre {
|
||||||
white-space: break-spaces;
|
white-space: break-spaces;
|
||||||
|
overflow-wrap: anywhere;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
File diff suppressed because one or more lines are too long
@@ -22,6 +22,8 @@ import uuid as uuid_builder
|
|||||||
from loguru import logger
|
from loguru import logger
|
||||||
from blinker import signal
|
from blinker import signal
|
||||||
|
|
||||||
|
from ..model.Tags import TagsDict
|
||||||
|
|
||||||
# Try to import orjson for faster JSON serialization
|
# Try to import orjson for faster JSON serialization
|
||||||
try:
|
try:
|
||||||
import orjson
|
import orjson
|
||||||
@@ -121,6 +123,11 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
|
|||||||
if 'application' in settings_data['settings']:
|
if 'application' in settings_data['settings']:
|
||||||
self.__data['settings']['application'].update(settings_data['settings']['application'])
|
self.__data['settings']['application'].update(settings_data['settings']['application'])
|
||||||
|
|
||||||
|
# Use our Tags dict with cleanup helpers etc
|
||||||
|
# @todo Same for Watches
|
||||||
|
existing_tags = settings_data.get('settings', {}).get('application', {}).get('tags') or {}
|
||||||
|
self.__data['settings']['application']['tags'] = TagsDict(existing_tags, datastore_path=self.datastore_path)
|
||||||
|
|
||||||
# More or less for the old format which had this data in the one url-watches.json
|
# More or less for the old format which had this data in the one url-watches.json
|
||||||
# cant hurt to leave it here,
|
# cant hurt to leave it here,
|
||||||
if 'watching' in settings_data:
|
if 'watching' in settings_data:
|
||||||
@@ -196,7 +203,7 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
|
|||||||
self.datastore_path = datastore_path
|
self.datastore_path = datastore_path
|
||||||
|
|
||||||
# Initialize data structure
|
# Initialize data structure
|
||||||
self.__data = App.model()
|
self.__data = App.model(datastore_path=datastore_path)
|
||||||
self.json_store_path = os.path.join(self.datastore_path, "changedetection.json")
|
self.json_store_path = os.path.join(self.datastore_path, "changedetection.json")
|
||||||
|
|
||||||
# Base definition for all watchers (deepcopy part of #569)
|
# Base definition for all watchers (deepcopy part of #569)
|
||||||
@@ -235,6 +242,8 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
|
|||||||
# No datastore yet - check if this is a fresh install or legacy migration
|
# No datastore yet - check if this is a fresh install or legacy migration
|
||||||
self.init_fresh_install(include_default_watches=include_default_watches,
|
self.init_fresh_install(include_default_watches=include_default_watches,
|
||||||
version_tag=version_tag)
|
version_tag=version_tag)
|
||||||
|
# Maybe they copied a bunch of watch subdirs across too
|
||||||
|
self._load_state()
|
||||||
|
|
||||||
def init_fresh_install(self, include_default_watches, version_tag):
|
def init_fresh_install(self, include_default_watches, version_tag):
|
||||||
# Generate app_guid FIRST (required for all operations)
|
# Generate app_guid FIRST (required for all operations)
|
||||||
@@ -353,6 +362,9 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
|
|||||||
# Deep copy settings to avoid modifying the original
|
# Deep copy settings to avoid modifying the original
|
||||||
settings_copy = copy.deepcopy(self.__data['settings'])
|
settings_copy = copy.deepcopy(self.__data['settings'])
|
||||||
|
|
||||||
|
# Is saved as {uuid}/tag.json
|
||||||
|
settings_copy['application']['tags'] = {}
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'note': 'Settings file - watches are in {uuid}/watch.json, tags are in {uuid}/tag.json',
|
'note': 'Settings file - watches are in {uuid}/watch.json, tags are in {uuid}/tag.json',
|
||||||
'app_guid': self.__data.get('app_guid'),
|
'app_guid': self.__data.get('app_guid'),
|
||||||
@@ -456,6 +468,63 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
|
|||||||
self.__data['settings']['application']['password'] = False
|
self.__data['settings']['application']['password'] = False
|
||||||
self.commit()
|
self.commit()
|
||||||
|
|
||||||
|
def clear_all_last_checksums(self):
    """
    Remove the last-checksum.txt file of every watch so each one is reprocessed.

    Intended to be called after a global settings change: watches inherit that
    configuration, so they must be re-evaluated even though their own watch
    dict was not touched.

    The checksum file is deleted (instead of flagging was_edited=True) because
    was_edited is not persisted across restarts, whereas a missing file still
    forces reprocessing after the app has been restarted.

    Returns:
        int: Number of checksum files deleted
    """
    removed = 0
    watching = self.__data['watching']

    for uuid in watching.keys():
        watch = watching[uuid]

        # Watches without a data directory have nothing on disk to clear
        if not watch.data_dir:
            continue

        checksum_path = os.path.join(watch.data_dir, 'last-checksum.txt')
        if not os.path.isfile(checksum_path):
            continue

        try:
            os.remove(checksum_path)
            removed += 1
            logger.debug(f"Cleared checksum for watch {uuid}")
        except OSError as e:
            # Best effort: one undeletable file must not stop the sweep
            logger.warning(f"Failed to delete checksum file for {uuid}: {e}")

    logger.info(f"Cleared {removed} checksum files to force reprocessing")
    return removed
|
||||||
|
|
||||||
|
def clear_checksums_for_tag(self, tag_uuid):
    """
    Remove last-checksum.txt for every watch that carries the given tag.

    Call this after a tag's configuration has been edited: watches inherit
    the tag's settings, so they have to be reprocessed.

    Args:
        tag_uuid: UUID of the tag that was modified

    Returns:
        int: Number of checksum files deleted
    """
    removed = 0

    for uuid, watch in self.__data['watching'].items():
        # Only watches that actually reference this tag are affected
        if not watch.get('tags') or tag_uuid not in watch['tags']:
            continue

        # Watches without a data directory have nothing on disk to clear
        if not watch.data_dir:
            continue

        checksum_path = os.path.join(watch.data_dir, 'last-checksum.txt')
        if not os.path.isfile(checksum_path):
            continue

        try:
            os.remove(checksum_path)
            removed += 1
            logger.debug(f"Cleared checksum for watch {uuid} (tag {tag_uuid})")
        except OSError as e:
            # Best effort: one undeletable file must not stop the sweep
            logger.warning(f"Failed to delete checksum file for {uuid}: {e}")

    logger.info(f"Cleared {removed} checksum files for tag {tag_uuid}")
    return removed
|
||||||
|
|
||||||
def commit(self):
|
def commit(self):
|
||||||
"""
|
"""
|
||||||
Save settings immediately to disk using atomic write.
|
Save settings immediately to disk using atomic write.
|
||||||
@@ -659,8 +728,11 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
if not is_safe_valid_url(url):
|
if not is_safe_valid_url(url):
|
||||||
flash(gettext('Watch protocol is not permitted or invalid URL format'), 'error')
|
from flask import has_request_context
|
||||||
|
if has_request_context():
|
||||||
|
flash(gettext('Watch protocol is not permitted or invalid URL format'), 'error')
|
||||||
|
else:
|
||||||
|
logger.error(f"add_watch: URL '{url}' is not permitted or invalid, skipping.")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
# Check PAGE_WATCH_LIMIT if set
|
# Check PAGE_WATCH_LIMIT if set
|
||||||
|
|||||||
@@ -669,7 +669,9 @@ class DatastoreUpdatesMixin:
|
|||||||
def update_26(self):
|
def update_26(self):
|
||||||
self.migrate_legacy_db_format()
|
self.migrate_legacy_db_format()
|
||||||
|
|
||||||
def update_28(self):
|
# Re-run tag to JSON migration
|
||||||
|
def update_29(self):
|
||||||
|
|
||||||
"""
|
"""
|
||||||
Migrate tags to individual tag.json files.
|
Migrate tags to individual tag.json files.
|
||||||
|
|
||||||
@@ -682,8 +684,6 @@ class DatastoreUpdatesMixin:
|
|||||||
- Enables independent tag versioning/backup
|
- Enables independent tag versioning/backup
|
||||||
- Maintains backwards compatibility (tags stay in settings too)
|
- Maintains backwards compatibility (tags stay in settings too)
|
||||||
"""
|
"""
|
||||||
# Force save as tag.json (not watch.json) even if object is corrupted
|
|
||||||
|
|
||||||
logger.critical("=" * 80)
|
logger.critical("=" * 80)
|
||||||
logger.critical("Running migration: Individual tag persistence (update_28)")
|
logger.critical("Running migration: Individual tag persistence (update_28)")
|
||||||
logger.critical("Creating individual tag.json files")
|
logger.critical("Creating individual tag.json files")
|
||||||
@@ -702,6 +702,9 @@ class DatastoreUpdatesMixin:
|
|||||||
failed_count = 0
|
failed_count = 0
|
||||||
|
|
||||||
for uuid, tag_data in tags.items():
|
for uuid, tag_data in tags.items():
|
||||||
|
if os.path.isfile(os.path.join(self.datastore_path, uuid, "tag.json")):
|
||||||
|
logger.debug(f"Tag {uuid} tag.json exists, skipping")
|
||||||
|
continue
|
||||||
try:
|
try:
|
||||||
tag_data.commit()
|
tag_data.commit()
|
||||||
saved_count += 1
|
saved_count += 1
|
||||||
@@ -723,3 +726,52 @@ class DatastoreUpdatesMixin:
|
|||||||
logger.info("Future tag edits will update both locations (dual storage)")
|
logger.info("Future tag edits will update both locations (dual storage)")
|
||||||
logger.critical("=" * 80)
|
logger.critical("=" * 80)
|
||||||
|
|
||||||
|
# write it to disk, it will be saved without ['tags'] in the JSON db because we find it from disk glob
|
||||||
|
# (left this out by accident in previous update, added tags={} in the changedetection.json save_to_disk)
|
||||||
|
self._save_settings()
|
||||||
|
|
||||||
|
def update_30(self):
    """Migrate restock_settings out of watch.json into a restock_diff.json processor config file.

    Previously, restock_diff processor settings (in_stock_processing, follow_price_changes, etc.)
    were stored directly in the watch dict (watch.json). They now belong in a separate per-watch
    processor config file (restock_diff.json) consistent with the processor_config_* API system.

    For tags: the restock_settings key is renamed to processor_config_restock_diff in the tag dict,
    matching what the API writes when updating a tag.

    Safe to re-run: watches that already have a restock_diff.json are not rewritten, and tags that
    already have processor_config_restock_diff set are skipped.

    The legacy restock_settings key is only removed from a watch once a restock_diff.json exists
    for it. A watch without a data directory keeps its settings, so nothing is lost when there
    is nowhere to write the new file.
    """
    import json

    # --- Watches ---
    for uuid, watch in self.data['watching'].items():
        if watch.get('processor') != 'restock_diff':
            continue
        restock_settings = watch.get('restock_settings')
        if not restock_settings:
            continue

        data_dir = watch.data_dir
        if not data_dir:
            # Nowhere to persist the processor config: dropping the legacy key here
            # would silently discard the user's settings, so leave it untouched.
            logger.warning(f"update_30: watch {uuid} has no data directory, keeping restock_settings in place")
            continue

        watch.ensure_data_dir_exists()
        filepath = os.path.join(data_dir, 'restock_diff.json')
        if not os.path.isfile(filepath):
            # Write to a temp file and rename it into place, so an interrupted write can
            # never leave a truncated restock_diff.json that a re-run would then treat
            # as "already migrated".
            tmp_path = filepath + '.tmp'
            with open(tmp_path, 'w', encoding='utf-8') as f:
                json.dump({'restock_diff': restock_settings}, f, indent=2)
            os.replace(tmp_path, filepath)
            logger.info(f"update_30: migrated restock_settings → {filepath}")

        # The config file exists at this point, so the legacy key can go
        del self.data['watching'][uuid]['restock_settings']
        watch.commit()

    # --- Tags ---
    for tag_uuid, tag in self.data['settings']['application']['tags'].items():
        restock_settings = tag.get('restock_settings')
        # Skip tags with nothing to migrate, and tags that already carry the new key
        if not restock_settings or tag.get('processor_config_restock_diff'):
            continue
        tag['processor_config_restock_diff'] = restock_settings
        del tag['restock_settings']
        tag.commit()
        logger.info(f"update_30: migrated tag {tag_uuid} restock_settings → processor_config_restock_diff")
|
||||||
|
|
||||||
|
|||||||
@@ -44,13 +44,26 @@
|
|||||||
<td><code>{{ '{{preview_url}}' }}</code></td>
|
<td><code>{{ '{{preview_url}}' }}</code></td>
|
||||||
<td>{{ _('The URL of the preview page generated by changedetection.io.') }}</td>
|
<td>{{ _('The URL of the preview page generated by changedetection.io.') }}</td>
|
||||||
</tr>
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td><code>{{ '{{change_datetime}}' }}</code></td>
|
||||||
|
<td>{{ _('Date/time of the change, accepts format=, change_datetime(format=\'%A\')\', default is \'%Y-%m-%d %H:%M:%S %Z\'') }}</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td><code>{{ '{{diff_url}}' }}</code></td>
|
||||||
|
<td>{{ _('The URL of the diff output for the watch.') }}</td>
|
||||||
|
</tr>
|
||||||
<tr>
|
<tr>
|
||||||
<td><code>{{ '{{diff_url}}' }}</code></td>
|
<td><code>{{ '{{diff_url}}' }}</code></td>
|
||||||
<td>{{ _('The URL of the diff output for the watch.') }}</td>
|
<td>{{ _('The URL of the diff output for the watch.') }}</td>
|
||||||
</tr>
|
</tr>
|
||||||
<tr>
|
<tr>
|
||||||
<td><code>{{ '{{diff}}' }}</code></td>
|
<td><code>{{ '{{diff}}' }}</code></td>
|
||||||
<td>{{ _('The diff output - only changes, additions, and removals') }}</td>
|
<td>{{ _('The diff output - only changes, additions, and removals') }}<br>
|
||||||
|
<small>
|
||||||
|
{{ _('All diff variants accept') }} <code>lines=</code>, <code>context=</code>, <code>word_diff=</code>, <code>ignore_junk=</code> {{ _('args, e.g.') }}
|
||||||
|
<code>{{ '{{diff(lines=10)}}' }}</code>, <code>{{ '{{diff_added(lines=5, context=2)}}' }}</code>
|
||||||
|
</small>
|
||||||
|
</td>
|
||||||
</tr>
|
</tr>
|
||||||
<tr>
|
<tr>
|
||||||
<td><code>{{ '{{diff_clean}}' }}</code></td>
|
<td><code>{{ '{{diff_clean}}' }}</code></td>
|
||||||
|
|||||||
@@ -10,6 +10,7 @@
|
|||||||
<li>{{ _('Trigger text is processed from the result-text that comes out of any CSS/JSON Filters for this monitor') }}</li>
|
<li>{{ _('Trigger text is processed from the result-text that comes out of any CSS/JSON Filters for this monitor') }}</li>
|
||||||
<li>{{ _('Each line is processed separately (think of each line as "OR")') }}</li>
|
<li>{{ _('Each line is processed separately (think of each line as "OR")') }}</li>
|
||||||
<li>{{ _('Note: Wrap in forward slash / to use regex example:') }} <code>/foo\d/</code></li>
|
<li>{{ _('Note: Wrap in forward slash / to use regex example:') }} <code>/foo\d/</code></li>
|
||||||
|
<li>{{ _('You can also use')}} <a href="#conditions">{{ _('conditions')}}</a> - {{ _('"Page text" - with Contains, Starts With, Not Contains and many more' ) }} <code>/foo\d/</code></li>
|
||||||
</ul>
|
</ul>
|
||||||
</span>
|
</span>
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
@@ -2,6 +2,7 @@
|
|||||||
import psutil
|
import psutil
|
||||||
import time
|
import time
|
||||||
from threading import Thread
|
from threading import Thread
|
||||||
|
import multiprocessing
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
import arrow
|
import arrow
|
||||||
@@ -13,6 +14,10 @@ import sys
|
|||||||
# When test server is slow/unresponsive, workers fail fast instead of holding UUIDs for 45s
|
# When test server is slow/unresponsive, workers fail fast instead of holding UUIDs for 45s
|
||||||
# This prevents exponential priority growth from repeated deferrals (priority × 10 each defer)
|
# This prevents exponential priority growth from repeated deferrals (priority × 10 each defer)
|
||||||
os.environ['DEFAULT_SETTINGS_REQUESTS_TIMEOUT'] = '5'
|
os.environ['DEFAULT_SETTINGS_REQUESTS_TIMEOUT'] = '5'
|
||||||
|
# Test server runs on localhost (127.0.0.1) which is a private IP.
|
||||||
|
# Allow it globally so all existing tests keep working; test_ssrf_protection
|
||||||
|
# uses monkeypatch to temporarily override this for its own assertions.
|
||||||
|
os.environ['ALLOW_IANA_RESTRICTED_ADDRESSES'] = 'true'
|
||||||
|
|
||||||
from changedetectionio.flask_app import init_app_secret, changedetection_app
|
from changedetectionio.flask_app import init_app_secret, changedetection_app
|
||||||
from changedetectionio.tests.util import live_server_setup, new_live_server_setup
|
from changedetectionio.tests.util import live_server_setup, new_live_server_setup
|
||||||
@@ -187,6 +192,34 @@ def cleanup(datastore_path):
|
|||||||
if os.path.isfile(f):
|
if os.path.isfile(f):
|
||||||
os.unlink(f)
|
os.unlink(f)
|
||||||
|
|
||||||
|
def pytest_configure(config):
    """Pin the multiprocessing start method to 'fork' before any test runs.

    Python 3.14 changed the default start method on Linux from 'fork' to
    'forkserver'. 'forkserver' needs everything handed to the child process to
    be picklable, and pytest-flask's LiveServer relies on nested functions that
    cannot be pickled.

    Requesting 'fork' explicitly:
    - keeps Python 3.10-3.13 behaving as before ('fork' was already the Linux default)
    - avoids the pickling errors on Python 3.14
    - is skipped when a start method has already been chosen

    See: https://github.com/python/cpython/issues/126831
    See: https://docs.python.org/3/whatsnew/3.14.html
    """
    # Respect a start method that was already configured elsewhere
    if multiprocessing.get_start_method(allow_none=True) is not None:
        return

    try:
        # 'fork' exists on Unix-like systems only; where it is unavailable
        # (e.g. Windows) set_start_method raises ValueError, handled below.
        multiprocessing.set_start_method('fork')
        logger.debug("Set multiprocessing start method to 'fork' for Python 3.14+ compatibility")
    except (ValueError, RuntimeError):
        # Not available on this platform, or the context was already created
        pass
|
||||||
|
|
||||||
def pytest_addoption(parser):
|
def pytest_addoption(parser):
|
||||||
"""Add custom command-line options for pytest.
|
"""Add custom command-line options for pytest.
|
||||||
|
|
||||||
@@ -331,6 +364,7 @@ def prepare_test_function(live_server, datastore_path):
|
|||||||
# Cleanup: Clear watches and queue after test
|
# Cleanup: Clear watches and queue after test
|
||||||
try:
|
try:
|
||||||
from changedetectionio.flask_app import update_q
|
from changedetectionio.flask_app import update_q
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
# Clear the queue to prevent leakage to next test
|
# Clear the queue to prevent leakage to next test
|
||||||
while not update_q.empty():
|
while not update_q.empty():
|
||||||
@@ -340,6 +374,18 @@ def prepare_test_function(live_server, datastore_path):
|
|||||||
break
|
break
|
||||||
|
|
||||||
datastore.data['watching'] = {}
|
datastore.data['watching'] = {}
|
||||||
|
|
||||||
|
# Delete any old watch metadata JSON files
|
||||||
|
base_path = Path(datastore.datastore_path).resolve()
|
||||||
|
max_depth = 2
|
||||||
|
|
||||||
|
for file in base_path.rglob("*.json"):
|
||||||
|
# Calculate depth relative to base path
|
||||||
|
depth = len(file.relative_to(base_path).parts) - 1
|
||||||
|
|
||||||
|
if depth <= max_depth and file.is_file():
|
||||||
|
file.unlink()
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning(f"Error during datastore cleanup: {e}")
|
logger.warning(f"Error during datastore cleanup: {e}")
|
||||||
|
|
||||||
|
|||||||
@@ -170,6 +170,14 @@ def test_api_simple(client, live_server, measure_memory_usage, datastore_path):
|
|||||||
headers={'x-api-key': api_key},
|
headers={'x-api-key': api_key},
|
||||||
)
|
)
|
||||||
assert b'(changed) Which is across' in res.data
|
assert b'(changed) Which is across' in res.data
|
||||||
|
assert b'Some text thats the same' in res.data
|
||||||
|
|
||||||
|
# Fetch the difference between two versions (default text format)
|
||||||
|
res = client.get(
|
||||||
|
url_for("watchhistorydiff", uuid=watch_uuid, from_timestamp='previous', to_timestamp='latest')+"?changesOnly=true",
|
||||||
|
headers={'x-api-key': api_key},
|
||||||
|
)
|
||||||
|
assert b'Some text thats the same' not in res.data
|
||||||
|
|
||||||
# Test htmlcolor format
|
# Test htmlcolor format
|
||||||
res = client.get(
|
res = client.get(
|
||||||
@@ -807,6 +815,88 @@ def test_api_import_large_background(client, live_server, measure_memory_usage,
|
|||||||
print(f"\n✓ Successfully created {num_urls} watches in background (took {elapsed}s)")
|
print(f"\n✓ Successfully created {num_urls} watches in background (took {elapsed}s)")
|
||||||
|
|
||||||
|
|
||||||
|
def test_api_restock_processor_config(client, live_server, measure_memory_usage, datastore_path):
    """
    Exercise processor_config_restock_diff through the watch API.

    Covers: accepting the config on POST and PUT for a restock_diff watch,
    the value showing up in the UI edit form, rejection of an invalid enum
    value by the OpenAPI validation layer, and rejection of a field that is
    not part of the spec at all.
    """
    app_settings = live_server.app.config['DATASTORE'].data['settings']['application']
    api_key = app_settings.get('api_access_token')
    test_url = url_for('test_endpoint', _external=True)

    # Create a watch in restock_diff mode WITH processor_config in the POST body (matches the API docs example)
    create_payload = {
        "url": test_url,
        "processor": "restock_diff",
        "title": "Restock test",
        "processor_config_restock_diff": {
            "in_stock_processing": "in_stock_only",
            "follow_price_changes": True,
            "price_change_min": 8888888.0,
        }
    }
    res = client.post(
        url_for("createwatch"),
        data=json.dumps(create_payload),
        headers={'content-type': 'application/json', 'x-api-key': api_key},
        follow_redirects=True
    )
    assert res.status_code == 201
    watch_uuid = res.json.get('uuid')
    assert is_valid_uuid(watch_uuid)

    def put_watch(payload):
        # Send a JSON PUT to the watch that was just created
        return client.put(
            url_for("watch", uuid=watch_uuid),
            headers={'x-api-key': api_key, 'content-type': 'application/json'},
            data=json.dumps(payload),
        )

    def check_price_in_edit_form(method):
        # The configured price_change_min has to be visible in the UI edit page
        page = client.get(url_for("ui.ui_edit.edit_page", uuid=watch_uuid))
        assert page.status_code == 200
        assert b'8888888' in page.data, f"price_change_min set via {method} should appear in the UI edit form"

    # Verify the value set on POST is reflected in the UI edit page (not just via PUT)
    check_price_in_edit_form("POST")

    # Valid processor_config_restock_diff update via PUT should also be accepted
    res = put_watch({
        "processor_config_restock_diff": {
            "in_stock_processing": "all_changes",
            "follow_price_changes": False,
            "price_change_min": 8888888.0,
            "price_change_max": 9999999.0,
        }
    })
    assert res.status_code == 200, f"Valid processor_config_restock_diff should be accepted, got: {res.data}"

    # Verify the updated value is still reflected in the UI edit page
    check_price_in_edit_form("PUT")

    # An invalid enum value inside processor_config_restock_diff should be rejected by the spec
    res = put_watch({
        "processor_config_restock_diff": {
            "in_stock_processing": "not_a_valid_enum_value"
        }
    })
    assert res.status_code == 400, "Invalid enum value in processor config should be rejected"
    assert b'Validation failed' in res.data, "Rejection should come from OpenAPI spec validation layer"

    # A completely unknown field should be rejected (either by OpenAPI spec validation or
    # the application-level field filter — both are acceptable gatekeepers)
    res = put_watch({"field_that_is_not_in_the_spec_at_all": "some value"})
    assert res.status_code == 400, "Unknown fields should be rejected"
    rejected_by_known_layer = b'Validation failed' in res.data or b'Unknown field' in res.data
    assert rejected_by_known_layer, \
        "Rejection should come from either the OpenAPI spec validation layer or application field filter"

    delete_all_watches(client)
|
||||||
|
|
||||||
|
|
||||||
def test_api_conflict_UI_password(client, live_server, measure_memory_usage, datastore_path):
|
def test_api_conflict_UI_password(client, live_server, measure_memory_usage, datastore_path):
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -9,7 +9,51 @@ by testing various scenarios that should trigger validation errors.
|
|||||||
import time
|
import time
|
||||||
import json
|
import json
|
||||||
from flask import url_for
|
from flask import url_for
|
||||||
from .util import live_server_setup, wait_for_all_checks
|
from .util import live_server_setup, wait_for_all_checks, delete_all_watches
|
||||||
|
|
||||||
|
|
||||||
|
def test_openapi_merged_spec_contains_restock_fields():
    """
    Unit test for build_merged_spec_dict(); no live server required.

    Checks that the merged spec exposes a processor_config_restock_diff schema
    with all expected sub-fields, that in_stock_processing is restricted to its
    three enum values, and that WatchBase.properties references the schema.
    """
    from changedetectionio.api import build_merged_spec_dict

    schemas = build_merged_spec_dict()['components']['schemas']

    # The merged schema for processor_config_restock_diff should exist
    assert 'processor_config_restock_diff' in schemas, \
        "processor_config_restock_diff schema missing from merged spec"

    props = schemas['processor_config_restock_diff'].get('properties', {})

    required_fields = {
        'in_stock_processing',
        'follow_price_changes',
        'price_change_min',
        'price_change_max',
        'price_change_threshold_percent',
    }
    missing = required_fields.difference(props.keys())
    assert not missing, f"Missing fields in processor_config_restock_diff schema: {missing}"

    # in_stock_processing must be an enum with the three valid values
    enum_values = set(props['in_stock_processing'].get('enum', []))
    assert enum_values == {'in_stock_only', 'all_changes', 'off'}, \
        f"Unexpected enum values for in_stock_processing: {enum_values}"

    # WatchBase.properties must carry a $ref to the restock schema so the
    # validation middleware can enforce it on every POST/PUT to /watch
    watchbase_props = schemas['WatchBase']['properties']
    assert 'processor_config_restock_diff' in watchbase_props, \
        "processor_config_restock_diff not wired into WatchBase.properties"

    ref = watchbase_props['processor_config_restock_diff'].get('$ref', '')
    assert 'processor_config_restock_diff' in ref, \
        f"WatchBase.processor_config_restock_diff should $ref the schema, got: {ref}"
|
||||||
|
|
||||||
|
|
||||||
def test_openapi_validation_invalid_content_type_on_create_watch(client, live_server, measure_memory_usage, datastore_path):
|
def test_openapi_validation_invalid_content_type_on_create_watch(client, live_server, measure_memory_usage, datastore_path):
|
||||||
@@ -27,6 +71,7 @@ def test_openapi_validation_invalid_content_type_on_create_watch(client, live_se
|
|||||||
# Should get 400 error due to OpenAPI validation failure
|
# Should get 400 error due to OpenAPI validation failure
|
||||||
assert res.status_code == 400, f"Expected 400 but got {res.status_code}"
|
assert res.status_code == 400, f"Expected 400 but got {res.status_code}"
|
||||||
assert b"Validation failed" in res.data, "Should contain validation error message"
|
assert b"Validation failed" in res.data, "Should contain validation error message"
|
||||||
|
delete_all_watches(client)
|
||||||
|
|
||||||
|
|
||||||
def test_openapi_validation_missing_required_field_create_watch(client, live_server, measure_memory_usage, datastore_path):
|
def test_openapi_validation_missing_required_field_create_watch(client, live_server, measure_memory_usage, datastore_path):
|
||||||
@@ -44,6 +89,7 @@ def test_openapi_validation_missing_required_field_create_watch(client, live_ser
|
|||||||
# Should get 400 error due to missing required field
|
# Should get 400 error due to missing required field
|
||||||
assert res.status_code == 400, f"Expected 400 but got {res.status_code}"
|
assert res.status_code == 400, f"Expected 400 but got {res.status_code}"
|
||||||
assert b"Validation failed" in res.data, "Should contain validation error message"
|
assert b"Validation failed" in res.data, "Should contain validation error message"
|
||||||
|
delete_all_watches(client)
|
||||||
|
|
||||||
|
|
||||||
def test_openapi_validation_invalid_field_in_request_body(client, live_server, measure_memory_usage, datastore_path):
|
def test_openapi_validation_invalid_field_in_request_body(client, live_server, measure_memory_usage, datastore_path):
|
||||||
@@ -83,6 +129,7 @@ def test_openapi_validation_invalid_field_in_request_body(client, live_server, m
|
|||||||
# Backend validation now returns "Unknown field(s):" message
|
# Backend validation now returns "Unknown field(s):" message
|
||||||
assert b"Unknown field" in res.data, \
|
assert b"Unknown field" in res.data, \
|
||||||
"Should contain validation error about unknown fields"
|
"Should contain validation error about unknown fields"
|
||||||
|
delete_all_watches(client)
|
||||||
|
|
||||||
|
|
||||||
def test_openapi_validation_import_wrong_content_type(client, live_server, measure_memory_usage, datastore_path):
|
def test_openapi_validation_import_wrong_content_type(client, live_server, measure_memory_usage, datastore_path):
|
||||||
@@ -100,6 +147,7 @@ def test_openapi_validation_import_wrong_content_type(client, live_server, measu
|
|||||||
# Should get 400 error due to content-type mismatch
|
# Should get 400 error due to content-type mismatch
|
||||||
assert res.status_code == 400, f"Expected 400 but got {res.status_code}"
|
assert res.status_code == 400, f"Expected 400 but got {res.status_code}"
|
||||||
assert b"Validation failed" in res.data, "Should contain validation error message"
|
assert b"Validation failed" in res.data, "Should contain validation error message"
|
||||||
|
delete_all_watches(client)
|
||||||
|
|
||||||
|
|
||||||
def test_openapi_validation_import_correct_content_type_succeeds(client, live_server, measure_memory_usage, datastore_path):
|
def test_openapi_validation_import_correct_content_type_succeeds(client, live_server, measure_memory_usage, datastore_path):
|
||||||
@@ -117,6 +165,7 @@ def test_openapi_validation_import_correct_content_type_succeeds(client, live_se
|
|||||||
# Should succeed
|
# Should succeed
|
||||||
assert res.status_code == 200, f"Expected 200 but got {res.status_code}"
|
assert res.status_code == 200, f"Expected 200 but got {res.status_code}"
|
||||||
assert len(res.json) == 2, "Should import 2 URLs"
|
assert len(res.json) == 2, "Should import 2 URLs"
|
||||||
|
delete_all_watches(client)
|
||||||
|
|
||||||
|
|
||||||
def test_openapi_validation_get_requests_bypass_validation(client, live_server, measure_memory_usage, datastore_path):
|
def test_openapi_validation_get_requests_bypass_validation(client, live_server, measure_memory_usage, datastore_path):
|
||||||
@@ -141,6 +190,7 @@ def test_openapi_validation_get_requests_bypass_validation(client, live_server,
|
|||||||
|
|
||||||
# Should return JSON with watch list (empty in this case)
|
# Should return JSON with watch list (empty in this case)
|
||||||
assert isinstance(res.json, dict), "Should return JSON dictionary for watch list"
|
assert isinstance(res.json, dict), "Should return JSON dictionary for watch list"
|
||||||
|
delete_all_watches(client)
|
||||||
|
|
||||||
|
|
||||||
def test_openapi_validation_create_tag_missing_required_title(client, live_server, measure_memory_usage, datastore_path):
|
def test_openapi_validation_create_tag_missing_required_title(client, live_server, measure_memory_usage, datastore_path):
|
||||||
@@ -158,10 +208,13 @@ def test_openapi_validation_create_tag_missing_required_title(client, live_serve
|
|||||||
# Should get 400 error due to missing required field
|
# Should get 400 error due to missing required field
|
||||||
assert res.status_code == 400, f"Expected 400 but got {res.status_code}"
|
assert res.status_code == 400, f"Expected 400 but got {res.status_code}"
|
||||||
assert b"Validation failed" in res.data, "Should contain validation error message"
|
assert b"Validation failed" in res.data, "Should contain validation error message"
|
||||||
|
delete_all_watches(client)
|
||||||
|
|
||||||
|
|
||||||
def test_openapi_validation_watch_update_allows_partial_updates(client, live_server, measure_memory_usage, datastore_path):
|
def test_openapi_validation_watch_update_allows_partial_updates(client, live_server, measure_memory_usage, datastore_path):
|
||||||
|
|
||||||
"""Test that watch updates allow partial updates without requiring all fields (positive test)."""
|
"""Test that watch updates allow partial updates without requiring all fields (positive test)."""
|
||||||
|
#xxx
|
||||||
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
|
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
|
||||||
|
|
||||||
# First create a valid watch
|
# First create a valid watch
|
||||||
@@ -198,4 +251,5 @@ def test_openapi_validation_watch_update_allows_partial_updates(client, live_ser
|
|||||||
)
|
)
|
||||||
assert res.status_code == 200
|
assert res.status_code == 200
|
||||||
assert res.json.get('title') == 'Updated Title Only', "Title should be updated"
|
assert res.json.get('title') == 'Updated Title Only', "Title should be updated"
|
||||||
assert res.json.get('url') == 'https://example.com', "URL should remain unchanged"
|
assert res.json.get('url') == 'https://example.com', "URL should remain unchanged"
|
||||||
|
delete_all_watches(client)
|
||||||
|
|||||||
@@ -176,6 +176,97 @@ def test_api_tags_listing(client, live_server, measure_memory_usage, datastore_p
|
|||||||
assert res.status_code == 204
|
assert res.status_code == 204
|
||||||
|
|
||||||
|
|
||||||
|
def test_api_tag_restock_processor_config(client, live_server, measure_memory_usage, datastore_path):
|
||||||
|
"""
|
||||||
|
Test that a tag/group can be created and updated with processor_config_restock_diff via the API.
|
||||||
|
Since Tag extends WatchBase, processor config fields injected into WatchBase are also valid for tags.
|
||||||
|
"""
|
||||||
|
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
|
||||||
|
|
||||||
|
set_original_response(datastore_path=datastore_path)
|
||||||
|
|
||||||
|
# Create a tag with processor_config_restock_diff in a single POST (issue #3966)
|
||||||
|
res = client.post(
|
||||||
|
url_for("tag"),
|
||||||
|
data=json.dumps({
|
||||||
|
"title": "Restock Group",
|
||||||
|
"overrides_watch": True,
|
||||||
|
"processor_config_restock_diff": {
|
||||||
|
"in_stock_processing": "in_stock_only",
|
||||||
|
"follow_price_changes": True,
|
||||||
|
"price_change_min": 7777777
|
||||||
|
}
|
||||||
|
}),
|
||||||
|
headers={'content-type': 'application/json', 'x-api-key': api_key}
|
||||||
|
)
|
||||||
|
assert res.status_code == 201, f"POST tag with restock config failed: {res.data}"
|
||||||
|
tag_uuid = res.json.get('uuid')
|
||||||
|
|
||||||
|
# Verify processor config was saved during creation (the bug: these were discarded)
|
||||||
|
res = client.get(
|
||||||
|
url_for("tag", uuid=tag_uuid),
|
||||||
|
headers={'x-api-key': api_key}
|
||||||
|
)
|
||||||
|
assert res.status_code == 200
|
||||||
|
tag_data = res.json
|
||||||
|
assert tag_data.get('overrides_watch') == True, "overrides_watch should be saved on POST"
|
||||||
|
assert tag_data.get('processor_config_restock_diff', {}).get('in_stock_processing') == 'in_stock_only', \
|
||||||
|
"processor_config_restock_diff should be saved on POST"
|
||||||
|
assert tag_data.get('processor_config_restock_diff', {}).get('price_change_min') == 7777777, \
|
||||||
|
"price_change_min should be saved on POST"
|
||||||
|
|
||||||
|
# Update tag with valid processor_config_restock_diff via PUT
|
||||||
|
res = client.put(
|
||||||
|
url_for("tag", uuid=tag_uuid),
|
||||||
|
headers={'x-api-key': api_key, 'content-type': 'application/json'},
|
||||||
|
data=json.dumps({
|
||||||
|
"overrides_watch": True,
|
||||||
|
"processor_config_restock_diff": {
|
||||||
|
"in_stock_processing": "in_stock_only",
|
||||||
|
"follow_price_changes": True,
|
||||||
|
"price_change_min": 8888888
|
||||||
|
}
|
||||||
|
})
|
||||||
|
)
|
||||||
|
assert res.status_code == 200, f"PUT tag with restock config failed: {res.data}"
|
||||||
|
|
||||||
|
# Verify the config was stored via API
|
||||||
|
res = client.get(
|
||||||
|
url_for("tag", uuid=tag_uuid),
|
||||||
|
headers={'x-api-key': api_key}
|
||||||
|
)
|
||||||
|
assert res.status_code == 200
|
||||||
|
tag_data = res.json
|
||||||
|
assert tag_data.get('overrides_watch') == True
|
||||||
|
assert tag_data.get('processor_config_restock_diff', {}).get('in_stock_processing') == 'in_stock_only'
|
||||||
|
assert tag_data.get('processor_config_restock_diff', {}).get('price_change_min') == 8888888
|
||||||
|
|
||||||
|
# Verify the value is also reflected in the UI tag edit page
|
||||||
|
res = client.get(url_for("tags.form_tag_edit", uuid=tag_uuid))
|
||||||
|
assert res.status_code == 200
|
||||||
|
assert b'8888888' in res.data, "price_change_min set via API should appear in the UI tag edit form"
|
||||||
|
|
||||||
|
# Invalid enum value should be rejected by OpenAPI spec validation
|
||||||
|
res = client.put(
|
||||||
|
url_for("tag", uuid=tag_uuid),
|
||||||
|
headers={'x-api-key': api_key, 'content-type': 'application/json'},
|
||||||
|
data=json.dumps({
|
||||||
|
"processor_config_restock_diff": {
|
||||||
|
"in_stock_processing": "not_a_valid_value"
|
||||||
|
}
|
||||||
|
})
|
||||||
|
)
|
||||||
|
assert res.status_code == 400
|
||||||
|
assert b'Validation failed' in res.data
|
||||||
|
|
||||||
|
# Clean up
|
||||||
|
res = client.delete(
|
||||||
|
url_for("tag", uuid=tag_uuid),
|
||||||
|
headers={'x-api-key': api_key}
|
||||||
|
)
|
||||||
|
assert res.status_code == 204
|
||||||
|
|
||||||
|
|
||||||
def test_roundtrip_API(client, live_server, measure_memory_usage, datastore_path):
|
def test_roundtrip_API(client, live_server, measure_memory_usage, datastore_path):
|
||||||
"""
|
"""
|
||||||
Test the full round trip, this way we test the default Model fits back into OpenAPI spec
|
Test the full round trip, this way we test the default Model fits back into OpenAPI spec
|
||||||
|
|||||||
@@ -6,8 +6,6 @@ from flask import url_for
|
|||||||
from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks, extract_rss_token_from_UI, \
|
from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks, extract_rss_token_from_UI, \
|
||||||
extract_UUID_from_client, delete_all_watches
|
extract_UUID_from_client, delete_all_watches
|
||||||
|
|
||||||
sleep_time_for_fetch_thread = 3
|
|
||||||
|
|
||||||
|
|
||||||
# Basic test to check inscriptis is not adding return line chars, basically works etc
|
# Basic test to check inscriptis is not adding return line chars, basically works etc
|
||||||
def test_inscriptus():
|
def test_inscriptus():
|
||||||
@@ -50,6 +48,15 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
|
|||||||
# Check this class does not appear (that we didn't see the actual source)
|
# Check this class does not appear (that we didn't see the actual source)
|
||||||
assert b'foobar-detection' not in res.data
|
assert b'foobar-detection' not in res.data
|
||||||
|
|
||||||
|
# Check POST preview
|
||||||
|
res = client.post(
|
||||||
|
url_for("ui.ui_preview.preview_page", uuid="first"),
|
||||||
|
follow_redirects=True
|
||||||
|
)
|
||||||
|
# Check this class does not appear (that we didn't see the actual source)
|
||||||
|
assert b'foobar-detection' not in res.data
|
||||||
|
|
||||||
|
|
||||||
# Make a change
|
# Make a change
|
||||||
set_modified_response(datastore_path=datastore_path)
|
set_modified_response(datastore_path=datastore_path)
|
||||||
|
|
||||||
|
|||||||
@@ -3,14 +3,13 @@
|
|||||||
from .util import set_original_response, live_server_setup, wait_for_all_checks
|
from .util import set_original_response, live_server_setup, wait_for_all_checks
|
||||||
from flask import url_for
|
from flask import url_for
|
||||||
import io
|
import io
|
||||||
from zipfile import ZipFile
|
from zipfile import ZipFile, ZIP_DEFLATED
|
||||||
import re
|
import re
|
||||||
import time
|
import time
|
||||||
|
from changedetectionio.model import Watch, Tag
|
||||||
|
|
||||||
|
|
||||||
def test_backup(client, live_server, measure_memory_usage, datastore_path):
|
def test_backup(client, live_server, measure_memory_usage, datastore_path):
|
||||||
# live_server_setup(live_server) # Setup on conftest per function
|
|
||||||
|
|
||||||
set_original_response(datastore_path=datastore_path)
|
set_original_response(datastore_path=datastore_path)
|
||||||
|
|
||||||
|
|
||||||
@@ -32,7 +31,7 @@ def test_backup(client, live_server, measure_memory_usage, datastore_path):
|
|||||||
time.sleep(4)
|
time.sleep(4)
|
||||||
|
|
||||||
res = client.get(
|
res = client.get(
|
||||||
url_for("backups.index"),
|
url_for("backups.create"),
|
||||||
follow_redirects=True
|
follow_redirects=True
|
||||||
)
|
)
|
||||||
# Can see the download link to the backup
|
# Can see the download link to the backup
|
||||||
@@ -54,11 +53,11 @@ def test_backup(client, live_server, measure_memory_usage, datastore_path):
|
|||||||
backup = ZipFile(io.BytesIO(res.data))
|
backup = ZipFile(io.BytesIO(res.data))
|
||||||
l = backup.namelist()
|
l = backup.namelist()
|
||||||
|
|
||||||
# Check for UUID-based txt files (history and snapshot)
|
# Check for UUID-based txt files (history, snapshot, and last-checksum)
|
||||||
uuid4hex_txt = re.compile('^[a-f0-9]{8}-?[a-f0-9]{4}-?4[a-f0-9]{3}-?[89ab][a-f0-9]{3}-?[a-f0-9]{12}.*txt', re.I)
|
uuid4hex_txt = re.compile('^[a-f0-9]{8}-?[a-f0-9]{4}-?4[a-f0-9]{3}-?[89ab][a-f0-9]{3}-?[a-f0-9]{12}.*txt', re.I)
|
||||||
txt_files = list(filter(uuid4hex_txt.match, l))
|
txt_files = list(filter(uuid4hex_txt.match, l))
|
||||||
# Should be two txt files in the archive (history and the snapshot)
|
# Should be three txt files in the archive (history, snapshot, and last-checksum)
|
||||||
assert len(txt_files) == 2
|
assert len(txt_files) == 3
|
||||||
|
|
||||||
# Check for watch.json files (new format)
|
# Check for watch.json files (new format)
|
||||||
uuid4hex_json = re.compile('^[a-f0-9]{8}-?[a-f0-9]{4}-?4[a-f0-9]{3}-?[89ab][a-f0-9]{3}-?[a-f0-9]{12}/watch\.json$', re.I)
|
uuid4hex_json = re.compile('^[a-f0-9]{8}-?[a-f0-9]{4}-?4[a-f0-9]{3}-?[89ab][a-f0-9]{3}-?[a-f0-9]{12}/watch\.json$', re.I)
|
||||||
@@ -69,10 +68,194 @@ def test_backup(client, live_server, measure_memory_usage, datastore_path):
|
|||||||
# Check for changedetection.json (settings file)
|
# Check for changedetection.json (settings file)
|
||||||
assert 'changedetection.json' in l, "changedetection.json should be in backup"
|
assert 'changedetection.json' in l, "changedetection.json should be in backup"
|
||||||
|
|
||||||
|
# secret.txt must never be included — it contains the Flask session key
|
||||||
|
assert 'secret.txt' not in l, "secret.txt (Flask session key) must not be included in backup"
|
||||||
|
|
||||||
# Get the latest one
|
# Get the latest one
|
||||||
res = client.get(
|
res = client.get(
|
||||||
url_for("backups.remove_backups"),
|
url_for("backups.remove_backups"),
|
||||||
follow_redirects=True
|
follow_redirects=True
|
||||||
)
|
)
|
||||||
|
|
||||||
assert b'No backups found.' in res.data
|
assert b'No backups found.' in res.data
|
||||||
|
|
||||||
|
|
||||||
|
def test_watch_data_package_download(client, live_server, measure_memory_usage, datastore_path):
|
||||||
|
"""Test downloading a single watch's data as a zip package"""
|
||||||
|
|
||||||
|
set_original_response(datastore_path=datastore_path)
|
||||||
|
|
||||||
|
uuid = client.application.config.get('DATASTORE').add_watch(url=url_for('test_endpoint', _external=True))
|
||||||
|
tag_uuid = client.application.config.get('DATASTORE').add_tag(title="Tasty backup tag")
|
||||||
|
tag_uuid2 = client.application.config.get('DATASTORE').add_tag(title="Tasty backup tag number two")
|
||||||
|
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
||||||
|
|
||||||
|
wait_for_all_checks(client)
|
||||||
|
|
||||||
|
# Download the watch data package
|
||||||
|
res = client.get(url_for("ui.ui_edit.watch_get_data_package", uuid=uuid))
|
||||||
|
|
||||||
|
# Should get the right zip content type
|
||||||
|
assert res.content_type == "application/zip"
|
||||||
|
|
||||||
|
# Should be PK/ZIP stream (PKzip header)
|
||||||
|
assert res.data[:2] == b'PK', "File should start with PK (PKzip header)"
|
||||||
|
assert res.data.count(b'PK') >= 2, "Should have multiple PK markers (zip file structure)"
|
||||||
|
|
||||||
|
# Verify zip contents
|
||||||
|
backup = ZipFile(io.BytesIO(res.data))
|
||||||
|
files = backup.namelist()
|
||||||
|
|
||||||
|
# Should have files in a UUID directory
|
||||||
|
assert any(uuid in f for f in files), f"Files should be in UUID directory: {files}"
|
||||||
|
|
||||||
|
# Should contain watch.json
|
||||||
|
watch_json_path = f"{uuid}/watch.json"
|
||||||
|
assert watch_json_path in files, f"Should contain watch.json, got: {files}"
|
||||||
|
|
||||||
|
# Should contain history/snapshot files
|
||||||
|
uuid4hex_txt = re.compile(f'^{re.escape(uuid)}/.*\\.txt', re.I)
|
||||||
|
txt_files = list(filter(uuid4hex_txt.match, files))
|
||||||
|
assert len(txt_files) > 0, f"Should have at least one .txt file (history/snapshot), got: {files}"
|
||||||
|
|
||||||
|
|
||||||
|
def test_backup_restore(client, live_server, measure_memory_usage, datastore_path):
|
||||||
|
"""Test that a full backup zip can be restored — watches and tags survive a round-trip."""
|
||||||
|
|
||||||
|
set_original_response(datastore_path=datastore_path)
|
||||||
|
|
||||||
|
datastore = live_server.app.config['DATASTORE']
|
||||||
|
watch_url = url_for('test_endpoint', _external=True)
|
||||||
|
|
||||||
|
# Set up: one watch and two tags
|
||||||
|
uuid = datastore.add_watch(url=watch_url)
|
||||||
|
tag_uuid = datastore.add_tag(title="Tasty backup tag")
|
||||||
|
tag_uuid2 = datastore.add_tag(title="Tasty backup tag number two")
|
||||||
|
|
||||||
|
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
||||||
|
wait_for_all_checks(client)
|
||||||
|
|
||||||
|
# Create a full backup
|
||||||
|
client.get(url_for("backups.request_backup"), follow_redirects=True)
|
||||||
|
time.sleep(4)
|
||||||
|
|
||||||
|
# Download the latest backup zip
|
||||||
|
res = client.get(url_for("backups.download_backup", filename="latest"), follow_redirects=True)
|
||||||
|
assert res.content_type == "application/zip"
|
||||||
|
zip_data = res.data
|
||||||
|
|
||||||
|
# Confirm the zip contains both watch.json and tag.json entries
|
||||||
|
backup = ZipFile(io.BytesIO(zip_data))
|
||||||
|
names = backup.namelist()
|
||||||
|
assert f"{uuid}/watch.json" in names, f"watch.json missing from backup: {names}"
|
||||||
|
assert f"{tag_uuid}/tag.json" in names, f"tag.json for tag 1 missing from backup: {names}"
|
||||||
|
assert f"{tag_uuid2}/tag.json" in names, f"tag.json for tag 2 missing from backup: {names}"
|
||||||
|
|
||||||
|
# --- Wipe everything ---
|
||||||
|
datastore.delete('all')
|
||||||
|
client.get(url_for("tags.delete_all"), follow_redirects=True)
|
||||||
|
|
||||||
|
assert uuid not in datastore.data['watching'], "Watch should be gone after delete"
|
||||||
|
assert tag_uuid not in datastore.data['settings']['application']['tags'], "Tag 1 should be gone after delete"
|
||||||
|
assert tag_uuid2 not in datastore.data['settings']['application']['tags'], "Tag 2 should be gone after delete"
|
||||||
|
|
||||||
|
# --- Restore from the backup zip ---
|
||||||
|
res = client.post(
|
||||||
|
url_for("backups.restore.backups_restore_start"),
|
||||||
|
data={
|
||||||
|
'zip_file': (io.BytesIO(zip_data), 'backup.zip'),
|
||||||
|
'include_groups': 'y',
|
||||||
|
'include_groups_replace_existing': 'y',
|
||||||
|
'include_watches': 'y',
|
||||||
|
'include_watches_replace_existing': 'y',
|
||||||
|
},
|
||||||
|
content_type='multipart/form-data',
|
||||||
|
follow_redirects=True
|
||||||
|
)
|
||||||
|
assert res.status_code == 200
|
||||||
|
|
||||||
|
# Wait for the thread to finish
|
||||||
|
time.sleep(2)
|
||||||
|
|
||||||
|
# --- Watch checks ---
|
||||||
|
restored_watch = datastore.data['watching'].get(uuid)
|
||||||
|
assert restored_watch is not None, f"Watch {uuid} not found after restore"
|
||||||
|
assert restored_watch['url'] == watch_url, "Restored watch URL does not match"
|
||||||
|
assert isinstance(restored_watch, Watch.model), \
|
||||||
|
f"Watch not properly rehydrated, got {type(restored_watch)}"
|
||||||
|
assert restored_watch.history_n >= 1, \
|
||||||
|
f"Restored watch should have at least 1 history entry, got {restored_watch.history_n}"
|
||||||
|
|
||||||
|
# --- Tag checks ---
|
||||||
|
restored_tags = datastore.data['settings']['application']['tags']
|
||||||
|
|
||||||
|
restored_tag = restored_tags.get(tag_uuid)
|
||||||
|
assert restored_tag is not None, f"Tag {tag_uuid} not found after restore"
|
||||||
|
assert restored_tag['title'] == "Tasty backup tag", "Restored tag 1 title does not match"
|
||||||
|
assert isinstance(restored_tag, Tag.model), \
|
||||||
|
f"Tag 1 not properly rehydrated, got {type(restored_tag)}"
|
||||||
|
|
||||||
|
restored_tag2 = restored_tags.get(tag_uuid2)
|
||||||
|
assert restored_tag2 is not None, f"Tag {tag_uuid2} not found after restore"
|
||||||
|
assert restored_tag2['title'] == "Tasty backup tag number two", "Restored tag 2 title does not match"
|
||||||
|
assert isinstance(restored_tag2, Tag.model), \
|
||||||
|
f"Tag 2 not properly rehydrated, got {type(restored_tag2)}"
|
||||||
|
|
||||||
|
|
||||||
|
def test_backup_restore_zip_slip_rejected(client, live_server, measure_memory_usage, datastore_path):
|
||||||
|
"""Zip Slip path traversal entries in a restore zip must be rejected."""
|
||||||
|
import pytest
|
||||||
|
from changedetectionio.blueprint.backups.restore import import_from_zip
|
||||||
|
|
||||||
|
# Build a zip with a path traversal entry that would escape the extraction dir
|
||||||
|
malicious_zip = io.BytesIO()
|
||||||
|
with ZipFile(malicious_zip, 'w') as zf:
|
||||||
|
zf.writestr("../escaped.txt", "ATTACKER-CONTROLLED")
|
||||||
|
malicious_zip.seek(0)
|
||||||
|
|
||||||
|
datastore = live_server.app.config['DATASTORE']
|
||||||
|
|
||||||
|
with pytest.raises(ValueError, match="Zip Slip"):
|
||||||
|
import_from_zip(
|
||||||
|
zip_stream=malicious_zip,
|
||||||
|
datastore=datastore,
|
||||||
|
include_groups=True,
|
||||||
|
include_groups_replace=True,
|
||||||
|
include_watches=True,
|
||||||
|
include_watches_replace=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_backup_restore_zip_bomb_rejected(client, live_server, measure_memory_usage, datastore_path):
|
||||||
|
"""A zip whose total uncompressed size exceeds the limit must be rejected.
|
||||||
|
|
||||||
|
The guard reads file_size from the zip central-directory metadata — no
|
||||||
|
actual decompression happens, so this test is fast and uses minimal RAM.
|
||||||
|
100 KB of zeros compresses to ~100 bytes; monkeypatching the limit to
|
||||||
|
50 KB is enough to trigger the check without creating any large files.
|
||||||
|
"""
|
||||||
|
import pytest
|
||||||
|
import changedetectionio.blueprint.backups.restore as restore_mod
|
||||||
|
from changedetectionio.blueprint.backups.restore import import_from_zip
|
||||||
|
|
||||||
|
# ~100 KB of zeros → deflate compresses to ~100 bytes, but file_size metadata = 100 KB
|
||||||
|
bomb_zip = io.BytesIO()
|
||||||
|
with ZipFile(bomb_zip, 'w', compression=ZIP_DEFLATED) as zf:
|
||||||
|
zf.writestr("data.txt", b"\x00" * (100 * 1024))
|
||||||
|
bomb_zip.seek(0)
|
||||||
|
|
||||||
|
datastore = live_server.app.config['DATASTORE']
|
||||||
|
original_limit = restore_mod._MAX_DECOMPRESSED_BYTES
|
||||||
|
try:
|
||||||
|
restore_mod._MAX_DECOMPRESSED_BYTES = 50 * 1024 # 50 KB limit for this test
|
||||||
|
with pytest.raises(ValueError, match="decompressed size"):
|
||||||
|
import_from_zip(
|
||||||
|
zip_stream=bomb_zip,
|
||||||
|
datastore=datastore,
|
||||||
|
include_groups=True,
|
||||||
|
include_groups_replace=True,
|
||||||
|
include_watches=True,
|
||||||
|
include_watches_replace=True,
|
||||||
|
)
|
||||||
|
finally:
|
||||||
|
restore_mod._MAX_DECOMPRESSED_BYTES = original_limit
|
||||||
@@ -71,22 +71,19 @@ def test_include_filters_output():
|
|||||||
|
|
||||||
# Tests the whole stack works with the CSS Filter
|
# Tests the whole stack works with the CSS Filter
|
||||||
def test_check_markup_include_filters_restriction(client, live_server, measure_memory_usage, datastore_path):
|
def test_check_markup_include_filters_restriction(client, live_server, measure_memory_usage, datastore_path):
|
||||||
sleep_time_for_fetch_thread = 3
|
|
||||||
|
|
||||||
include_filters = "#sametext"
|
include_filters = "#sametext"
|
||||||
|
|
||||||
set_original_response(datastore_path=datastore_path)
|
set_original_response(datastore_path=datastore_path)
|
||||||
|
|
||||||
# Give the endpoint time to spin up
|
|
||||||
time.sleep(1)
|
|
||||||
|
|
||||||
# Add our URL to the import page
|
# Add our URL to the import page
|
||||||
test_url = url_for('test_endpoint', _external=True)
|
test_url = url_for('test_endpoint', _external=True)
|
||||||
uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
|
uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
|
||||||
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
||||||
|
|
||||||
# Give the thread time to pick it up
|
wait_for_all_checks(client)
|
||||||
time.sleep(sleep_time_for_fetch_thread)
|
|
||||||
|
|
||||||
# Go to the edit page, add our ignore text
|
# Go to the edit page, add our ignore text
|
||||||
# Add our URL to the import page
|
# Add our URL to the import page
|
||||||
@@ -103,15 +100,15 @@ def test_check_markup_include_filters_restriction(client, live_server, measure_m
|
|||||||
)
|
)
|
||||||
assert bytes(include_filters.encode('utf-8')) in res.data
|
assert bytes(include_filters.encode('utf-8')) in res.data
|
||||||
|
|
||||||
# Give the thread time to pick it up
|
wait_for_all_checks(client)
|
||||||
time.sleep(sleep_time_for_fetch_thread)
|
|
||||||
# Make a change
|
# Make a change
|
||||||
set_modified_response(datastore_path=datastore_path)
|
set_modified_response(datastore_path=datastore_path)
|
||||||
|
|
||||||
# Trigger a check
|
# Trigger a check
|
||||||
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
||||||
# Give the thread time to pick it up
|
wait_for_all_checks(client)
|
||||||
time.sleep(sleep_time_for_fetch_thread)
|
|
||||||
|
|
||||||
# It should have 'has-unread-changes' still
|
# It should have 'has-unread-changes' still
|
||||||
# Because it should be looking at only that 'sametext' id
|
# Because it should be looking at only that 'sametext' id
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# coding=utf-8
|
# coding=utf-8
|
||||||
|
|
||||||
|
import hashlib
|
||||||
import time
|
import time
|
||||||
from flask import url_for
|
from flask import url_for
|
||||||
from .util import live_server_setup, wait_for_all_checks, extract_UUID_from_client
|
from .util import live_server_setup, wait_for_all_checks, extract_UUID_from_client
|
||||||
@@ -11,6 +12,69 @@ import os
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def test_surrogate_characters_in_content_are_sanitized():
|
||||||
|
"""Lone surrogates can appear in requests' r.text when a server returns malformed/mixed-encoding
|
||||||
|
content. Without sanitization, encoding to UTF-8 raises UnicodeEncodeError.
|
||||||
|
See: https://github.com/dgtlmoon/changedetection.io/issues/3952
|
||||||
|
"""
|
||||||
|
content_with_surrogate = '<html><body>Hello \udcad World</body></html>'
|
||||||
|
|
||||||
|
# Confirm the raw problem exists
|
||||||
|
with pytest.raises(UnicodeEncodeError):
|
||||||
|
content_with_surrogate.encode('utf-8')
|
||||||
|
|
||||||
|
# Our fix: sanitize after fetcher.run() in processors/base.py call_browser()
|
||||||
|
sanitized = content_with_surrogate.encode('utf-8', errors='replace').decode('utf-8')
|
||||||
|
assert 'Hello' in sanitized
|
||||||
|
assert 'World' in sanitized
|
||||||
|
assert '\udcad' not in sanitized
|
||||||
|
|
||||||
|
# Checksum computation (processors/base.py get_raw_document_checksum) must not crash
|
||||||
|
hashlib.md5(sanitized.encode('utf-8')).hexdigest()
|
||||||
|
|
||||||
|
|
||||||
|
def test_utf8_content_without_charset_header(client, live_server, datastore_path):
|
||||||
|
"""Server returns UTF-8 content but no charset in Content-Type header.
|
||||||
|
chardet can misdetect such pages as UTF-7 (Python 3.14 then produces surrogates).
|
||||||
|
Our fix tries UTF-8 first before falling back to chardet.
|
||||||
|
See: https://github.com/dgtlmoon/changedetection.io/issues/3952
|
||||||
|
"""
|
||||||
|
from .util import write_test_file_and_sync
|
||||||
|
# UTF-8 encoded content with non-ASCII chars - no charset will be in the header
|
||||||
|
html = '<html><body><p>Español</p><p>Français</p><p>日本語</p></body></html>'
|
||||||
|
write_test_file_and_sync(os.path.join(datastore_path, "endpoint-content.txt"), html.encode('utf-8'), mode='wb')
|
||||||
|
|
||||||
|
test_url = url_for('test_endpoint', content_type="text/html", _external=True)
|
||||||
|
client.application.config.get('DATASTORE').add_watch(url=test_url)
|
||||||
|
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
||||||
|
wait_for_all_checks(client)
|
||||||
|
|
||||||
|
res = client.get(url_for("ui.ui_preview.preview_page", uuid="first"), follow_redirects=True)
|
||||||
|
# Should decode correctly as UTF-8, not produce mojibake (EspaÃ±ol) or replacement chars
|
||||||
|
assert 'Español'.encode('utf-8') in res.data
|
||||||
|
assert 'Français'.encode('utf-8') in res.data
|
||||||
|
assert '日本語'.encode('utf-8') in res.data
|
||||||
|
|
||||||
|
|
||||||
|
def test_shiftjis_with_meta_charset(client, live_server, datastore_path):
|
||||||
|
"""Server returns Shift-JIS content with no charset in HTTP header, but the HTML
|
||||||
|
declares <meta charset="Shift-JIS">. We should use the meta tag, not chardet.
|
||||||
|
Real-world case: https://github.com/dgtlmoon/changedetection.io/issues/3952
|
||||||
|
"""
|
||||||
|
from .util import write_test_file_and_sync
|
||||||
|
japanese_text = '日本語のページ'
|
||||||
|
html = f'<html><head><meta http-equiv="Content-Type" content="text/html;charset=Shift-JIS"></head><body><p>{japanese_text}</p></body></html>'
|
||||||
|
write_test_file_and_sync(os.path.join(datastore_path, "endpoint-content.txt"), html.encode('shift_jis'), mode='wb')
|
||||||
|
|
||||||
|
test_url = url_for('test_endpoint', content_type="text/html", _external=True)
|
||||||
|
client.application.config.get('DATASTORE').add_watch(url=test_url)
|
||||||
|
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
||||||
|
wait_for_all_checks(client)
|
||||||
|
|
||||||
|
res = client.get(url_for("ui.ui_preview.preview_page", uuid="first"), follow_redirects=True)
|
||||||
|
assert japanese_text.encode('utf-8') in res.data
|
||||||
|
|
||||||
|
|
||||||
def set_html_response(datastore_path):
|
def set_html_response(datastore_path):
|
||||||
test_return_data = """
|
test_return_data = """
|
||||||
<html><body><span class="nav_second_img_text">
|
<html><body><span class="nav_second_img_text">
|
||||||
|
|||||||
@@ -6,10 +6,6 @@ from urllib.request import urlopen
|
|||||||
from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks
|
from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks
|
||||||
import os
|
import os
|
||||||
|
|
||||||
sleep_time_for_fetch_thread = 3
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def test_check_extract_text_from_diff(client, live_server, measure_memory_usage, datastore_path):
|
def test_check_extract_text_from_diff(client, live_server, measure_memory_usage, datastore_path):
|
||||||
import time
|
import time
|
||||||
with open(os.path.join(datastore_path, "endpoint-content.txt"), "w") as f:
|
with open(os.path.join(datastore_path, "endpoint-content.txt"), "w") as f:
|
||||||
|
|||||||
@@ -106,7 +106,7 @@ def test_consistent_history(client, live_server, measure_memory_usage, datastore
|
|||||||
|
|
||||||
# Find the snapshot one
|
# Find the snapshot one
|
||||||
for fname in files_in_watch_dir:
|
for fname in files_in_watch_dir:
|
||||||
if fname != 'history.txt' and fname != 'watch.json' and 'html' not in fname:
|
if fname != 'history.txt' and fname != 'watch.json' and fname != 'last-checksum.txt' and 'html' not in fname:
|
||||||
if strtobool(os.getenv("TEST_WITH_BROTLI")):
|
if strtobool(os.getenv("TEST_WITH_BROTLI")):
|
||||||
assert fname.endswith('.br'), "Forced TEST_WITH_BROTLI then it should be a .br filename"
|
assert fname.endswith('.br'), "Forced TEST_WITH_BROTLI then it should be a .br filename"
|
||||||
|
|
||||||
@@ -123,11 +123,18 @@ def test_consistent_history(client, live_server, measure_memory_usage, datastore
|
|||||||
assert json_obj['watching'][w]['title'], "Watch should have a title set"
|
assert json_obj['watching'][w]['title'], "Watch should have a title set"
|
||||||
assert contents.startswith(watch_title + "x"), f"Snapshot contents in file {fname} should start with '{watch_title}x', got '{contents}'"
|
assert contents.startswith(watch_title + "x"), f"Snapshot contents in file {fname} should start with '{watch_title}x', got '{contents}'"
|
||||||
|
|
||||||
# With new format, we also have watch.json, so 4 files total
|
# With new format, we have watch.json, so 4 files minimum
|
||||||
|
# Note: last-checksum.txt may or may not exist - it gets cleared by settings changes,
|
||||||
|
# and this test changes settings before checking files
|
||||||
|
# This assertion should be AFTER the loop, not inside it
|
||||||
if os.path.exists(changedetection_json):
|
if os.path.exists(changedetection_json):
|
||||||
assert len(files_in_watch_dir) == 4, "Should be four files in the dir with new format: watch.json, html.br snapshot, history.txt and the extracted text snapshot"
|
# 4 required files: watch.json, html.br, history.txt, extracted text snapshot
|
||||||
|
# last-checksum.txt is optional (cleared by settings changes in this test)
|
||||||
|
assert len(files_in_watch_dir) >= 4 and len(files_in_watch_dir) <= 5, f"Should be 4-5 files in the dir with new format (last-checksum.txt is optional). Found {len(files_in_watch_dir)}: {files_in_watch_dir}"
|
||||||
else:
|
else:
|
||||||
assert len(files_in_watch_dir) == 3, "Should be just three files in the dir with legacy format: html.br snapshot, history.txt and the extracted text snapshot"
|
# 3 required files: html.br, history.txt, extracted text snapshot
|
||||||
|
# last-checksum.txt is optional
|
||||||
|
assert len(files_in_watch_dir) >= 3 and len(files_in_watch_dir) <= 4, f"Should be 3-4 files in the dir with legacy format (last-checksum.txt is optional). Found {len(files_in_watch_dir)}: {files_in_watch_dir}"
|
||||||
|
|
||||||
# Check that 'default' Watch vars aren't accidentally being saved
|
# Check that 'default' Watch vars aren't accidentally being saved
|
||||||
if os.path.exists(changedetection_json):
|
if os.path.exists(changedetection_json):
|
||||||
|
|||||||
@@ -624,3 +624,76 @@ def test_session_locale_overrides_accept_language(client, live_server, measure_m
|
|||||||
assert "분".encode() in res.data, "Expected Korean '분' for Minutes"
|
assert "분".encode() in res.data, "Expected Korean '분' for Minutes"
|
||||||
assert "小時".encode() not in res.data, "Should not have Traditional Chinese '小時' when Korean is set"
|
assert "小時".encode() not in res.data, "Should not have Traditional Chinese '小時' when Korean is set"
|
||||||
assert "分鐘".encode() not in res.data, "Should not have Traditional Chinese '分鐘' when Korean is set"
|
assert "分鐘".encode() not in res.data, "Should not have Traditional Chinese '分鐘' when Korean is set"
|
||||||
|
|
||||||
|
|
||||||
|
def test_clear_history_translated_confirmation(client, live_server, measure_memory_usage, datastore_path):
|
||||||
|
"""
|
||||||
|
Test that clearing snapshot history works with translated confirmation text.
|
||||||
|
|
||||||
|
Issue #3865: When the app language is set to German, the clear history
|
||||||
|
confirmation dialog shows the translated word (e.g. 'loschen') but the
|
||||||
|
backend only accepted the English word 'clear', making it impossible
|
||||||
|
to clear snapshots in non-English languages.
|
||||||
|
"""
|
||||||
|
from flask import url_for
|
||||||
|
|
||||||
|
test_url = url_for('test_endpoint', _external=True)
|
||||||
|
|
||||||
|
# Add a watch so there is history to clear
|
||||||
|
res = client.post(
|
||||||
|
url_for("imports.import_page"),
|
||||||
|
data={"urls": test_url},
|
||||||
|
follow_redirects=True
|
||||||
|
)
|
||||||
|
assert b"1 Imported" in res.data
|
||||||
|
wait_for_all_checks(client)
|
||||||
|
|
||||||
|
# Set language to German
|
||||||
|
res = client.get(
|
||||||
|
url_for("set_language", locale="de"),
|
||||||
|
follow_redirects=True
|
||||||
|
)
|
||||||
|
assert res.status_code == 200
|
||||||
|
|
||||||
|
# Verify the clear history page shows the German confirmation word
|
||||||
|
res = client.get(
|
||||||
|
url_for("ui.clear_all_history"),
|
||||||
|
follow_redirects=True
|
||||||
|
)
|
||||||
|
assert res.status_code == 200
|
||||||
|
assert "löschen".encode() in res.data, "Expected German word 'loschen' on clear history page"
|
||||||
|
|
||||||
|
# Submit the form with the German translated word
|
||||||
|
res = client.post(
|
||||||
|
url_for("ui.clear_all_history"),
|
||||||
|
data={"confirmtext": "löschen"},
|
||||||
|
follow_redirects=True
|
||||||
|
)
|
||||||
|
assert res.status_code == 200
|
||||||
|
# Should NOT show error message
|
||||||
|
assert b"Incorrect confirmation text" not in res.data, \
|
||||||
|
"German confirmation word 'loschen' should be accepted (issue #3865)"
|
||||||
|
|
||||||
|
# Switch back to English and verify English word still works
|
||||||
|
res = client.get(
|
||||||
|
url_for("set_language", locale="en_US"),
|
||||||
|
follow_redirects=True
|
||||||
|
)
|
||||||
|
|
||||||
|
res = client.post(
|
||||||
|
url_for("ui.clear_all_history"),
|
||||||
|
data={"confirmtext": "clear"},
|
||||||
|
follow_redirects=True
|
||||||
|
)
|
||||||
|
assert res.status_code == 200
|
||||||
|
assert b"Incorrect confirmation text" not in res.data, \
|
||||||
|
"English confirmation word 'clear' should still be accepted"
|
||||||
|
|
||||||
|
# Verify that missing/empty confirmtext does not crash the server
|
||||||
|
res = client.post(
|
||||||
|
url_for("ui.clear_all_history"),
|
||||||
|
data={},
|
||||||
|
follow_redirects=True
|
||||||
|
)
|
||||||
|
assert res.status_code == 200, \
|
||||||
|
"Missing confirmtext should not crash the server"
|
||||||
|
|||||||
@@ -41,7 +41,6 @@ def set_modified_ignore_response(datastore_path):
|
|||||||
def test_render_anchor_tag_content_true(client, live_server, measure_memory_usage, datastore_path):
|
def test_render_anchor_tag_content_true(client, live_server, measure_memory_usage, datastore_path):
|
||||||
"""Testing that the link changes are detected when
|
"""Testing that the link changes are detected when
|
||||||
render_anchor_tag_content setting is set to true"""
|
render_anchor_tag_content setting is set to true"""
|
||||||
sleep_time_for_fetch_thread = 3
|
|
||||||
|
|
||||||
# Give the endpoint time to spin up
|
# Give the endpoint time to spin up
|
||||||
time.sleep(1)
|
time.sleep(1)
|
||||||
|
|||||||
@@ -100,7 +100,6 @@ def test_normal_page_check_works_with_ignore_status_code(client, live_server, me
|
|||||||
|
|
||||||
# Tests the whole stack works with staus codes ignored
|
# Tests the whole stack works with staus codes ignored
|
||||||
def test_403_page_check_works_with_ignore_status_code(client, live_server, measure_memory_usage, datastore_path):
|
def test_403_page_check_works_with_ignore_status_code(client, live_server, measure_memory_usage, datastore_path):
|
||||||
sleep_time_for_fetch_thread = 3
|
|
||||||
|
|
||||||
set_original_response(datastore_path=datastore_path)
|
set_original_response(datastore_path=datastore_path)
|
||||||
|
|
||||||
@@ -112,8 +111,7 @@ def test_403_page_check_works_with_ignore_status_code(client, live_server, measu
|
|||||||
uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
|
uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
|
||||||
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
||||||
|
|
||||||
# Give the thread time to pick it up
|
wait_for_all_checks(client)
|
||||||
time.sleep(sleep_time_for_fetch_thread)
|
|
||||||
|
|
||||||
# Goto the edit page, check our ignore option
|
# Goto the edit page, check our ignore option
|
||||||
# Add our URL to the import page
|
# Add our URL to the import page
|
||||||
|
|||||||
@@ -2,10 +2,9 @@
|
|||||||
|
|
||||||
import time
|
import time
|
||||||
from flask import url_for
|
from flask import url_for
|
||||||
from . util import live_server_setup
|
|
||||||
import os
|
import os
|
||||||
|
|
||||||
|
from .util import live_server_setup, delete_all_watches, wait_for_all_checks
|
||||||
|
|
||||||
|
|
||||||
# Should be the same as set_original_ignore_response(datastore_path=datastore_path) but with a little more whitespacing
|
# Should be the same as set_original_ignore_response(datastore_path=datastore_path) but with a little more whitespacing
|
||||||
@@ -50,10 +49,7 @@ def set_original_ignore_response(datastore_path):
|
|||||||
|
|
||||||
# If there was only a change in the whitespacing, then we shouldnt have a change detected
|
# If there was only a change in the whitespacing, then we shouldnt have a change detected
|
||||||
def test_check_ignore_whitespace(client, live_server, measure_memory_usage, datastore_path):
|
def test_check_ignore_whitespace(client, live_server, measure_memory_usage, datastore_path):
|
||||||
sleep_time_for_fetch_thread = 3
|
|
||||||
|
|
||||||
# Give the endpoint time to spin up
|
|
||||||
time.sleep(1)
|
|
||||||
|
|
||||||
set_original_ignore_response(datastore_path=datastore_path)
|
set_original_ignore_response(datastore_path=datastore_path)
|
||||||
|
|
||||||
@@ -74,17 +70,17 @@ def test_check_ignore_whitespace(client, live_server, measure_memory_usage, data
|
|||||||
uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
|
uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
|
||||||
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
||||||
|
|
||||||
time.sleep(sleep_time_for_fetch_thread)
|
wait_for_all_checks(client)
|
||||||
# Trigger a check
|
# Trigger a check
|
||||||
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
||||||
|
|
||||||
set_original_ignore_response_but_with_whitespace(datastore_path)
|
set_original_ignore_response_but_with_whitespace(datastore_path)
|
||||||
time.sleep(sleep_time_for_fetch_thread)
|
wait_for_all_checks(client)
|
||||||
# Trigger a check
|
# Trigger a check
|
||||||
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
||||||
|
|
||||||
# Give the thread time to pick it up
|
# Give the thread time to pick it up
|
||||||
time.sleep(sleep_time_for_fetch_thread)
|
wait_for_all_checks(client)
|
||||||
|
|
||||||
# It should report nothing found (no new 'has-unread-changes' class)
|
# It should report nothing found (no new 'has-unread-changes' class)
|
||||||
res = client.get(url_for("watchlist.index"))
|
res = client.get(url_for("watchlist.index"))
|
||||||
|
|||||||
@@ -16,6 +16,51 @@ except ModuleNotFoundError:
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def test_jsonp_treated_as_plaintext():
|
||||||
|
from ..processors.magic import guess_stream_type
|
||||||
|
|
||||||
|
# JSONP content (server wrongly claims application/json) should be detected as plaintext
|
||||||
|
# Callback names are arbitrary identifiers, not always 'cb'
|
||||||
|
jsonp_content = 'jQuery123456({ "version": "8.0.41", "url": "https://example.com/app.apk" })'
|
||||||
|
result = guess_stream_type(http_content_header="application/json", content=jsonp_content)
|
||||||
|
assert result.is_json is False
|
||||||
|
assert result.is_plaintext is True
|
||||||
|
|
||||||
|
# Variation with dotted callback name e.g. jQuery.cb(...)
|
||||||
|
jsonp_dotted = 'some.callback({ "version": "1.0" })'
|
||||||
|
result = guess_stream_type(http_content_header="application/json", content=jsonp_dotted)
|
||||||
|
assert result.is_json is False
|
||||||
|
assert result.is_plaintext is True
|
||||||
|
|
||||||
|
# Real JSON should still be detected as JSON
|
||||||
|
json_content = '{ "version": "8.0.41", "url": "https://example.com/app.apk" }'
|
||||||
|
result = guess_stream_type(http_content_header="application/json", content=json_content)
|
||||||
|
assert result.is_json is True
|
||||||
|
assert result.is_plaintext is False
|
||||||
|
|
||||||
|
|
||||||
|
def test_jsonp_json_filter_extraction():
|
||||||
|
from .. import html_tools
|
||||||
|
|
||||||
|
# Tough case: dotted namespace callback, trailing semicolon, deeply nested content with arrays
|
||||||
|
jsonp_content = 'weixin.update.callback({"platforms": {"android": {"variants": [{"arch": "arm64", "versionName": "8.0.68", "url": "https://example.com/app-arm64.apk"}, {"arch": "arm32", "versionName": "8.0.41", "url": "https://example.com/app-arm32.apk"}]}}});'
|
||||||
|
|
||||||
|
# Deep nested jsonpath filter into array element
|
||||||
|
text = html_tools.extract_json_as_string(jsonp_content, "json:$.platforms.android.variants[0].versionName")
|
||||||
|
assert text == '"8.0.68"'
|
||||||
|
|
||||||
|
# Filter that selects the second array element
|
||||||
|
text = html_tools.extract_json_as_string(jsonp_content, "json:$.platforms.android.variants[1].arch")
|
||||||
|
assert text == '"arm32"'
|
||||||
|
|
||||||
|
if jq_support:
|
||||||
|
text = html_tools.extract_json_as_string(jsonp_content, "jq:.platforms.android.variants[0].versionName")
|
||||||
|
assert text == '"8.0.68"'
|
||||||
|
|
||||||
|
text = html_tools.extract_json_as_string(jsonp_content, "jqraw:.platforms.android.variants[1].url")
|
||||||
|
assert text == "https://example.com/app-arm32.apk"
|
||||||
|
|
||||||
|
|
||||||
def test_unittest_inline_html_extract():
|
def test_unittest_inline_html_extract():
|
||||||
# So lets pretend that the JSON we want is inside some HTML
|
# So lets pretend that the JSON we want is inside some HTML
|
||||||
content="""
|
content="""
|
||||||
|
|||||||
@@ -17,6 +17,7 @@ from changedetectionio.notification import (
|
|||||||
)
|
)
|
||||||
from ..diff import HTML_CHANGED_STYLE
|
from ..diff import HTML_CHANGED_STYLE
|
||||||
from ..model import USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH
|
from ..model import USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH
|
||||||
|
from ..notification_service import FormattableTimestamp
|
||||||
|
|
||||||
|
|
||||||
# Hard to just add more live server URLs when one test is already running (I think)
|
# Hard to just add more live server URLs when one test is already running (I think)
|
||||||
@@ -107,7 +108,11 @@ def test_check_notification(client, live_server, measure_memory_usage, datastore
|
|||||||
"Diff Added: {{diff_added}}\n"
|
"Diff Added: {{diff_added}}\n"
|
||||||
"Diff Removed: {{diff_removed}}\n"
|
"Diff Removed: {{diff_removed}}\n"
|
||||||
"Diff Full: {{diff_full}}\n"
|
"Diff Full: {{diff_full}}\n"
|
||||||
|
"Diff with args: {{diff(context=3)}}"
|
||||||
"Diff as Patch: {{diff_patch}}\n"
|
"Diff as Patch: {{diff_patch}}\n"
|
||||||
|
"Change datetime: {{change_datetime}}\n"
|
||||||
|
"Change datetime format: Weekday {{change_datetime(format='%A')}}\n"
|
||||||
|
"Change datetime format: {{change_datetime(format='%Y-%m-%dT%H:%M:%S%z')}}\n"
|
||||||
":-)",
|
":-)",
|
||||||
"notification_screenshot": True,
|
"notification_screenshot": True,
|
||||||
"notification_format": 'text'}
|
"notification_format": 'text'}
|
||||||
@@ -135,8 +140,6 @@ def test_check_notification(client, live_server, measure_memory_usage, datastore
|
|||||||
assert bytes(notification_url.encode('utf-8')) in res.data
|
assert bytes(notification_url.encode('utf-8')) in res.data
|
||||||
assert bytes("New ChangeDetection.io Notification".encode('utf-8')) in res.data
|
assert bytes("New ChangeDetection.io Notification".encode('utf-8')) in res.data
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
## Now recheck, and it should have sent the notification
|
## Now recheck, and it should have sent the notification
|
||||||
wait_for_all_checks(client)
|
wait_for_all_checks(client)
|
||||||
set_modified_response(datastore_path=datastore_path)
|
set_modified_response(datastore_path=datastore_path)
|
||||||
@@ -172,11 +175,23 @@ def test_check_notification(client, live_server, measure_memory_usage, datastore
|
|||||||
assert ":-)" in notification_submission
|
assert ":-)" in notification_submission
|
||||||
assert "New ChangeDetection.io Notification - {}".format(test_url) in notification_submission
|
assert "New ChangeDetection.io Notification - {}".format(test_url) in notification_submission
|
||||||
assert test_url in notification_submission
|
assert test_url in notification_submission
|
||||||
|
|
||||||
assert ':-)' in notification_submission
|
assert ':-)' in notification_submission
|
||||||
# Check the attachment was added, and that it is a JPEG from the original PNG
|
# Check the attachment was added, and that it is a JPEG from the original PNG
|
||||||
notification_submission_object = json.loads(notification_submission)
|
notification_submission_object = json.loads(notification_submission)
|
||||||
assert notification_submission_object
|
assert notification_submission_object
|
||||||
|
|
||||||
|
import time
|
||||||
|
# Could be from a few seconds ago (when the notification was fired vs in this test checking), so check for any
|
||||||
|
times_possible = [str(FormattableTimestamp(int(time.time()) - i)) for i in range(15)]
|
||||||
|
assert any(t in notification_submission for t in times_possible)
|
||||||
|
|
||||||
|
txt = f"Weekday {FormattableTimestamp(int(time.time()))(format='%A')}"
|
||||||
|
assert txt in notification_submission
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# We keep PNG screenshots for now
|
# We keep PNG screenshots for now
|
||||||
# IF THIS FAILS YOU SHOULD BE TESTING WITH ENV VAR REMOVE_REQUESTS_OLD_SCREENSHOTS=False
|
# IF THIS FAILS YOU SHOULD BE TESTING WITH ENV VAR REMOVE_REQUESTS_OLD_SCREENSHOTS=False
|
||||||
assert notification_submission_object['attachments'][0]['filename'] == 'last-screenshot.png'
|
assert notification_submission_object['attachments'][0]['filename'] == 'last-screenshot.png'
|
||||||
|
|||||||
@@ -109,7 +109,7 @@ def test_itemprop_price_change(client, live_server, measure_memory_usage, datast
|
|||||||
set_original_response(props_markup=instock_props[0], price='120.45', datastore_path=datastore_path)
|
set_original_response(props_markup=instock_props[0], price='120.45', datastore_path=datastore_path)
|
||||||
res = client.post(
|
res = client.post(
|
||||||
url_for("ui.ui_edit.edit_page", uuid="first"),
|
url_for("ui.ui_edit.edit_page", uuid="first"),
|
||||||
data={"restock_settings-follow_price_changes": "", "url": test_url, "tags": "", "headers": "", 'fetch_backend': "html_requests", "time_between_check_use_default": "y"},
|
data={"processor_config_restock_diff-follow_price_changes": "", "url": test_url, "tags": "", "headers": "", 'fetch_backend': "html_requests", "time_between_check_use_default": "y"},
|
||||||
follow_redirects=True
|
follow_redirects=True
|
||||||
)
|
)
|
||||||
assert b"Updated watch." in res.data
|
assert b"Updated watch." in res.data
|
||||||
@@ -204,9 +204,9 @@ def _run_test_minmax_limit(client, extra_watch_edit_form, datastore_path):
|
|||||||
def test_restock_itemprop_minmax(client, live_server, measure_memory_usage, datastore_path):
|
def test_restock_itemprop_minmax(client, live_server, measure_memory_usage, datastore_path):
|
||||||
|
|
||||||
extras = {
|
extras = {
|
||||||
"restock_settings-follow_price_changes": "y",
|
"processor_config_restock_diff-follow_price_changes": "y",
|
||||||
"restock_settings-price_change_min": 900.0,
|
"processor_config_restock_diff-price_change_min": 900.0,
|
||||||
"restock_settings-price_change_max": 1100.10
|
"processor_config_restock_diff-price_change_max": 1100.10
|
||||||
}
|
}
|
||||||
_run_test_minmax_limit(client, extra_watch_edit_form=extras, datastore_path=datastore_path)
|
_run_test_minmax_limit(client, extra_watch_edit_form=extras, datastore_path=datastore_path)
|
||||||
|
|
||||||
@@ -223,9 +223,9 @@ def test_restock_itemprop_with_tag(client, live_server, measure_memory_usage, da
|
|||||||
res = client.post(
|
res = client.post(
|
||||||
url_for("tags.form_tag_edit_submit", uuid="first"),
|
url_for("tags.form_tag_edit_submit", uuid="first"),
|
||||||
data={"name": "test-tag",
|
data={"name": "test-tag",
|
||||||
"restock_settings-follow_price_changes": "y",
|
"processor_config_restock_diff-follow_price_changes": "y",
|
||||||
"restock_settings-price_change_min": 900.0,
|
"processor_config_restock_diff-price_change_min": 900.0,
|
||||||
"restock_settings-price_change_max": 1100.10,
|
"processor_config_restock_diff-price_change_max": 1100.10,
|
||||||
"overrides_watch": "y", #overrides_watch should be restock_overrides_watch
|
"overrides_watch": "y", #overrides_watch should be restock_overrides_watch
|
||||||
},
|
},
|
||||||
follow_redirects=True
|
follow_redirects=True
|
||||||
@@ -258,8 +258,8 @@ def test_itemprop_percent_threshold(client, live_server, measure_memory_usage, d
|
|||||||
|
|
||||||
res = client.post(
|
res = client.post(
|
||||||
url_for("ui.ui_edit.edit_page", uuid="first"),
|
url_for("ui.ui_edit.edit_page", uuid="first"),
|
||||||
data={"restock_settings-follow_price_changes": "y",
|
data={"processor_config_restock_diff-follow_price_changes": "y",
|
||||||
"restock_settings-price_change_threshold_percent": 5.0,
|
"processor_config_restock_diff-price_change_threshold_percent": 5.0,
|
||||||
"url": test_url,
|
"url": test_url,
|
||||||
"tags": "",
|
"tags": "",
|
||||||
"headers": "",
|
"headers": "",
|
||||||
@@ -305,8 +305,8 @@ def test_itemprop_percent_threshold(client, live_server, measure_memory_usage, d
|
|||||||
|
|
||||||
res = client.post(
|
res = client.post(
|
||||||
url_for("ui.ui_edit.edit_page", uuid=uuid),
|
url_for("ui.ui_edit.edit_page", uuid=uuid),
|
||||||
data={"restock_settings-follow_price_changes": "y",
|
data={"processor_config_restock_diff-follow_price_changes": "y",
|
||||||
"restock_settings-price_change_threshold_percent": 5.05,
|
"processor_config_restock_diff-price_change_threshold_percent": 5.05,
|
||||||
"processor": "text_json_diff",
|
"processor": "text_json_diff",
|
||||||
"url": test_url,
|
"url": test_url,
|
||||||
'fetch_backend': "html_requests",
|
'fetch_backend': "html_requests",
|
||||||
@@ -467,3 +467,38 @@ def test_special_prop_examples(client, live_server, measure_memory_usage, datast
|
|||||||
assert b'155.55' in res.data
|
assert b'155.55' in res.data
|
||||||
|
|
||||||
delete_all_watches(client)
|
delete_all_watches(client)
|
||||||
|
|
||||||
|
|
||||||
|
def test_itemprop_as_str(client, live_server, measure_memory_usage, datastore_path):
|
||||||
|
|
||||||
|
test_return_data = f"""<html>
|
||||||
|
<body>
|
||||||
|
Some initial text<br>
|
||||||
|
<p>Which is across multiple lines</p>
|
||||||
|
<span itemprop="offers" itemscope itemtype="http://schema.org/Offer">
|
||||||
|
<meta content="767.55" itemprop="price"/>
|
||||||
|
<meta content="EUR" itemprop="priceCurrency"/>
|
||||||
|
<meta content="InStock" itemprop="availability"/>
|
||||||
|
<meta content="https://www.123-test.dk" itemprop="url"/>
|
||||||
|
</span>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
|
"""
|
||||||
|
|
||||||
|
with open(os.path.join(datastore_path, "endpoint-content.txt"), "w") as f:
|
||||||
|
f.write(test_return_data)
|
||||||
|
|
||||||
|
|
||||||
|
test_url = url_for('test_endpoint', _external=True)
|
||||||
|
|
||||||
|
client.post(
|
||||||
|
url_for("ui.ui_views.form_quick_watch_add"),
|
||||||
|
data={"url": test_url, "tags": 'restock tests', 'processor': 'restock_diff'},
|
||||||
|
follow_redirects=True
|
||||||
|
)
|
||||||
|
|
||||||
|
client.get(url_for("ui.form_watch_checknow"))
|
||||||
|
wait_for_all_checks(client)
|
||||||
|
|
||||||
|
res = client.get(url_for("watchlist.index"))
|
||||||
|
assert b'767.55' in res.data
|
||||||
@@ -1,4 +1,5 @@
|
|||||||
import os
|
import os
|
||||||
|
import pytest
|
||||||
|
|
||||||
from flask import url_for
|
from flask import url_for
|
||||||
|
|
||||||
@@ -24,6 +25,31 @@ def set_original_response(datastore_path):
|
|||||||
f.write(test_return_data)
|
f.write(test_return_data)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def test_favicon(client, live_server, measure_memory_usage, datastore_path):
|
||||||
|
# Attempt to fetch it, make sure that works
|
||||||
|
SVG_BASE64 = 'PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgMCAxIDEiLz4='
|
||||||
|
uuid = client.application.config.get('DATASTORE').add_watch(url='https://localhost')
|
||||||
|
live_server.app.config['DATASTORE'].data['watching'][uuid].bump_favicon(url="favicon-set-type.svg",
|
||||||
|
favicon_base_64=SVG_BASE64
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
res = client.get(url_for('static_content', group='favicon', filename=uuid))
|
||||||
|
assert res.status_code == 200
|
||||||
|
assert len(res.data) > 10
|
||||||
|
|
||||||
|
res = client.get(url_for('static_content', group='..', filename='__init__.py'))
|
||||||
|
assert res.status_code != 200
|
||||||
|
|
||||||
|
|
||||||
|
res = client.get(url_for('static_content', group='.', filename='../__init__.py'))
|
||||||
|
assert res.status_code != 200
|
||||||
|
|
||||||
|
# Traverse by filename protection
|
||||||
|
res = client.get(url_for('static_content', group='js', filename='../styles/styles.css'))
|
||||||
|
assert res.status_code != 200
|
||||||
|
|
||||||
def test_bad_access(client, live_server, measure_memory_usage, datastore_path):
|
def test_bad_access(client, live_server, measure_memory_usage, datastore_path):
|
||||||
|
|
||||||
res = client.post(
|
res = client.post(
|
||||||
@@ -478,3 +504,243 @@ def test_logout_with_redirect(client, live_server, measure_memory_usage, datasto
|
|||||||
# Cleanup
|
# Cleanup
|
||||||
del client.application.config['DATASTORE'].data['settings']['application']['password']
|
del client.application.config['DATASTORE'].data['settings']['application']['password']
|
||||||
|
|
||||||
|
|
||||||
|
def test_static_directory_traversal(client, live_server, measure_memory_usage, datastore_path):
|
||||||
|
"""
|
||||||
|
Test that the static file serving route properly blocks directory traversal attempts.
|
||||||
|
This tests the fix for GHSA-9jj8-v89v-xjvw (CVE pending).
|
||||||
|
|
||||||
|
The vulnerability was in /static/<group>/<filename> where the sanitization regex
|
||||||
|
allowed dots, enabling "../" traversal to read application source files.
|
||||||
|
|
||||||
|
The fix changed the regex from r'[^\w.-]+' to r'[^a-z0-9_]+' which blocks dots.
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Test 1: Direct .. traversal attempt (URL-encoded)
|
||||||
|
res = client.get(
|
||||||
|
"/static/%2e%2e/flask_app.py",
|
||||||
|
follow_redirects=False
|
||||||
|
)
|
||||||
|
# Should be blocked (404 or 403)
|
||||||
|
assert res.status_code in [404, 403], f"Expected 404/403, got {res.status_code}"
|
||||||
|
# Should NOT contain application source code
|
||||||
|
assert b"def static_content" not in res.data
|
||||||
|
assert b"changedetection_app" not in res.data
|
||||||
|
|
||||||
|
# Test 2: Direct .. traversal attempt (unencoded)
|
||||||
|
res = client.get(
|
||||||
|
"/static/../flask_app.py",
|
||||||
|
follow_redirects=False
|
||||||
|
)
|
||||||
|
assert res.status_code in [404, 403], f"Expected 404/403, got {res.status_code}"
|
||||||
|
assert b"def static_content" not in res.data
|
||||||
|
|
||||||
|
# Test 3: Multiple dots traversal
|
||||||
|
res = client.get(
|
||||||
|
"/static/..../flask_app.py",
|
||||||
|
follow_redirects=False
|
||||||
|
)
|
||||||
|
assert res.status_code in [404, 403], f"Expected 404/403, got {res.status_code}"
|
||||||
|
assert b"def static_content" not in res.data
|
||||||
|
|
||||||
|
# Test 4: Try to access other application files
|
||||||
|
for filename in ["__init__.py", "datastore.py", "store.py"]:
|
||||||
|
res = client.get(
|
||||||
|
f"/static/%2e%2e/{filename}",
|
||||||
|
follow_redirects=False
|
||||||
|
)
|
||||||
|
assert res.status_code in [404, 403], f"File {filename} should be blocked"
|
||||||
|
# Should not contain Python code indicators
|
||||||
|
assert b"import" not in res.data or b"# Test" in res.data # Allow "1 Imported" etc
|
||||||
|
|
||||||
|
# Test 5: Verify legitimate static files still work
|
||||||
|
# Note: We can't test actual files without knowing what exists,
|
||||||
|
# but we can verify the sanitization doesn't break valid groups
|
||||||
|
res = client.get(
|
||||||
|
"/static/images/test.png", # Will 404 if file doesn't exist, but won't traverse
|
||||||
|
follow_redirects=False
|
||||||
|
)
|
||||||
|
# Should get 404 (file not found) not 403 (blocked)
|
||||||
|
# This confirms the group name "images" is valid
|
||||||
|
assert res.status_code == 404
|
||||||
|
|
||||||
|
# Test 6: Ensure hyphens and dots are blocked in group names
|
||||||
|
res = client.get(
|
||||||
|
"/static/../../../etc/passwd",
|
||||||
|
follow_redirects=False
|
||||||
|
)
|
||||||
|
assert res.status_code in [404, 403]
|
||||||
|
assert b"root:" not in res.data
|
||||||
|
|
||||||
|
# Test 7: Test that underscores still work (they're allowed)
|
||||||
|
res = client.get(
|
||||||
|
"/static/visual_selector_data/test.json",
|
||||||
|
follow_redirects=False
|
||||||
|
)
|
||||||
|
# visual_selector_data is a real group, but requires auth
|
||||||
|
# Should get 403 (not authenticated) or 404 (file not found), not a path traversal
|
||||||
|
assert res.status_code in [403, 404]
|
||||||
|
|
||||||
|
|
||||||
|
def test_ssrf_private_ip_blocked(client, live_server, monkeypatch, measure_memory_usage, datastore_path):
    """
    Check that SSRF protection for IANA-reserved/private addresses happens at fetch-time only.

    A watch pointing at a private/reserved IP may be *added* without complaint; the
    rejection is expected when the fetcher actually goes to reach the URL.

    Sections exercised, in order:
      1. is_private_hostname() recognises the reserved ranges (and leaves public IPs alone)
      2. is_safe_valid_url() accepts private-IP URLs at add-time
      3. ALLOW_IANA_RESTRICTED_ADDRESSES makes no difference to add-time validation
      4. The quick-add UI form accepts private-IP URLs
      5. The requests fetcher refuses a host that is private at fetch-time (DNS rebinding)
      6. The requests fetcher refuses a redirect whose target is private (open-redirect bypass)

    Per the original notes, conftest.py sets ALLOW_IANA_RESTRICTED_ADDRESSES=true globally so
    the localhost test server works for other tests; monkeypatch overrides it here and the
    override is undone automatically when the test finishes.
    """
    from unittest.mock import patch, MagicMock
    from changedetectionio.validate_url import is_safe_valid_url, is_private_hostname

    monkeypatch.setenv('ALLOW_IANA_RESTRICTED_ADDRESSES', 'false')

    # ------------------------------------------------------------------
    # 1. is_private_hostname() classification
    # ------------------------------------------------------------------
    reserved_hosts = (
        '127.0.0.1',        # loopback
        '10.0.0.1',         # RFC 1918
        '172.16.0.1',       # RFC 1918
        '192.168.1.1',      # RFC 1918
        '169.254.169.254',  # link-local / AWS metadata endpoint
        '::1',              # IPv6 loopback
        'fc00::1',          # IPv6 unique local
        'fe80::1',          # IPv6 link-local
    )
    for host in reserved_hosts:
        assert is_private_hostname(host), f"{host} should be identified as private/reserved"

    public_hosts = ('8.8.8.8', '1.1.1.1')
    for host in public_hosts:
        assert not is_private_hostname(host), f"{host} should be identified as public"

    # ------------------------------------------------------------------
    # 2. Add-time validation lets private-IP URLs through
    #    (the IANA check lives in the fetch-time validate_iana_url())
    # ------------------------------------------------------------------
    reserved_urls = (
        'http://127.0.0.1/',
        'http://10.0.0.1/',
        'http://172.16.0.1/',
        'http://192.168.1.1/',
        'http://169.254.169.254/',
        'http://169.254.169.254/latest/meta-data/iam/security-credentials/',
        'http://[::1]/',
        'http://[fc00::1]/',
        'http://[fe80::1]/',
    )
    for url in reserved_urls:
        assert is_safe_valid_url(url), f"{url} should be allowed by is_safe_valid_url (IANA check is at fetch-time)"

    # ------------------------------------------------------------------
    # 3. The env switch must not influence add-time validation.
    #    Ends on 'false', which is the value the remaining sections run with.
    # ------------------------------------------------------------------
    for env_value in ('true', 'false'):
        monkeypatch.setenv('ALLOW_IANA_RESTRICTED_ADDRESSES', env_value)
        assert is_safe_valid_url('http://127.0.0.1/'), \
            "Private IP should be allowed at add-time regardless of ALLOW_IANA_RESTRICTED_ADDRESSES"

    # ------------------------------------------------------------------
    # 4. Quick-add form accepts private-IP URLs; blocking is deferred to fetch-time
    # ------------------------------------------------------------------
    for url in ('http://127.0.0.1/', 'http://169.254.169.254/latest/meta-data/'):
        response = client.post(
            url_for('ui.ui_views.form_quick_watch_add'),
            data={'url': url, 'tags': ''},
            follow_redirects=True
        )
        assert b'Watch protocol is not permitted or invalid URL format' not in response.data, \
            f"UI should accept {url} at add-time (SSRF is blocked at fetch-time)"

    # ------------------------------------------------------------------
    # 5. Fetch-time DNS-rebinding check in the requests fetcher.
    #    The hostname check is forced to report "private" for every host,
    #    simulating DNS that was rebound after the watch was added.
    # ------------------------------------------------------------------
    from changedetectionio.content_fetchers.requests import fetcher as RequestsFetcher

    requests_fetcher = RequestsFetcher()

    def _fetch_example():
        # Fresh header dict on every call so the two fetch attempts share no state
        return requests_fetcher._run_sync(
            url='http://example.com/',
            timeout=5,
            request_headers={},
            request_body=None,
            request_method='GET',
        )

    with patch('changedetectionio.content_fetchers.requests.is_private_hostname', return_value=True), \
            pytest.raises(Exception, match='private/reserved'):
        _fetch_example()

    # ------------------------------------------------------------------
    # 6. Redirect-to-private-IP blocked (open-redirect SSRF bypass).
    #    The mocked response is a 302 pointing at an IANA-reserved address.
    # ------------------------------------------------------------------
    redirect_response = MagicMock(
        is_redirect=True,
        status_code=302,
        headers={'Location': 'http://169.254.169.254/latest/meta-data/'},
    )

    def _is_redirect_target_private(hostname):
        # The initial host counts as "public"; only these literal targets are private
        return hostname in {'169.254.169.254', '10.0.0.1', '172.16.0.1',
                            '192.168.0.1', '127.0.0.1', '::1'}

    with patch('changedetectionio.content_fetchers.requests.is_private_hostname',
               side_effect=_is_redirect_target_private), \
            patch('requests.Session.request', return_value=redirect_response), \
            pytest.raises(Exception, match='Redirect blocked'):
        _fetch_example()
|
||||||
|
|
||||||
|
|
||||||
|
def test_unresolvable_hostname_is_allowed(client, live_server, monkeypatch):
    """
    A hostname that cannot be resolved must still be accepted at add-time,
    even with ALLOW_IANA_RESTRICTED_ADDRESSES=false.

    Per the original notes: a DNS failure (gaierror) says nothing about the URL pointing
    at a private IP, since the domain may be offline or not yet live, so rejecting it
    would be a false positive. DNS-rebinding protection is applied at fetch-time instead.
    """
    from changedetectionio.validate_url import is_safe_valid_url

    monkeypatch.setenv('ALLOW_IANA_RESTRICTED_ADDRESSES', 'false')

    unresolvable_url = 'http://this-host-does-not-exist-xyz987.invalid/some/path'

    # Plain URL validation must not depend on the host resolving
    assert is_safe_valid_url(unresolvable_url), \
        "Unresolvable hostname should pass is_safe_valid_url — DNS failure is not a private-IP signal"

    # The quick-add form must accept it as well
    add_response = client.post(
        url_for('ui.ui_views.form_quick_watch_add'),
        data={'url': unresolvable_url, 'tags': ''},
        follow_redirects=True
    )
    assert b'Watch protocol is not permitted or invalid URL format' not in add_response.data, \
        "UI should not reject a URL just because its hostname is unresolvable"

    # And the new watch must be listed in the overview
    overview = client.get(url_for('watchlist.index'))
    assert b'this-host-does-not-exist-xyz987.invalid' in overview.data, \
        "Unresolvable hostname watch should appear in the watch overview list"
|
||||||
|
|||||||
@@ -0,0 +1,208 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Test that changing global settings or tag configurations forces reprocessing.
|
||||||
|
|
||||||
|
When settings or tag configurations change, all affected watches need to
|
||||||
|
reprocess even if their content hasn't changed, because configuration affects
|
||||||
|
the processing result.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import time
|
||||||
|
from flask import url_for
|
||||||
|
from .util import wait_for_all_checks
|
||||||
|
|
||||||
|
|
||||||
|
def test_settings_change_forces_reprocess(client, live_server, measure_memory_usage, datastore_path):
    """
    Saving the global settings must remove every watch's last-checksum.txt so that
    the next check reprocesses instead of skipping.

    Flow: create two watches on the same static endpoint, run a check (checksum files
    appear), post the settings form (checksum files must vanish), run another check
    (checksum files must be back).
    """

    # Static endpoint body; it never changes during this test
    page_body = """<html>
<body>
<p>Test content that stays the same</p>
</body>
</html>
"""
    endpoint_file = os.path.join(datastore_path, "endpoint-content.txt")
    with open(endpoint_file, "w") as fh:
        fh.write(page_body)

    test_url = url_for('test_endpoint', _external=True)

    # Two watches against the same URL, then unpause both
    datastore = client.application.config.get('DATASTORE')
    watch_uuids = [
        datastore.add_watch(url=test_url, extras={'title': f'Watch {number}'})
        for number in (1, 2)
    ]
    for watch_uuid in watch_uuids:
        datastore.data['watching'][watch_uuid]['paused'] = False

    # Baseline check
    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)

    # (watch number, checksum path) pairs, reused for every verification below
    checksum_files = [
        (number, os.path.join(datastore_path, watch_uuid, 'last-checksum.txt'))
        for number, watch_uuid in enumerate(watch_uuids, start=1)
    ]
    for number, path in checksum_files:
        assert os.path.isfile(path), f"First check should create checksum file for watch {number}"

    # Save the global settings (which setting changes is irrelevant)
    settings_response = client.post(
        url_for("settings.settings_page"),
        data={
            "application-empty_pages_are_a_change": "",
            "requests-time_between_check-minutes": 180,
            'application-fetch_backend': "html_requests"
        },
        follow_redirects=True
    )
    assert b"Settings updated." in settings_response.data

    # Short pause before inspecting the filesystem
    time.sleep(0.5)

    # Every checksum file must be gone
    for number, path in checksum_files:
        assert not os.path.isfile(path), f"Settings change should delete checksum for watch {number}"

    # The following check has to reprocess and write the checksums again
    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)

    for number, path in checksum_files:
        assert os.path.isfile(path), f"Reprocessing should recreate checksum file for watch {number}"

    print("✓ Settings change forces reprocessing of all watches")
|
||||||
|
|
||||||
|
|
||||||
|
def test_tag_change_forces_reprocess(client, live_server, measure_memory_usage, datastore_path):
    """
    Test that changing a tag configuration clears checksums only for watches with that tag.

    Two watches are created on the same static endpoint, one carrying the tag and one
    without it. After a baseline check both have a last-checksum.txt. Submitting the
    tag edit form must delete the checksum of the tagged watch only; a further check
    must then recreate it while the untagged watch's checksum stays in place.
    """

    # Setup test content
    test_html = """<html>
<body>
<p>Test content that stays the same</p>
</body>
</html>
"""
    with open(os.path.join(datastore_path, "endpoint-content.txt"), "w") as f:
        f.write(test_html)

    test_url = url_for('test_endpoint', _external=True)

    # Create a tag
    datastore = client.application.config.get('DATASTORE')
    tag_uuid = datastore.add_tag('Test Tag')

    # Add watches - one with tag, one without
    uuid_with_tag = datastore.add_watch(url=test_url, extras={'title': 'Watch With Tag', 'tags': [tag_uuid]})
    uuid_without_tag = datastore.add_watch(url=test_url, extras={'title': 'Watch Without Tag'})

    # Unpause watches
    datastore.data['watching'][uuid_with_tag]['paused'] = False
    datastore.data['watching'][uuid_without_tag]['paused'] = False

    # First check - establishes baseline
    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)

    # Verify checksum files were created
    checksum_with = os.path.join(datastore_path, uuid_with_tag, 'last-checksum.txt')
    checksum_without = os.path.join(datastore_path, uuid_without_tag, 'last-checksum.txt')
    assert os.path.isfile(checksum_with), "First check should create checksum for tagged watch"
    assert os.path.isfile(checksum_without), "First check should create checksum for untagged watch"

    # Sanity check: the tag must be stored in the settings before it is edited.
    # The lookup still raises KeyError when the tag is missing, as before, but the
    # result is now checked instead of being bound to a local that was never used.
    assert datastore.data['settings']['application']['tags'][tag_uuid] is not None, \
        "Tag should exist in settings before it is edited"

    # Edit the tag (change notification_muted as an example)
    res = client.post(
        url_for("tags.form_tag_edit_submit", uuid=tag_uuid),
        data={
            'title': 'Test Tag',
            'notification_muted': 'y',
            'overrides_watch': 'n'
        },
        follow_redirects=True
    )
    assert b"Updated" in res.data

    # Give it a moment to process
    time.sleep(0.5)

    # Verify ONLY the tagged watch's checksum was deleted
    assert not os.path.isfile(checksum_with), "Tag change should delete checksum for watch WITH tag"
    assert os.path.isfile(checksum_without), "Tag change should NOT delete checksum for watch WITHOUT tag"

    # Next check should reprocess tagged watch and recreate its checksum
    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)

    # Verify tagged watch's checksum was recreated
    assert os.path.isfile(checksum_with), "Reprocessing should recreate checksum for tagged watch"
    assert os.path.isfile(checksum_without), "Untagged watch should still have its checksum"

    print("✓ Tag change forces reprocessing only for watches with that tag")
|
||||||
|
|
||||||
|
|
||||||
|
def test_tag_change_via_api_forces_reprocess(client, live_server, measure_memory_usage, datastore_path):
    """
    Updating a tag through the REST API must clear the checksum of watches using that tag.

    A single tagged watch is checked once (its last-checksum.txt appears), the tag is
    then modified with a PUT to /api/v1/tag/<uuid>, and the checksum file is expected
    to be gone afterwards.
    """

    # Static endpoint body
    page_body = """<html>
<body>
<p>Test content</p>
</body>
</html>
"""
    endpoint_file = os.path.join(datastore_path, "endpoint-content.txt")
    with open(endpoint_file, "w") as fh:
        fh.write(page_body)

    test_url = url_for('test_endpoint', _external=True)

    # Tag first, then a watch that gets the tag attached after creation
    datastore = client.application.config.get('DATASTORE')
    tag_uuid = datastore.add_tag('API Test Tag')

    uuid_with_tag = datastore.add_watch(url=test_url, extras={'title': 'API Watch'})
    tagged_watch = datastore.data['watching'][uuid_with_tag]
    tagged_watch['paused'] = False
    tagged_watch['tags'] = [tag_uuid]
    datastore.data['watching'][uuid_with_tag].commit()

    # Baseline check
    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)

    checksum_file = os.path.join(datastore_path, uuid_with_tag, 'last-checksum.txt')
    assert os.path.isfile(checksum_file), "First check should create checksum file"

    # Modify the tag via the API, authenticating with the stored access token
    api_key = datastore.data['settings']['application']['api_access_token']
    api_response = client.put(
        f'/api/v1/tag/{tag_uuid}',
        json={'notification_muted': True},
        headers={'x-api-key': api_key}
    )
    assert api_response.status_code == 200, f"API call failed with status {api_response.status_code}: {api_response.data}"

    # Longer pause here to allow for async operations
    time.sleep(1.0)

    # Debug aid: when the file is unexpectedly still present, show its content
    if os.path.isfile(checksum_file):
        with open(checksum_file, 'r') as fh:
            leftover = fh.read()
        print(f"Checksum still exists: {leftover}")

    # The checksum must have been removed by the tag update
    assert not os.path.isfile(checksum_file), "API tag update should delete checksum"

    print("✓ Tag update via API forces reprocessing")
|
||||||
@@ -6,9 +6,6 @@ from urllib.request import urlopen
|
|||||||
from .util import set_original_response, set_modified_response, live_server_setup, delete_all_watches
|
from .util import set_original_response, set_modified_response, live_server_setup, delete_all_watches
|
||||||
import re
|
import re
|
||||||
|
|
||||||
sleep_time_for_fetch_thread = 3
|
|
||||||
|
|
||||||
|
|
||||||
def test_share_watch(client, live_server, measure_memory_usage, datastore_path):
|
def test_share_watch(client, live_server, measure_memory_usage, datastore_path):
|
||||||
set_original_response(datastore_path=datastore_path)
|
set_original_response(datastore_path=datastore_path)
|
||||||
|
|
||||||
|
|||||||
@@ -6,7 +6,6 @@ from urllib.request import urlopen
|
|||||||
from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks
|
from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks
|
||||||
from ..diff import ADDED_STYLE
|
from ..diff import ADDED_STYLE
|
||||||
|
|
||||||
sleep_time_for_fetch_thread = 3
|
|
||||||
|
|
||||||
def test_check_basic_change_detection_functionality_source(client, live_server, measure_memory_usage, datastore_path):
|
def test_check_basic_change_detection_functionality_source(client, live_server, measure_memory_usage, datastore_path):
|
||||||
set_original_response(datastore_path=datastore_path)
|
set_original_response(datastore_path=datastore_path)
|
||||||
@@ -72,7 +71,10 @@ def test_check_ignore_elements(client, live_server, measure_memory_usage, datast
|
|||||||
follow_redirects=True
|
follow_redirects=True
|
||||||
)
|
)
|
||||||
|
|
||||||
time.sleep(sleep_time_for_fetch_thread)
|
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
||||||
|
|
||||||
|
wait_for_all_checks(client)
|
||||||
|
|
||||||
|
|
||||||
res = client.get(
|
res = client.get(
|
||||||
url_for("ui.ui_preview.preview_page", uuid="first"),
|
url_for("ui.ui_preview.preview_page", uuid="first"),
|
||||||
|
|||||||
@@ -2,7 +2,8 @@
|
|||||||
|
|
||||||
import time
|
import time
|
||||||
from flask import url_for
|
from flask import url_for
|
||||||
from . util import live_server_setup, delete_all_watches
|
|
||||||
|
from .util import live_server_setup, delete_all_watches, wait_for_all_checks
|
||||||
import os
|
import os
|
||||||
|
|
||||||
|
|
||||||
@@ -25,9 +26,6 @@ def set_original_ignore_response(datastore_path):
|
|||||||
|
|
||||||
def test_trigger_regex_functionality_with_filter(client, live_server, measure_memory_usage, datastore_path):
|
def test_trigger_regex_functionality_with_filter(client, live_server, measure_memory_usage, datastore_path):
|
||||||
|
|
||||||
# live_server_setup(live_server) # Setup on conftest per function
|
|
||||||
sleep_time_for_fetch_thread = 3
|
|
||||||
|
|
||||||
set_original_ignore_response(datastore_path=datastore_path)
|
set_original_ignore_response(datastore_path=datastore_path)
|
||||||
|
|
||||||
# Give the endpoint time to spin up
|
# Give the endpoint time to spin up
|
||||||
@@ -38,8 +36,7 @@ def test_trigger_regex_functionality_with_filter(client, live_server, measure_me
|
|||||||
uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
|
uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
|
||||||
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
||||||
|
|
||||||
# it needs time to save the original version
|
wait_for_all_checks(client)
|
||||||
time.sleep(sleep_time_for_fetch_thread)
|
|
||||||
|
|
||||||
### test regex with filter
|
### test regex with filter
|
||||||
res = client.post(
|
res = client.post(
|
||||||
@@ -52,8 +49,9 @@ def test_trigger_regex_functionality_with_filter(client, live_server, measure_me
|
|||||||
follow_redirects=True
|
follow_redirects=True
|
||||||
)
|
)
|
||||||
|
|
||||||
# Give the thread time to pick it up
|
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
||||||
time.sleep(sleep_time_for_fetch_thread)
|
|
||||||
|
wait_for_all_checks(client)
|
||||||
|
|
||||||
client.get(url_for("ui.ui_diff.diff_history_page", uuid="first"))
|
client.get(url_for("ui.ui_diff.diff_history_page", uuid="first"))
|
||||||
|
|
||||||
@@ -62,7 +60,8 @@ def test_trigger_regex_functionality_with_filter(client, live_server, measure_me
|
|||||||
f.write("<html>some new noise with cool stuff2 ok</html>")
|
f.write("<html>some new noise with cool stuff2 ok</html>")
|
||||||
|
|
||||||
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
||||||
time.sleep(sleep_time_for_fetch_thread)
|
|
||||||
|
wait_for_all_checks(client)
|
||||||
|
|
||||||
# It should report nothing found (nothing should match the regex and filter)
|
# It should report nothing found (nothing should match the regex and filter)
|
||||||
res = client.get(url_for("watchlist.index"))
|
res = client.get(url_for("watchlist.index"))
|
||||||
@@ -73,7 +72,8 @@ def test_trigger_regex_functionality_with_filter(client, live_server, measure_me
|
|||||||
f.write("<html>some new noise with <span id=in-here>cool stuff6</span> ok</html>")
|
f.write("<html>some new noise with <span id=in-here>cool stuff6</span> ok</html>")
|
||||||
|
|
||||||
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
||||||
time.sleep(sleep_time_for_fetch_thread)
|
|
||||||
|
wait_for_all_checks(client)
|
||||||
res = client.get(url_for("watchlist.index"))
|
res = client.get(url_for("watchlist.index"))
|
||||||
assert b'has-unread-changes' in res.data
|
assert b'has-unread-changes' in res.data
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,246 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Test the watch edited flag functionality.
|
||||||
|
|
||||||
|
This tests the private __watch_was_edited flag that tracks when writable
|
||||||
|
watch fields are modified, which prevents skipping reprocessing when the
|
||||||
|
watch configuration has changed.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import time
|
||||||
|
from flask import url_for
|
||||||
|
from .util import live_server_setup, wait_for_all_checks
|
||||||
|
|
||||||
|
|
||||||
|
def set_test_content(datastore_path):
    """Create (or overwrite) endpoint-content.txt in *datastore_path* with a fixed HTML page.

    The file is what the test server's endpoint serves; the content is constant so
    repeated checks see an unchanged page.
    """
    target = os.path.join(datastore_path, "endpoint-content.txt")
    page_body = """<html>
<body>
<p>Test content for watch edited flag tests</p>
<p>This content stays the same across checks</p>
</body>
</html>
"""
    with open(target, "w") as out:
        out.write(page_body)
|
||||||
|
|
||||||
|
|
||||||
|
def test_watch_edited_flag_lifecycle(client, live_server, measure_memory_usage, datastore_path):
    """
    Walk the was_edited flag through its states on a single watch:

      - it reads False right after reset_watch_edited_flag()
      - writing a writable field ('title', 'url') turns it True
      - resetting turns it False again
      - writing a readonly field ('uuid') leaves it False
    """

    # Create a watch through the quick-add form
    test_url = url_for('test_endpoint', _external=True)
    add_response = client.post(
        url_for("ui.ui_views.form_quick_watch_add"),
        data={"url": test_url, "tags": "", "edit_and_watch_submit_button": "Edit > Watch"},
        follow_redirects=True
    )
    assert b"Watch added" in add_response.data or b"Updated watch" in add_response.data

    # Pick up the first watch in the datastore
    datastore = client.application.config.get('DATASTORE')
    uuid = list(datastore.data['watching'].keys())[0]
    watch = datastore.data['watching'][uuid]

    # The form submission itself writes fields, so start from a cleared flag
    watch.reset_watch_edited_flag()
    assert not watch.was_edited, "Flag should be False after reset"

    # Each writable field must raise the flag; a reset (what the worker does) must clear it
    for field, new_value in (('title', 'New Title'), ('url', 'https://example.com')):
        watch[field] = new_value
        assert watch.was_edited, f"Flag should be True after modifying writable field '{field}'"

        watch.reset_watch_edited_flag()
        assert not watch.was_edited, "Flag should be False after reset"

    # A readonly field must not raise the flag; put the original value back afterwards
    original_uuid = watch['uuid']
    watch['uuid'] = 'readonly-test-uuid'
    assert not watch.was_edited, "Flag should stay False when modifying readonly field 'uuid'"
    watch['uuid'] = original_uuid

    # Note: Worker reset behavior is tested in test_check_removed_line_contains_trigger
    # and test_watch_edited_flag_prevents_skip

    print("✓ All watch edited flag lifecycle tests passed")
|
||||||
|
|
||||||
|
|
||||||
|
def test_watch_edited_flag_dict_methods(client, live_server, measure_memory_usage, datastore_path):
    """
    Test that the flag is set correctly by various dict methods:
    - __setitem__ (watch['key'] = value)
    - update() (watch.update({'key': value}))
    - setdefault() (watch.setdefault('key', default))
    - pop() (watch.pop('key'))
    - __delitem__ (del watch['key'])

    Each case resets the flag first so that it only reflects the operation under test.
    """

    # Setup - Add a watch
    test_url = url_for('test_endpoint', _external=True)
    res = client.post(
        url_for("ui.ui_views.form_quick_watch_add"),
        data={"url": test_url, "tags": "", "edit_and_watch_submit_button": "Edit > Watch"},
        follow_redirects=True
    )
    # Same check as the sibling tests: fail here with a clear message rather than
    # with an IndexError on the empty watch list below
    assert b"Watch added" in res.data or b"Updated watch" in res.data

    datastore = client.application.config.get('DATASTORE')
    uuid = list(datastore.data['watching'].keys())[0]
    watch = datastore.data['watching'][uuid]

    # Test __setitem__
    watch.reset_watch_edited_flag()
    watch['title'] = 'Test via setitem'
    assert watch.was_edited, "Flag should be True after __setitem__ on writable field"

    # Test update() with dict
    watch.reset_watch_edited_flag()
    watch.update({'title': 'Test via update dict'})
    assert watch.was_edited, "Flag should be True after update() with writable field"

    # Test update() with kwargs
    watch.reset_watch_edited_flag()
    watch.update(title='Test via update kwargs')
    assert watch.was_edited, "Flag should be True after update() kwargs with writable field"

    # Test setdefault() on an existing key - nothing is written, flag must stay False
    watch.reset_watch_edited_flag()
    watch.setdefault('title', 'Should not be set')  # Key exists, no change
    assert not watch.was_edited, "Flag should stay False when setdefault() doesn't change existing key"

    # Test setdefault() on a new key - the key is created, flag must become True
    watch.setdefault('custom_field', 'New value')  # New key
    assert watch.was_edited, "Flag should be True after setdefault() creates new writable field"

    # Test pop() on writable field ('custom_field' was created by the setdefault() above)
    watch.reset_watch_edited_flag()
    watch.pop('custom_field', None)
    assert watch.was_edited, "Flag should be True after pop() on writable field"

    # Test __delitem__ on writable field
    watch.reset_watch_edited_flag()
    watch['temp_field'] = 'temp'
    watch.reset_watch_edited_flag()  # Reset after adding
    del watch['temp_field']
    assert watch.was_edited, "Flag should be True after __delitem__ on writable field"

    print("✓ All dict methods correctly set the flag")
|
||||||
|
|
||||||
|
|
||||||
|
def test_watch_edited_flag_prevents_skip(client, live_server, measure_memory_usage, datastore_path):
    """
    Check how the was_edited flag interacts with repeated checks of unchanged content.

    After a baseline check and a manual flag reset, editing the watch sets the flag;
    once the worker has processed the next check, the flag is expected to be False again.
    """

    # Constant page content for the test endpoint
    set_test_content(datastore_path)

    # Create a watch through the quick-add form
    test_url = url_for('test_endpoint', _external=True)
    add_response = client.post(
        url_for("ui.ui_views.form_quick_watch_add"),
        data={"url": test_url, "tags": "", "edit_and_watch_submit_button": "Edit > Watch"},
        follow_redirects=True
    )
    assert b"Watch added" in add_response.data or b"Updated watch" in add_response.data

    datastore = client.application.config.get('DATASTORE')
    uuid = list(datastore.data['watching'].keys())[0]
    watch = datastore.data['watching'][uuid]

    # Watches are paused by default in tests, so unpause before checking
    watch['paused'] = False

    def _recheck_and_wait():
        # Queue a check for all watches and block until the workers are done
        client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
        wait_for_all_checks(client)

    # Check 1: baseline; a checksum file must be written
    _recheck_and_wait()
    checksum_file = os.path.join(datastore_path, uuid, 'last-checksum.txt')
    assert os.path.isfile(checksum_file), "First check should create last-checksum.txt file"

    # Start from a clean flag, as after normal processing
    watch.reset_watch_edited_flag()
    assert not watch.was_edited, "Flag should be False after reset"

    # Check 2: nothing changed - should skip via checksumFromPreviousCheckWasTheSame
    _recheck_and_wait()

    # Verify it was skipped (last_check_status should indicate skip)
    # Note: The actual skip is tested in test_check_removed_line_contains_trigger
    # Here we're focused on the was_edited flag interaction

    # Editing the watch must raise the flag
    watch['title'] = 'Modified Title'
    assert watch.was_edited, "Flag should be True after modifying watch"

    # Check 3: should NOT skip because was_edited=True even though content unchanged
    _recheck_and_wait()

    # After worker processing, the flag should be reset by the worker
    # This reset happens in the processor's run() method after processing completes
    assert not watch.was_edited, "Flag should be False after worker processing"

    print("✓ was_edited flag correctly prevents skip and is reset by worker")
|
||||||
|
|
||||||
|
|
||||||
|
def test_watch_edited_flag_system_fields(client, live_server, measure_memory_usage, datastore_path):
    """
    Check that writing to system-managed watch fields leaves ``was_edited`` False.

    Three groups of keys are exercised, each after a fresh
    ``reset_watch_edited_flag()``:

    * readonly fields (``uuid``, ``date_created``, ``last_viewed``) - only
      those actually present on the watch are touched, and the original
      value is put back afterwards;
    * additional system fields (``last_check_status``);
    * ``content-type``.

    ``live_server``, ``measure_memory_usage`` and ``datastore_path`` are not
    referenced in the body; presumably they are requested only for their
    fixture side effects.
    """

    # Create a single watch through the quick-add form.
    # The response is not inspected, so it is not bound to a name.
    endpoint_url = url_for('test_endpoint', _external=True)
    client.post(
        url_for("ui.ui_views.form_quick_watch_add"),
        data={"url": endpoint_url, "tags": "", "edit_and_watch_submit_button": "Edit > Watch"},
        follow_redirects=True
    )

    # Pick up the first watch held by the datastore
    store = client.application.config.get('DATASTORE')
    first_uuid = list(store.data['watching'].keys())[0]
    watch = store.data['watching'][first_uuid]

    # Readonly fields (per the OpenAPI spec): overwrite, assert, then restore
    for field in ('uuid', 'date_created', 'last_viewed'):
        watch.reset_watch_edited_flag()
        if field not in watch:
            # Nothing to overwrite (and nothing to restore) for an absent key
            continue
        saved = watch[field]
        watch[field] = 'modified-readonly-value'
        assert not watch.was_edited, f"Flag should stay False when modifying readonly field '{field}'"
        watch[field] = saved

    # System fields that are not (yet) part of the OpenAPI spec
    for field in ('last_check_status',):
        watch.reset_watch_edited_flag()
        watch[field] = 'system-value'
        assert not watch.was_edited, f"Flag should stay False when modifying system field '{field}'"

    # 'content-type' is readonly per OpenAPI and must not flip the flag either
    watch.reset_watch_edited_flag()
    watch['content-type'] = 'text/html'
    assert not watch.was_edited, "Flag should stay False when modifying 'content-type' (readonly)"

    print("✓ System fields correctly don't trigger the flag")
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user