Compare commits

..

1 Commits

Author SHA1 Message Date
dgtlmoon cdabdfeef1 UI - Ability to download a complete data package (.zip) of a watch 2026-02-15 10:39:14 +01:00
148 changed files with 1084 additions and 20634 deletions
-33
View File
@@ -1,33 +0,0 @@
server {
listen 80;
server_name localhost;
# Test basic reverse proxy to changedetection.io
location / {
proxy_pass http://changedet-app:5000;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
# WebSocket support
proxy_http_version 1.1;
proxy_set_header Upgrade $http_upgrade;
proxy_set_header Connection "upgrade";
}
# Test subpath deployment with X-Forwarded-Prefix
location /changedet-sub/ {
proxy_pass http://changedet-app:5000/;
proxy_set_header X-Forwarded-Prefix /changedet-sub;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
# WebSocket support
proxy_http_version 1.1;
proxy_set_header Upgrade $http_upgrade;
proxy_set_header Connection "upgrade";
}
}
+9 -26
View File
@@ -66,27 +66,27 @@ jobs:
echo ${{ github.ref }} > changedetectionio/tag.txt
- name: Set up QEMU
uses: docker/setup-qemu-action@v4
uses: docker/setup-qemu-action@v3
with:
image: tonistiigi/binfmt:latest
platforms: all
- name: Login to GitHub Container Registry
uses: docker/login-action@v4
uses: docker/login-action@v3
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Login to Docker Hub Container Registry
uses: docker/login-action@v4
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKER_HUB_USERNAME }}
password: ${{ secrets.DOCKER_HUB_ACCESS_TOKEN }}
- name: Set up Docker Buildx
id: buildx
uses: docker/setup-buildx-action@v4
uses: docker/setup-buildx-action@v3
with:
install: true
version: latest
@@ -95,7 +95,7 @@ jobs:
# master branch -> :dev container tag
- name: Docker meta :dev
if: ${{ github.ref == 'refs/heads/master' && github.event_name != 'release' }}
uses: docker/metadata-action@v6
uses: docker/metadata-action@v5
id: meta_dev
with:
images: |
@@ -103,19 +103,11 @@ jobs:
ghcr.io/${{ github.repository }}
tags: |
type=raw,value=dev
labels: |
org.opencontainers.image.created=${{ github.event.release.published_at }}
org.opencontainers.image.description=Website, webpage change detection, monitoring and notifications.
org.opencontainers.image.documentation=https://changedetection.io
org.opencontainers.image.revision=${{ github.sha }}
org.opencontainers.image.source=https://github.com/dgtlmoon/changedetection.io
org.opencontainers.image.title=changedetection.io
org.opencontainers.image.url=https://changedetection.io
- name: Build and push :dev
id: docker_build
if: ${{ github.ref == 'refs/heads/master' && github.event_name != 'release' }}
uses: docker/build-push-action@v7
uses: docker/build-push-action@v6
with:
context: ./
file: ./Dockerfile
@@ -136,10 +128,10 @@ jobs:
echo "Release tag: ${{ github.event.release.tag_name }}"
echo "Github ref: ${{ github.ref }}"
echo "Github ref name: ${{ github.ref_name }}"
- name: Docker meta :tag
if: github.event_name == 'release' && startsWith(github.event.release.tag_name, '0.')
uses: docker/metadata-action@v6
uses: docker/metadata-action@v5
id: meta
with:
images: |
@@ -150,20 +142,11 @@ jobs:
type=semver,pattern={{major}}.{{minor}},value=${{ github.event.release.tag_name }}
type=semver,pattern={{major}},value=${{ github.event.release.tag_name }}
type=raw,value=latest
labels: |
org.opencontainers.image.created=${{ github.event.release.published_at }}
org.opencontainers.image.description=Website, webpage change detection, monitoring and notifications.
org.opencontainers.image.documentation=https://changedetection.io
org.opencontainers.image.revision=${{ github.sha }}
org.opencontainers.image.source=https://github.com/dgtlmoon/changedetection.io
org.opencontainers.image.title=changedetection.io
org.opencontainers.image.url=https://changedetection.io
org.opencontainers.image.version=${{ github.event.release.tag_name }}
- name: Build and push :tag
id: docker_build_tag_release
if: github.event_name == 'release' && startsWith(github.event.release.tag_name, '0.')
uses: docker/build-push-action@v7
uses: docker/build-push-action@v6
with:
context: ./
file: ./Dockerfile
+3 -3
View File
@@ -21,7 +21,7 @@ jobs:
- name: Build a binary wheel and a source tarball
run: python3 -m build
- name: Store the distribution packages
uses: actions/upload-artifact@v7
uses: actions/upload-artifact@v6
with:
name: python-package-distributions
path: dist/
@@ -34,7 +34,7 @@ jobs:
- build
steps:
- name: Download all the dists
uses: actions/download-artifact@v8
uses: actions/download-artifact@v7
with:
name: python-package-distributions
path: dist/
@@ -93,7 +93,7 @@ jobs:
steps:
- name: Download all the dists
uses: actions/download-artifact@v8
uses: actions/download-artifact@v7
with:
name: python-package-distributions
path: dist/
+3 -3
View File
@@ -60,14 +60,14 @@ jobs:
# Just test that the build works, some libraries won't compile on ARM/rPi etc
- name: Set up QEMU
uses: docker/setup-qemu-action@v4
uses: docker/setup-qemu-action@v3
with:
image: tonistiigi/binfmt:latest
platforms: all
- name: Set up Docker Buildx
id: buildx
uses: docker/setup-buildx-action@v4
uses: docker/setup-buildx-action@v3
with:
install: true
version: latest
@@ -75,7 +75,7 @@ jobs:
- name: Test that the docker containers can build (${{ matrix.platform }} - ${{ matrix.dockerfile }})
id: docker_build
uses: docker/build-push-action@v7
uses: docker/build-push-action@v6
# https://github.com/docker/build-push-action#customizing
with:
context: ./
+1 -10
View File
@@ -52,13 +52,4 @@ jobs:
uses: ./.github/workflows/test-stack-reusable-workflow.yml
with:
python-version: '3.13'
skip-pypuppeteer: true
test-application-3-14:
#if: github.event_name == 'push' && github.ref == 'refs/heads/master'
needs: lint-code
uses: ./.github/workflows/test-stack-reusable-workflow.yml
with:
python-version: '3.14'
skip-pypuppeteer: false
skip-pypuppeteer: true
@@ -42,10 +42,10 @@ jobs:
run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v4
uses: docker/setup-buildx-action@v3
- name: Build changedetection.io container for testing under Python ${{ env.PYTHON_VERSION }}
uses: docker/build-push-action@v7
uses: docker/build-push-action@v6
with:
context: ./
file: ./Dockerfile
@@ -71,7 +71,7 @@ jobs:
docker save test-changedetectionio -o /tmp/test-changedetectionio.tar
- name: Upload Docker image artifact
uses: actions/upload-artifact@v7
uses: actions/upload-artifact@v6
with:
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
path: /tmp/test-changedetectionio.tar
@@ -88,7 +88,7 @@ jobs:
- uses: actions/checkout@v6
- name: Download Docker image artifact
uses: actions/download-artifact@v8
uses: actions/download-artifact@v7
with:
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
path: /tmp
@@ -99,7 +99,11 @@ jobs:
- name: Run Unit Tests
run: |
docker run test-changedetectionio bash -c 'cd changedetectionio;pytest tests/unit/'
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_notification_diff'
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_watch_model'
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_jinja2_security'
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_semver'
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_html_to_text'
# Basic pytest tests with ancillary services
basic-tests:
@@ -112,7 +116,7 @@ jobs:
- uses: actions/checkout@v6
- name: Download Docker image artifact
uses: actions/download-artifact@v8
uses: actions/download-artifact@v7
with:
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
path: /tmp
@@ -161,14 +165,14 @@ jobs:
- name: Store test artifacts
if: always()
uses: actions/upload-artifact@v7
uses: actions/upload-artifact@v6
with:
name: test-cdio-basic-tests-output-py${{ env.PYTHON_VERSION }}
path: output-logs
- name: Store CLI test output
if: always()
uses: actions/upload-artifact@v7
uses: actions/upload-artifact@v6
with:
name: test-cdio-cli-opts-output-py${{ env.PYTHON_VERSION }}
path: cli-opts-output.txt
@@ -184,7 +188,7 @@ jobs:
- uses: actions/checkout@v6
- name: Download Docker image artifact
uses: actions/download-artifact@v8
uses: actions/download-artifact@v7
with:
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
path: /tmp
@@ -226,7 +230,7 @@ jobs:
- uses: actions/checkout@v6
- name: Download Docker image artifact
uses: actions/download-artifact@v8
uses: actions/download-artifact@v7
with:
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
path: /tmp
@@ -266,7 +270,7 @@ jobs:
- uses: actions/checkout@v6
- name: Download Docker image artifact
uses: actions/download-artifact@v8
uses: actions/download-artifact@v7
with:
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
path: /tmp
@@ -302,7 +306,7 @@ jobs:
- uses: actions/checkout@v6
- name: Download Docker image artifact
uses: actions/download-artifact@v8
uses: actions/download-artifact@v7
with:
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
path: /tmp
@@ -320,175 +324,6 @@ jobs:
run: |
docker run --rm --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest tests/smtp/test_notification_smtp.py'
nginx-reverse-proxy:
runs-on: ubuntu-latest
needs: build
timeout-minutes: 10
env:
PYTHON_VERSION: ${{ inputs.python-version }}
steps:
- uses: actions/checkout@v6
- name: Download Docker image artifact
uses: actions/download-artifact@v8
with:
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
path: /tmp
- name: Load Docker image
run: |
docker load -i /tmp/test-changedetectionio.tar
- name: Spin up services
run: |
docker network create changedet-network
# Start changedetection.io container with X-Forwarded headers support
docker run --name changedet-app --hostname changedet-app --network changedet-network \
-e USE_X_SETTINGS=true \
-d test-changedetectionio
sleep 3
- name: Start nginx reverse proxy
run: |
# Start nginx with our test configuration
docker run --name nginx-proxy --network changedet-network -d -p 8080:80 --rm \
-v ${{ github.workspace }}/.github/nginx-reverse-proxy-test.conf:/etc/nginx/conf.d/default.conf:ro \
nginx:alpine
sleep 2
- name: Test reverse proxy - root path
run: |
echo "=== Testing nginx reverse proxy at root path ==="
curl --retry-connrefused --retry 6 -s http://localhost:8080/ > /tmp/nginx-test-root.html
# Check for changedetection.io UI elements
if grep -q "checkbox-uuid" /tmp/nginx-test-root.html; then
echo "✓ Found checkbox-uuid in response"
else
echo "ERROR: checkbox-uuid not found in response"
cat /tmp/nginx-test-root.html
exit 1
fi
# Check for watchlist content
if grep -q -i "watch" /tmp/nginx-test-root.html; then
echo "✓ Found watch/watchlist content in response"
else
echo "ERROR: watchlist content not found"
cat /tmp/nginx-test-root.html
exit 1
fi
echo "✓ Root path reverse proxy working correctly"
- name: Test reverse proxy - subpath with X-Forwarded-Prefix
run: |
echo "=== Testing nginx reverse proxy at subpath /changedet-sub/ ==="
curl --retry-connrefused --retry 6 -s http://localhost:8080/changedet-sub/ > /tmp/nginx-test-subpath.html
# Check for changedetection.io UI elements
if grep -q "checkbox-uuid" /tmp/nginx-test-subpath.html; then
echo "✓ Found checkbox-uuid in subpath response"
else
echo "ERROR: checkbox-uuid not found in subpath response"
cat /tmp/nginx-test-subpath.html
exit 1
fi
echo "✓ Subpath reverse proxy working correctly"
- name: Test API through reverse proxy subpath
run: |
echo "=== Testing API endpoints through nginx subpath /changedet-sub/ ==="
# Extract API key from the changedetection.io datastore
API_KEY=$(docker exec changedet-app cat /datastore/changedetection.json | grep -o '"api_access_token": *"[^"]*"' | cut -d'"' -f4)
if [ -z "$API_KEY" ]; then
echo "ERROR: Could not extract API key from datastore"
docker exec changedet-app cat /datastore/changedetection.json
exit 1
fi
echo "✓ Extracted API key: ${API_KEY:0:8}..."
# Create a watch via API through nginx proxy subpath
echo "Creating watch via POST to /changedet-sub/api/v1/watch"
RESPONSE=$(curl -s -w "\n%{http_code}" -X POST "http://localhost:8080/changedet-sub/api/v1/watch" \
-H "x-api-key: ${API_KEY}" \
-H "Content-Type: application/json" \
-d '{
"url": "https://example.com/test-nginx-proxy",
"tag": "nginx-test"
}')
HTTP_CODE=$(echo "$RESPONSE" | tail -n1)
BODY=$(echo "$RESPONSE" | head -n-1)
if [ "$HTTP_CODE" != "201" ]; then
echo "ERROR: Expected HTTP 201, got $HTTP_CODE"
echo "Response: $BODY"
exit 1
fi
echo "✓ Watch created successfully (HTTP 201)"
# Extract the watch UUID from response
WATCH_UUID=$(echo "$BODY" | grep -o '"uuid": *"[^"]*"' | cut -d'"' -f4)
echo "✓ Watch UUID: $WATCH_UUID"
# Update the watch via PUT through nginx proxy subpath
echo "Updating watch via PUT to /changedet-sub/api/v1/watch/${WATCH_UUID}"
RESPONSE=$(curl -s -w "\n%{http_code}" -X PUT "http://localhost:8080/changedet-sub/api/v1/watch/${WATCH_UUID}" \
-H "x-api-key: ${API_KEY}" \
-H "Content-Type: application/json" \
-d '{
"paused": true
}')
HTTP_CODE=$(echo "$RESPONSE" | tail -n1)
BODY=$(echo "$RESPONSE" | head -n-1)
if [ "$HTTP_CODE" != "200" ]; then
echo "ERROR: Expected HTTP 200, got $HTTP_CODE"
echo "Response: $BODY"
exit 1
fi
if echo "$BODY" | grep -q 'OK'; then
echo "✓ Watch updated successfully (HTTP 200, response: OK)"
else
echo "ERROR: Expected response 'OK', got: $BODY"
echo "Response: $BODY"
exit 1
fi
# Verify the watch is paused via GET
echo "Verifying watch is paused via GET"
RESPONSE=$(curl -s "http://localhost:8080/changedet-sub/api/v1/watch/${WATCH_UUID}" \
-H "x-api-key: ${API_KEY}")
if echo "$RESPONSE" | grep -q '"paused": *true'; then
echo "✓ Watch is paused as expected"
else
echo "ERROR: Watch paused state not confirmed"
echo "Response: $RESPONSE"
exit 1
fi
echo "✓ API tests through nginx subpath completed successfully"
- name: Cleanup nginx test
if: always()
run: |
docker logs nginx-proxy || true
docker logs changedet-app || true
docker stop nginx-proxy changedet-app || true
docker rm nginx-proxy changedet-app || true
# Proxy tests
proxy-tests:
runs-on: ubuntu-latest
@@ -500,7 +335,7 @@ jobs:
- uses: actions/checkout@v6
- name: Download Docker image artifact
uses: actions/download-artifact@v8
uses: actions/download-artifact@v7
with:
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
path: /tmp
@@ -540,7 +375,7 @@ jobs:
- uses: actions/checkout@v6
- name: Download Docker image artifact
uses: actions/download-artifact@v8
uses: actions/download-artifact@v7
with:
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
path: /tmp
@@ -570,7 +405,7 @@ jobs:
- uses: actions/checkout@v6
- name: Download Docker image artifact
uses: actions/download-artifact@v8
uses: actions/download-artifact@v7
with:
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
path: /tmp
@@ -583,10 +418,6 @@ jobs:
run: |
docker run -e EXTRA_PACKAGES=changedetection.io-osint-processor test-changedetectionio bash -c 'cd changedetectionio;pytest -vvv -s tests/plugins/test_processor.py::test_check_plugin_processor'
- name: Plugin get_html_head_extras hook injects into base.html
run: |
docker run test-changedetectionio bash -c 'cd changedetectionio;pytest -vvv -s tests/plugins/test_html_head_extras.py'
# Container startup tests
container-tests:
runs-on: ubuntu-latest
@@ -598,7 +429,7 @@ jobs:
- uses: actions/checkout@v6
- name: Download Docker image artifact
uses: actions/download-artifact@v8
uses: actions/download-artifact@v7
with:
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
path: /tmp
@@ -643,7 +474,7 @@ jobs:
- uses: actions/checkout@v6
- name: Download Docker image artifact
uses: actions/download-artifact@v8
uses: actions/download-artifact@v7
with:
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
path: /tmp
@@ -706,19 +537,7 @@ jobs:
- name: Check upgrade works without error
run: |
echo "=== Testing upgrade path from 0.49.1 to ${{ github.ref_name }} (${{ github.sha }}) ==="
sudo apt-get update && sudo apt-get install -y --no-install-recommends \
g++ \
gcc \
libc-dev \
libffi-dev \
libjpeg-dev \
libssl-dev \
libxslt-dev \
make \
patch \
pkg-config \
zlib1g-dev
# Checkout old version and create datastore
git checkout 0.49.1
python3 -m venv .venv
@@ -727,7 +546,7 @@ jobs:
pip install 'pyOpenSSL>=23.2.0'
echo "=== Running version 0.49.1 to create datastore ==="
ALLOW_IANA_RESTRICTED_ADDRESSES=true python3 ./changedetection.py -C -d /tmp/data &
python3 ./changedetection.py -C -d /tmp/data &
APP_PID=$!
# Wait for app to be ready
@@ -775,7 +594,7 @@ jobs:
pip install -r requirements.txt
echo "=== Running current version (commit ${{ github.sha }}) with old datastore (testing mode) ==="
ALLOW_IANA_RESTRICTED_ADDRESSES=true TESTING_SHUTDOWN_AFTER_DATASTORE_LOAD=1 python3 ./changedetection.py -d /tmp/data > /tmp/upgrade-test.log 2>&1
TESTING_SHUTDOWN_AFTER_DATASTORE_LOAD=1 python3 ./changedetection.py -d /tmp/data > /tmp/upgrade-test.log 2>&1
echo "=== Upgrade test output ==="
cat /tmp/upgrade-test.log
@@ -783,7 +602,7 @@ jobs:
# Now start the current version normally to verify the tag survived
echo "=== Starting current version to verify tag exists after upgrade ==="
ALLOW_IANA_RESTRICTED_ADDRESSES=true timeout 20 python3 ./changedetection.py -d /tmp/data > /tmp/ui-test.log 2>&1 &
timeout 20 python3 ./changedetection.py -d /tmp/data > /tmp/ui-test.log 2>&1 &
APP_PID=$!
# Wait for app to be ready and fetch UI
@@ -832,7 +651,7 @@ jobs:
- name: Upload upgrade test logs
if: always()
uses: actions/upload-artifact@v7
uses: actions/upload-artifact@v6
with:
name: upgrade-test-logs-py${{ env.PYTHON_VERSION }}
path: /tmp/upgrade-test.log
+2 -16
View File
@@ -2,7 +2,7 @@
# Read more https://github.com/dgtlmoon/changedetection.io/wiki
# Semver means never use .01, or 00. Should be .1.
__version__ = '0.54.8'
__version__ = '0.52.9'
from changedetectionio.strtobool import strtobool
from json.decoder import JSONDecodeError
@@ -61,22 +61,8 @@ import time
# ==============================================================================
import multiprocessing
import os
import sys
# Limit glibc malloc arena count to prevent RSS growth from concurrent requests.
# Default: glibc creates up to 8×CPU_cores arenas. Each concurrent thread/connection
# can trigger a new arena, and freed memory stays mapped in those arenas as RSS forever.
# With MALLOC_ARENA_MAX=2, at most 2 arenas are used; freed pages return to the OS faster.
# Must be set before worker threads start; env var is read lazily by glibc on first arena creation.
if 'MALLOC_ARENA_MAX' not in os.environ:
os.environ['MALLOC_ARENA_MAX'] = '2'
try:
import ctypes as _ctypes
_ctypes.CDLL('libc.so.6').mallopt(-8, 2) # M_ARENA_MAX = -8
except Exception:
pass
# Set spawn as global default (safety net - all our code uses explicit contexts anyway)
# Skip in tests to avoid breaking pytest-flask's LiveServer fixture (uses unpicklable local functions)
if 'pytest' not in sys.modules:
@@ -624,7 +610,7 @@ def main():
@app.context_processor
def inject_template_globals():
return dict(right_sticky="v"+__version__,
return dict(right_sticky="v{}".format(datastore.data['version_tag']),
new_version_available=app.config['NEW_VERSION_AVAILABLE'],
has_password=datastore.data['settings']['application']['password'] != False,
socket_io_enabled=datastore.data['settings']['application'].get('ui', {}).get('socket_io_enabled', True),
-21
View File
@@ -1,21 +0,0 @@
import functools
from flask import make_response
from flask_restful import Resource
@functools.cache
def _get_spec_yaml():
"""Build and cache the merged spec as a YAML string (only serialized once per process)."""
import yaml
from changedetectionio.api import build_merged_spec_dict
return yaml.dump(build_merged_spec_dict(), default_flow_style=False, allow_unicode=True)
class Spec(Resource):
def get(self):
"""Return the merged OpenAPI spec including all registered processor extensions."""
return make_response(
_get_spec_yaml(),
200,
{'Content-Type': 'application/yaml'}
)
+12 -8
View File
@@ -17,7 +17,7 @@ class Tag(Resource):
self.update_q = kwargs['update_q']
# Get information about a single tag
# curl http://localhost:5000/api/v1/tag/<uuid_str:uuid>
# curl http://localhost:5000/api/v1/tag/<string:uuid>
@auth.check_token
@validate_openapi_request('getTag')
def get(self, uuid):
@@ -97,6 +97,17 @@ class Tag(Resource):
# Delete the tag, and any tag reference
del self.datastore.data['settings']['application']['tags'][uuid]
# Delete tag.json file if it exists
import os
tag_dir = os.path.join(self.datastore.datastore_path, uuid)
tag_json = os.path.join(tag_dir, "tag.json")
if os.path.exists(tag_json):
try:
os.unlink(tag_json)
logger.info(f"Deleted tag.json for tag {uuid}")
except Exception as e:
logger.error(f"Failed to delete tag.json for tag {uuid}: {e}")
# Remove tag from all watches
for watch_uuid, watch in self.datastore.data['watching'].items():
if watch.get('tags') and uuid in watch['tags']:
@@ -177,13 +188,6 @@ class Tag(Resource):
new_uuid = self.datastore.add_tag(title=title)
if new_uuid:
# Apply any extra fields (e.g. processor_config_restock_diff) beyond just title
extra = {k: v for k, v in json_data.items() if k != 'title'}
if extra:
tag = self.datastore.data['settings']['application']['tags'].get(new_uuid)
if tag:
tag.update(extra)
tag.commit()
return {'uuid': new_uuid}, 201
else:
return "Invalid or unsupported tag", 400
+5 -5
View File
@@ -57,7 +57,7 @@ class Watch(Resource):
self.update_q = kwargs['update_q']
# Get information about a single watch, excluding the history list (can be large)
# curl http://localhost:5000/api/v1/watch/<uuid_str:uuid>
# curl http://localhost:5000/api/v1/watch/<string:uuid>
# @todo - version2 - ?muted and ?paused should be able to be called together, return the watch struct not "OK"
# ?recheck=true
@auth.check_token
@@ -217,7 +217,7 @@ class WatchHistory(Resource):
self.datastore = kwargs['datastore']
# Get a list of available history for a watch by UUID
# curl http://localhost:5000/api/v1/watch/<uuid_str:uuid>/history
# curl http://localhost:5000/api/v1/watch/<string:uuid>/history
@auth.check_token
@validate_openapi_request('getWatchHistory')
def get(self, uuid):
@@ -338,7 +338,7 @@ class WatchHistoryDiff(Resource):
word_diff = True
# Get boolean diff preferences with defaults from DIFF_PREFERENCES_CONFIG
changes_only = strtobool(request.args.get('changesOnly', 'false'))
changes_only = strtobool(request.args.get('changesOnly', 'true'))
ignore_whitespace = strtobool(request.args.get('ignoreWhitespace', 'false'))
include_removed = strtobool(request.args.get('removed', 'true'))
include_added = strtobool(request.args.get('added', 'true'))
@@ -349,7 +349,7 @@ class WatchHistoryDiff(Resource):
previous_version_file_contents=from_version_file_contents,
newest_version_file_contents=to_version_file_contents,
ignore_junk=ignore_whitespace,
include_equal=not changes_only,
include_equal=changes_only,
include_removed=include_removed,
include_added=include_added,
include_replaced=include_replaced,
@@ -567,4 +567,4 @@ class CreateWatch(Resource):
return {'status': f'OK, queueing {len(watches_to_queue)} watches in background'}, 202
return list, 200
return list, 200
+24 -80
View File
@@ -3,18 +3,29 @@ from flask import request, abort
from loguru import logger
@functools.cache
def build_merged_spec_dict():
def get_openapi_spec():
"""Lazy load OpenAPI spec and dependencies only when validation is needed."""
import os
import yaml # Lazy import - only loaded when API validation is actually used
from openapi_core import OpenAPI # Lazy import - saves ~10.7 MB on startup
spec_path = os.path.join(os.path.dirname(__file__), '../../docs/api-spec.yaml')
if not os.path.exists(spec_path):
# Possibly for pip3 packages
spec_path = os.path.join(os.path.dirname(__file__), '../docs/api-spec.yaml')
with open(spec_path, 'r', encoding='utf-8') as f:
spec_dict = yaml.safe_load(f)
_openapi_spec = OpenAPI.from_dict(spec_dict)
return _openapi_spec
@functools.cache
def get_openapi_schema_dict():
"""
Load the base OpenAPI spec and merge in any per-processor api.yaml extensions.
Get the raw OpenAPI spec dictionary for schema access.
Each processor can provide an api.yaml file alongside its __init__.py that defines
additional schemas (e.g., processor_config_restock_diff). These are merged into
WatchBase.properties so the spec accurately reflects what the API accepts.
Plugin processors (via pluggy) are also supported - they just need an api.yaml
next to their processor module.
Returns the merged dict (cached - do not mutate the returned value).
Used by Import endpoint to validate and convert query parameters.
Returns the YAML dict directly (not the OpenAPI object).
"""
import os
import yaml
@@ -24,59 +35,7 @@ def build_merged_spec_dict():
spec_path = os.path.join(os.path.dirname(__file__), '../docs/api-spec.yaml')
with open(spec_path, 'r', encoding='utf-8') as f:
spec_dict = yaml.safe_load(f)
try:
from changedetectionio.processors import find_processors, get_parent_module
for module, proc_name in find_processors():
parent = get_parent_module(module)
if not parent or not hasattr(parent, '__file__'):
continue
api_yaml_path = os.path.join(os.path.dirname(parent.__file__), 'api.yaml')
if not os.path.exists(api_yaml_path):
continue
with open(api_yaml_path, 'r', encoding='utf-8') as f:
proc_spec = yaml.safe_load(f)
# Merge schemas
proc_schemas = proc_spec.get('components', {}).get('schemas', {})
spec_dict['components']['schemas'].update(proc_schemas)
# Inject processor_config_{name} into WatchBase if the schema is defined
schema_key = f'processor_config_{proc_name}'
if schema_key in proc_schemas:
spec_dict['components']['schemas']['WatchBase']['properties'][schema_key] = {
'$ref': f'#/components/schemas/{schema_key}'
}
# Append x-code-samples from processor paths into existing path operations
for path, path_item in proc_spec.get('paths', {}).items():
if path not in spec_dict.get('paths', {}):
continue
for method, operation in path_item.items():
if method not in spec_dict['paths'][path]:
continue
if 'x-code-samples' in operation:
existing = spec_dict['paths'][path][method].get('x-code-samples', [])
spec_dict['paths'][path][method]['x-code-samples'] = existing + operation['x-code-samples']
except Exception as e:
logger.warning(f"Failed to merge processor API specs: {e}")
return spec_dict
@functools.cache
def get_openapi_spec():
"""Lazy load OpenAPI spec and dependencies only when validation is needed."""
from openapi_core import OpenAPI # Lazy import - saves ~10.7 MB on startup
return OpenAPI.from_dict(build_merged_spec_dict())
@functools.cache
def get_openapi_schema_dict():
"""
Get the raw OpenAPI spec dictionary for schema access.
Used by Import endpoint to validate and convert query parameters.
Returns the merged YAML dict (not the OpenAPI object).
"""
return build_merged_spec_dict()
return yaml.safe_load(f)
@functools.cache
def _resolve_schema_properties(schema_name):
@@ -144,7 +103,6 @@ def validate_openapi_request(operation_id):
if request.method.upper() != 'GET':
# Lazy import - only loaded when actually validating a request
from openapi_core.contrib.flask import FlaskOpenAPIRequest
from openapi_core.templating.paths.exceptions import ServerNotFound, PathNotFound, PathError
spec = get_openapi_spec()
openapi_request = FlaskOpenAPIRequest(request)
@@ -152,16 +110,6 @@ def validate_openapi_request(operation_id):
if result.errors:
error_details = []
for error in result.errors:
# Skip path/server validation errors for reverse proxy compatibility
# Flask routing already validates that endpoints exist (returns 404 if not).
# OpenAPI validation here is primarily for request body schema validation.
# When behind nginx/reverse proxy, URLs may have path prefixes that don't
# match the OpenAPI server definitions, causing false positives.
if isinstance(error, PathError):
logger.debug(f"API Call - Skipping path/server validation (delegated to Flask): {error}")
continue
error_str = str(error)
# Extract detailed schema errors from __cause__
if hasattr(error, '__cause__') and hasattr(error.__cause__, 'schema_errors'):
for schema_error in error.__cause__.schema_errors:
@@ -169,12 +117,9 @@ def validate_openapi_request(operation_id):
msg = schema_error.message if hasattr(schema_error, 'message') else str(schema_error)
error_details.append(f"{field}: {msg}")
else:
error_details.append(error_str)
# Only raise if we have actual validation errors (not path/server issues)
if error_details:
error_details.append(str(error))
logger.error(f"API Call - Validation failed: {'; '.join(error_details)}")
raise BadRequest(f"Validation failed: {'; '.join(error_details)}")
raise BadRequest(f"Validation failed: {'; '.join(error_details)}")
except BadRequest:
# Re-raise BadRequest exceptions (validation failures)
raise
@@ -191,6 +136,5 @@ from .Watch import Watch, WatchHistory, WatchSingleHistory, WatchHistoryDiff, Cr
from .Tags import Tags, Tag
from .Import import Import
from .SystemInfo import SystemInfo
from .Spec import Spec
from .Notifications import Notifications
+21 -32
View File
@@ -13,7 +13,7 @@ from loguru import logger
BACKUP_FILENAME_FORMAT = "changedetection-backup-{}.zip"
def create_backup(datastore_path, watches: dict, tags: dict = None):
def create_backup(datastore_path, watches: dict):
logger.debug("Creating backup...")
import zipfile
from pathlib import Path
@@ -40,14 +40,10 @@ def create_backup(datastore_path, watches: dict, tags: dict = None):
zipObj.write(url_watches_json, arcname="url-watches.json")
logger.debug("Added url-watches.json to backup")
# Add tag data directories (each tag has its own {uuid}/tag.json)
for uuid, tag in (tags or {}).items():
for f in Path(tag.data_dir).glob('*'):
zipObj.write(f,
arcname=os.path.join(f.parts[-2], f.parts[-1]),
compress_type=zipfile.ZIP_DEFLATED,
compresslevel=8)
logger.debug(f"Added tag '{tag.get('title')}' ({uuid}) to backup")
# Add the flask app secret (if it exists)
secret_file = os.path.join(datastore_path, "secret.txt")
if os.path.isfile(secret_file):
zipObj.write(secret_file, arcname="secret.txt")
# Add any data in the watch data directory.
for uuid, w in watches.items():
@@ -92,28 +88,24 @@ def create_backup(datastore_path, watches: dict, tags: dict = None):
def construct_blueprint(datastore: ChangeDetectionStore):
from .restore import construct_restore_blueprint
backups_blueprint = Blueprint('backups', __name__, template_folder="templates")
backups_blueprint.register_blueprint(construct_restore_blueprint(datastore))
backup_threads = []
@backups_blueprint.route("/request-backup", methods=['GET'])
@login_optionally_required
@backups_blueprint.route("/request-backup", methods=['GET'])
def request_backup():
if any(thread.is_alive() for thread in backup_threads):
flash(gettext("A backup is already running, check back in a few minutes"), "error")
return redirect(url_for('backups.create'))
return redirect(url_for('backups.index'))
if len(find_backups()) > int(os.getenv("MAX_NUMBER_BACKUPS", 100)):
flash(gettext("Maximum number of backups reached, please remove some"), "error")
return redirect(url_for('backups.create'))
return redirect(url_for('backups.index'))
# With immediate persistence, all data is already saved
zip_thread = threading.Thread(
target=create_backup,
args=(datastore.datastore_path, datastore.data.get("watching")),
kwargs={'tags': datastore.data['settings']['application'].get('tags', {})},
daemon=True,
name="BackupCreator"
)
@@ -121,7 +113,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
backup_threads.append(zip_thread)
flash(gettext("Backup building in background, check back in a few minutes."))
return redirect(url_for('backups.create'))
return redirect(url_for('backups.index'))
def find_backups():
backup_filepath = os.path.join(datastore.datastore_path, BACKUP_FILENAME_FORMAT.format("*"))
@@ -141,43 +133,40 @@ def construct_blueprint(datastore: ChangeDetectionStore):
return backup_info
@backups_blueprint.route("/download/<string:filename>", methods=['GET'])
@login_optionally_required
@backups_blueprint.route("/download/<string:filename>", methods=['GET'])
def download_backup(filename):
import re
filename = filename.strip()
backup_filename_regex = BACKUP_FILENAME_FORMAT.format(r"\d+")
backup_filename_regex = BACKUP_FILENAME_FORMAT.format("\d+")
full_path = os.path.join(os.path.abspath(datastore.datastore_path), filename)
if not full_path.startswith(os.path.abspath(datastore.datastore_path)):
abort(404)
# Resolve 'latest' before any validation so checks run against the real filename.
if filename == 'latest':
backups = find_backups()
if not backups:
abort(404)
filename = backups[0]['filename']
if not re.match(r"^" + backup_filename_regex + "$", filename):
abort(400) # Bad Request if the filename doesn't match the pattern
full_path = os.path.join(os.path.abspath(datastore.datastore_path), filename)
if not full_path.startswith(os.path.abspath(datastore.datastore_path) + os.sep):
abort(404)
logger.debug(f"Backup download request for '{full_path}'")
return send_from_directory(os.path.abspath(datastore.datastore_path), filename, as_attachment=True)
@backups_blueprint.route("/", methods=['GET'])
@backups_blueprint.route("/create", methods=['GET'])
@login_optionally_required
def create():
@backups_blueprint.route("", methods=['GET'])
def index():
backups = find_backups()
output = render_template("backup_create.html",
output = render_template("overview.html",
available_backups=backups,
backup_running=any(thread.is_alive() for thread in backup_threads)
)
return output
@backups_blueprint.route("/remove-backups", methods=['GET'])
@login_optionally_required
@backups_blueprint.route("/remove-backups", methods=['GET'])
def remove_backups():
backup_filepath = os.path.join(datastore.datastore_path, BACKUP_FILENAME_FORMAT.format("*"))
@@ -187,6 +176,6 @@ def construct_blueprint(datastore: ChangeDetectionStore):
flash(gettext("Backups were deleted."))
return redirect(url_for('backups.create'))
return redirect(url_for('backups.index'))
return backups_blueprint
@@ -1,248 +0,0 @@
import io
import json
import os
import re
import shutil
import tempfile
import threading
import zipfile
from flask import Blueprint, render_template, flash, url_for, redirect, request
from flask_babel import gettext, lazy_gettext as _l
from wtforms import Form, BooleanField, SubmitField
from flask_wtf.file import FileField, FileAllowed
from loguru import logger
from changedetectionio.flask_app import login_optionally_required
# Maximum size of the uploaded zip file. Override via env var MAX_RESTORE_UPLOAD_MB.
_MAX_UPLOAD_BYTES = int(os.getenv("MAX_RESTORE_UPLOAD_MB", 256)) * 1024 * 1024
# Maximum total uncompressed size of all entries (zip-bomb guard). Override via MAX_RESTORE_DECOMPRESSED_MB.
_MAX_DECOMPRESSED_BYTES = int(os.getenv("MAX_RESTORE_DECOMPRESSED_MB", 1024)) * 1024 * 1024
# Only top-level directories whose name is a valid UUID are treated as watch/tag entries.
_UUID_RE = re.compile(
r'^[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$',
re.IGNORECASE,
)
class RestoreForm(Form):
zip_file = FileField(_l('Backup zip file'), validators=[
FileAllowed(['zip'], _l('Must be a .zip backup file!'))
])
include_groups = BooleanField(_l('Include groups'), default=True)
include_groups_replace_existing = BooleanField(_l('Replace existing groups of the same UUID'), default=True)
include_watches = BooleanField(_l('Include watches'), default=True)
include_watches_replace_existing = BooleanField(_l('Replace existing watches of the same UUID'), default=True)
submit = SubmitField(_l('Restore backup'))
def import_from_zip(zip_stream, datastore, include_groups, include_groups_replace, include_watches, include_watches_replace):
"""
Extract and import watches and groups from a backup zip stream.
Mirrors the store's _load_watches / _load_tags loading pattern:
- UUID dirs with tag.json → Tag.model + tag_obj.commit()
- UUID dirs with watch.json → rehydrate_entity + watch_obj.commit()
Returns a dict with counts: restored_groups, skipped_groups, restored_watches, skipped_watches.
Raises zipfile.BadZipFile if the stream is not a valid zip.
"""
from changedetectionio.model import Tag
restored_groups = 0
skipped_groups = 0
restored_watches = 0
skipped_watches = 0
current_tags = datastore.data['settings']['application'].get('tags', {})
current_watches = datastore.data['watching']
with tempfile.TemporaryDirectory() as tmpdir:
logger.debug(f"Restore: extracting zip to {tmpdir}")
with zipfile.ZipFile(zip_stream, 'r') as zf:
total_uncompressed = sum(m.file_size for m in zf.infolist())
if total_uncompressed > _MAX_DECOMPRESSED_BYTES:
raise ValueError(
f"Backup archive decompressed size ({total_uncompressed // (1024 * 1024)} MB) "
f"exceeds the {_MAX_DECOMPRESSED_BYTES // (1024 * 1024)} MB limit"
)
resolved_dest = os.path.realpath(tmpdir)
for member in zf.infolist():
member_dest = os.path.realpath(os.path.join(resolved_dest, member.filename))
if not member_dest.startswith(resolved_dest + os.sep) and member_dest != resolved_dest:
raise ValueError(f"Zip Slip path traversal detected in backup archive: {member.filename!r}")
zf.extract(member, tmpdir)
logger.debug("Restore: zip extracted, scanning UUID directories")
for entry in os.scandir(tmpdir):
if not entry.is_dir():
continue
uuid = entry.name
if not _UUID_RE.match(uuid):
logger.warning(f"Restore: skipping non-UUID directory {uuid!r}")
continue
tag_json_path = os.path.join(entry.path, 'tag.json')
watch_json_path = os.path.join(entry.path, 'watch.json')
# --- Tags (groups) ---
if include_groups and os.path.exists(tag_json_path):
if uuid in current_tags and not include_groups_replace:
logger.debug(f"Restore: skipping existing group {uuid} (replace not requested)")
skipped_groups += 1
continue
try:
with open(tag_json_path, 'r', encoding='utf-8') as f:
tag_data = json.load(f)
except (json.JSONDecodeError, IOError) as e:
logger.error(f"Restore: failed to read tag.json for {uuid}: {e}")
continue
title = tag_data.get('title', uuid)
logger.debug(f"Restore: importing group '{title}' ({uuid})")
# Mirror _load_tags: set uuid and force processor
tag_data['uuid'] = uuid
tag_data['processor'] = 'restock_diff'
# Copy the UUID directory so data_dir exists for commit()
dst_dir = os.path.join(datastore.datastore_path, uuid)
if os.path.exists(dst_dir):
shutil.rmtree(dst_dir)
shutil.copytree(entry.path, dst_dir)
tag_obj = Tag.model(
datastore_path=datastore.datastore_path,
__datastore=datastore.data,
default=tag_data
)
current_tags[uuid] = tag_obj
tag_obj.commit()
restored_groups += 1
logger.success(f"Restore: group '{title}' ({uuid}) restored")
# --- Watches ---
elif include_watches and os.path.exists(watch_json_path):
if uuid in current_watches and not include_watches_replace:
logger.debug(f"Restore: skipping existing watch {uuid} (replace not requested)")
skipped_watches += 1
continue
try:
with open(watch_json_path, 'r', encoding='utf-8') as f:
watch_data = json.load(f)
except (json.JSONDecodeError, IOError) as e:
logger.error(f"Restore: failed to read watch.json for {uuid}: {e}")
continue
url = watch_data.get('url', uuid)
logger.debug(f"Restore: importing watch '{url}' ({uuid})")
# Copy UUID directory first so data_dir and history files exist
dst_dir = os.path.join(datastore.datastore_path, uuid)
if os.path.exists(dst_dir):
shutil.rmtree(dst_dir)
shutil.copytree(entry.path, dst_dir)
# Mirror _load_watches / rehydrate_entity
watch_data['uuid'] = uuid
watch_obj = datastore.rehydrate_entity(uuid, watch_data)
current_watches[uuid] = watch_obj
watch_obj.commit()
restored_watches += 1
logger.success(f"Restore: watch '{url}' ({uuid}) restored")
logger.debug(f"Restore: scan complete - groups {restored_groups} restored / {skipped_groups} skipped, "
f"watches {restored_watches} restored / {skipped_watches} skipped")
# Persist changedetection.json (includes the updated tags dict)
logger.debug("Restore: committing datastore settings")
datastore.commit()
return {
'restored_groups': restored_groups,
'skipped_groups': skipped_groups,
'restored_watches': restored_watches,
'skipped_watches': skipped_watches,
}
def construct_restore_blueprint(datastore):
restore_blueprint = Blueprint('restore', __name__, template_folder="templates")
restore_threads = []
@restore_blueprint.route("/restore", methods=['GET'])
@login_optionally_required
def restore():
form = RestoreForm()
return render_template("backup_restore.html",
form=form,
restore_running=any(t.is_alive() for t in restore_threads),
max_upload_mb=_MAX_UPLOAD_BYTES // (1024 * 1024),
max_decompressed_mb=_MAX_DECOMPRESSED_BYTES // (1024 * 1024))
@restore_blueprint.route("/restore/start", methods=['POST'])
@login_optionally_required
def backups_restore_start():
if any(t.is_alive() for t in restore_threads):
flash(gettext("A restore is already running, check back in a few minutes"), "error")
return redirect(url_for('backups.restore.restore'))
zip_file = request.files.get('zip_file')
if not zip_file or not zip_file.filename:
flash(gettext("No file uploaded"), "error")
return redirect(url_for('backups.restore.restore'))
if not zip_file.filename.lower().endswith('.zip'):
flash(gettext("File must be a .zip backup file"), "error")
return redirect(url_for('backups.restore.restore'))
# Reject oversized uploads before reading the stream into memory.
content_length = request.content_length
if content_length and content_length > _MAX_UPLOAD_BYTES:
flash(gettext("Backup file is too large (max %(mb)s MB)", mb=_MAX_UPLOAD_BYTES // (1024 * 1024)), "error")
return redirect(url_for('backups.restore.restore'))
# Read into memory now — the request stream is gone once we return.
# Read one byte beyond the limit so we can detect truncated-but-still-oversized streams.
try:
raw = zip_file.read(_MAX_UPLOAD_BYTES + 1)
if len(raw) > _MAX_UPLOAD_BYTES:
flash(gettext("Backup file is too large (max %(mb)s MB)", mb=_MAX_UPLOAD_BYTES // (1024 * 1024)), "error")
return redirect(url_for('backups.restore.restore'))
zip_bytes = io.BytesIO(raw)
with zipfile.ZipFile(zip_bytes): # quick validity check before spawning
pass
zip_bytes.seek(0)
except zipfile.BadZipFile:
flash(gettext("Invalid or corrupted zip file"), "error")
return redirect(url_for('backups.restore.restore'))
include_groups = request.form.get('include_groups') == 'y'
include_groups_replace = request.form.get('include_groups_replace_existing') == 'y'
include_watches = request.form.get('include_watches') == 'y'
include_watches_replace = request.form.get('include_watches_replace_existing') == 'y'
restore_thread = threading.Thread(
target=import_from_zip,
kwargs={
'zip_stream': zip_bytes,
'datastore': datastore,
'include_groups': include_groups,
'include_groups_replace': include_groups_replace,
'include_watches': include_watches,
'include_watches_replace': include_watches_replace,
},
daemon=True,
name="BackupRestore"
)
restore_thread.start()
restore_threads[:] = [t for t in restore_threads if t.is_alive()]
restore_threads.append(restore_thread)
flash(gettext("Restore started in background, check back in a few minutes."))
return redirect(url_for('backups.restore.restore'))
return restore_blueprint
@@ -1,49 +0,0 @@
{% extends 'base.html' %}
{% block content %}
{% from '_helpers.html' import render_simple_field, render_field %}
<div class="edit-form">
<div class="tabs collapsable">
<ul>
<li class="tab active" id=""><a href="{{ url_for('backups.create') }}">{{ _('Create') }}</a></li>
<li class="tab"><a href="{{ url_for('backups.restore.restore') }}">{{ _('Restore') }}</a></li>
</ul>
</div>
<div class="box-wrap inner">
<div id="general">
{% if backup_running %}
<p>
<span class="spinner"></span>&nbsp;<strong>{{ _('A backup is running!') }}</strong>
</p>
{% endif %}
<p>
{{ _('Here you can download and request a new backup, when a backup is completed you will see it listed below.') }}
</p>
<br>
{% if available_backups %}
<ul>
{% for backup in available_backups %}
<li>
<a href="{{ url_for('backups.download_backup', filename=backup["filename"]) }}">{{ backup["filename"] }}</a> {{ backup["filesize"] }} {{ _('Mb') }}
</li>
{% endfor %}
</ul>
{% else %}
<p>
<strong>{{ _('No backups found.') }}</strong>
</p>
{% endif %}
<a class="pure-button pure-button-primary"
href="{{ url_for('backups.request_backup') }}">{{ _('Create backup') }}</a>
{% if available_backups %}
<a class="pure-button button-small button-error "
href="{{ url_for('backups.remove_backups') }}">{{ _('Remove backups') }}</a>
{% endif %}
</div>
</div>
</div>
{% endblock %}
@@ -1,61 +0,0 @@
{% extends 'base.html' %}
{% block content %}
{% from '_helpers.html' import render_field, render_checkbox_field %}
<div class="edit-form">
<div class="tabs collapsable">
<ul>
<li class="tab"><a href="{{ url_for('backups.create') }}">{{ _('Create') }}</a></li>
<li class="tab active"><a href="{{ url_for('backups.restore.restore') }}">{{ _('Restore') }}</a></li>
</ul>
</div>
<div class="box-wrap inner">
<div id="general">
{% if restore_running %}
<p>
<span class="spinner"></span>&nbsp;<strong>{{ _('A restore is running!') }}</strong>
</p>
{% endif %}
<p>{{ _('Restore a backup. Must be a .zip backup file created on/after v0.53.1 (new database layout).') }}</p>
<p>{{ _('Note: This does not override the main application settings, only watches and groups.') }}</p>
<p class="pure-form-message">
{{ _('Max upload size: %(upload)s MB, Max decompressed size: %(decomp)s MB', upload=max_upload_mb, decomp=max_decompressed_mb) }}
</p>
<form class="pure-form pure-form-stacked settings"
action="{{ url_for('backups.restore.backups_restore_start') }}"
method="POST"
enctype="multipart/form-data">
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}">
<div class="pure-control-group">
{{ render_checkbox_field(form.include_groups) }}
<span class="pure-form-message-inline">{{ _('Include all groups found in backup?') }}</span>
</div>
<div class="pure-control-group">
{{ render_checkbox_field(form.include_groups_replace_existing) }}
<span class="pure-form-message-inline">{{ _('Replace any existing groups of the same UUID?') }}</span>
</div>
<div class="pure-control-group">
{{ render_checkbox_field(form.include_watches) }}
<span class="pure-form-message-inline">{{ _('Include all watches found in backup?') }}</span>
</div>
<div class="pure-control-group">
{{ render_checkbox_field(form.include_watches_replace_existing) }}
<span class="pure-form-message-inline">{{ _('Replace any existing watches of the same UUID?') }}</span>
</div>
<div class="pure-control-group">
{{ render_field(form.zip_file) }}
</div>
<div class="pure-controls">
<button type="submit" class="pure-button pure-button-primary">{{ _('Restore backup') }}</button>
</div>
</form>
</div>
</div>
</div>
{% endblock %}
@@ -0,0 +1,36 @@
{% extends 'base.html' %}
{% block content %}
{% from '_helpers.html' import render_simple_field, render_field %}
<div class="edit-form">
<div class="box-wrap inner">
<h2>{{ _('Backups') }}</h2>
{% if backup_running %}
<p>
<span class="spinner"></span>&nbsp;<strong>{{ _('A backup is running!') }}</strong>
</p>
{% endif %}
<p>
{{ _('Here you can download and request a new backup, when a backup is completed you will see it listed below.') }}
</p>
<br>
{% if available_backups %}
<ul>
{% for backup in available_backups %}
<li><a href="{{ url_for('backups.download_backup', filename=backup["filename"]) }}">{{ backup["filename"] }}</a> {{ backup["filesize"] }} {{ _('Mb') }}</li>
{% endfor %}
</ul>
{% else %}
<p>
<strong>{{ _('No backups found.') }}</strong>
</p>
{% endif %}
<a class="pure-button pure-button-primary" href="{{ url_for('backups.request_backup') }}">{{ _('Create backup') }}</a>
{% if available_backups %}
<a class="pure-button button-small button-error " href="{{ url_for('backups.remove_backups') }}">{{ _('Remove backups') }}</a>
{% endif %}
</div>
</div>
{% endblock %}
@@ -102,35 +102,6 @@ def run_async_in_browser_loop(coro):
else:
raise RuntimeError("Browser steps event loop is not available")
async def _close_session_resources(session_data, label=''):
"""Close all browser resources for a session in the correct order.
browserstepper.cleanup() closes page+context but not the browser itself.
For CloakBrowser, browser.close() is what stops the local Chromium process via pw.stop().
For the default CDP path, playwright_context.stop() shuts down the playwright instance.
"""
browserstepper = session_data.get('browserstepper')
if browserstepper:
try:
await browserstepper.cleanup()
except Exception as e:
logger.error(f"Error cleaning up browserstepper{label}: {e}")
browser = session_data.get('browser')
if browser:
try:
await asyncio.wait_for(browser.close(), timeout=5.0)
except Exception as e:
logger.warning(f"Error closing browser{label}: {e}")
playwright_context = session_data.get('playwright_context')
if playwright_context:
try:
await playwright_context.stop()
except Exception as e:
logger.warning(f"Error stopping playwright context{label}: {e}")
def cleanup_expired_sessions():
"""Remove expired browsersteps sessions and cleanup their resources"""
global browsersteps_sessions, browsersteps_watch_to_session
@@ -148,10 +119,13 @@ def cleanup_expired_sessions():
logger.debug(f"Cleaning up expired browsersteps session {session_id}")
session_data = browsersteps_sessions[session_id]
try:
run_async_in_browser_loop(_close_session_resources(session_data, label=f" for session {session_id}"))
except Exception as e:
logger.error(f"Error cleaning up session {session_id}: {e}")
# Cleanup playwright resources asynchronously
browserstepper = session_data.get('browserstepper')
if browserstepper:
try:
run_async_in_browser_loop(browserstepper.cleanup())
except Exception as e:
logger.error(f"Error cleaning up session {session_id}: {e}")
# Remove from sessions dict
del browsersteps_sessions[session_id]
@@ -178,10 +152,12 @@ def cleanup_session_for_watch(watch_uuid):
session_data = browsersteps_sessions.get(session_id)
if session_data:
try:
run_async_in_browser_loop(_close_session_resources(session_data, label=f" for watch {watch_uuid}"))
except Exception as e:
logger.error(f"Error cleaning up session {session_id} for watch {watch_uuid}: {e}")
browserstepper = session_data.get('browserstepper')
if browserstepper:
try:
run_async_in_browser_loop(browserstepper.cleanup())
except Exception as e:
logger.error(f"Error cleaning up session {session_id} for watch {watch_uuid}: {e}")
# Remove from sessions dict
del browsersteps_sessions[session_id]
@@ -198,80 +174,71 @@ def construct_blueprint(datastore: ChangeDetectionStore):
browser_steps_blueprint = Blueprint('browser_steps', __name__, template_folder="templates")
async def start_browsersteps_session(watch_uuid):
from changedetectionio.browser_steps import browser_steps
from . import browser_steps
import time
from playwright.async_api import async_playwright
# We keep the playwright session open for many minutes
keepalive_seconds = int(os.getenv('BROWSERSTEPS_MINUTES_KEEPALIVE', 10)) * 60
keepalive_ms = ((keepalive_seconds + 3) * 1000)
browsersteps_start_session = {'start_time': time.time()}
# Build proxy dict first — needed by both the CDP path and fetcher-specific launchers
proxy_id = datastore.get_preferred_proxy_for_watch(uuid=watch_uuid)
proxy = None
if proxy_id:
proxy_url = datastore.proxy_list.get(proxy_id, {}).get('url')
if proxy_url:
from urllib.parse import urlparse
parsed = urlparse(proxy_url)
proxy = {'server': proxy_url}
if parsed.username:
proxy['username'] = parsed.username
if parsed.password:
proxy['password'] = parsed.password
logger.debug(f"Browser Steps: UUID {watch_uuid} selected proxy {proxy_url}")
# Create a new async playwright instance for browser steps
playwright_instance = async_playwright()
playwright_context = await playwright_instance.start()
# Resolve the fetcher class for this watch so we can ask it to launch its own browser
# if it supports that (e.g. CloakBrowser, which runs locally rather than via CDP)
watch = datastore.data['watching'][watch_uuid]
from changedetectionio import content_fetchers
fetcher_name = watch.get_fetch_backend or 'system'
if fetcher_name == 'system':
fetcher_name = datastore.data['settings']['application'].get('fetch_backend', 'html_requests')
fetcher_class = getattr(content_fetchers, fetcher_name, None)
browser = None
playwright_context = None
# If the fetcher has its own browser launch for the live steps UI, use it.
# get_browsersteps_browser(proxy, keepalive_ms) returns (browser, playwright_context_or_None)
# or None to fall back to the default CDP path.
if fetcher_class and hasattr(fetcher_class, 'get_browsersteps_browser'):
result = await fetcher_class.get_browsersteps_browser(proxy=proxy, keepalive_ms=keepalive_ms)
if result is not None:
browser, playwright_context = result
logger.debug(f"Browser Steps: using fetcher-specific browser for '{fetcher_name}'")
# Default: connect to the remote Playwright/sockpuppetbrowser via CDP
if browser is None:
playwright_instance = async_playwright()
playwright_context = await playwright_instance.start()
base_url = os.getenv('PLAYWRIGHT_DRIVER_URL', '').strip('"')
a = "?" if '?' not in base_url else '&'
base_url += a + f"timeout={keepalive_ms}"
browser = await playwright_context.chromium.connect_over_cdp(base_url, timeout=keepalive_ms)
logger.debug(f"Browser Steps: using CDP connection to {base_url}")
keepalive_ms = ((keepalive_seconds + 3) * 1000)
base_url = os.getenv('PLAYWRIGHT_DRIVER_URL', '').strip('"')
a = "?" if not '?' in base_url else '&'
base_url += a + f"timeout={keepalive_ms}"
browser = await playwright_context.chromium.connect_over_cdp(base_url, timeout=keepalive_ms)
browsersteps_start_session['browser'] = browser
browsersteps_start_session['playwright_context'] = playwright_context
proxy_id = datastore.get_preferred_proxy_for_watch(uuid=watch_uuid)
proxy = None
if proxy_id:
proxy_url = datastore.proxy_list.get(proxy_id).get('url')
if proxy_url:
# Playwright needs separate username and password values
from urllib.parse import urlparse
parsed = urlparse(proxy_url)
proxy = {'server': proxy_url}
if parsed.username:
proxy['username'] = parsed.username
if parsed.password:
proxy['password'] = parsed.password
logger.debug(f"Browser Steps: UUID {watch_uuid} selected proxy {proxy_url}")
# Tell Playwright to connect to Chrome and setup a new session via our stepper interface
browserstepper = browser_steps.browsersteps_live_ui(
playwright_browser=browser,
proxy=proxy,
start_url=watch.link,
headers=watch.get('headers')
start_url=datastore.data['watching'][watch_uuid].link,
headers=datastore.data['watching'][watch_uuid].get('headers')
)
# Initialize the async connection
await browserstepper.connect(proxy=proxy)
browsersteps_start_session['browserstepper'] = browserstepper
# For test
#await browsersteps_start_session['browserstepper'].action_goto_url(value="http://example.com?time="+str(time.time()))
return browsersteps_start_session
@browser_steps_blueprint.route("/browsersteps_start_session", methods=['GET'])
@login_optionally_required
@browser_steps_blueprint.route("/browsersteps_start_session", methods=['GET'])
def browsersteps_start_session():
# A new session was requested, return sessionID
import asyncio
import uuid
browsersteps_session_id = str(uuid.uuid4())
watch_uuid = request.args.get('uuid')
@@ -304,8 +271,8 @@ def construct_blueprint(datastore: ChangeDetectionStore):
logger.debug("Starting connection with playwright - done")
return {'browsersteps_session_id': browsersteps_session_id}
@browser_steps_blueprint.route("/browsersteps_image", methods=['GET'])
@login_optionally_required
@browser_steps_blueprint.route("/browsersteps_image", methods=['GET'])
def browser_steps_fetch_screenshot_image():
from flask import (
make_response,
@@ -330,14 +297,15 @@ def construct_blueprint(datastore: ChangeDetectionStore):
return make_response('Unable to fetch image, is the URL correct? does the watch exist? does the step_type-n.jpeg exist?', 401)
# A request for an action was received
@browser_steps_blueprint.route("/browsersteps_update", methods=['POST'])
@login_optionally_required
@browser_steps_blueprint.route("/browsersteps_update", methods=['POST'])
def browsersteps_ui_update():
import base64
import playwright._impl._errors
from changedetectionio.blueprint.browser_steps import browser_steps
remaining = 0
remaining =0
uuid = request.args.get('uuid')
goto_website_url_first_step = request.args.get('goto_website_url_first_step')
browsersteps_session_id = request.args.get('browsersteps_session_id')
@@ -348,33 +316,33 @@ def construct_blueprint(datastore: ChangeDetectionStore):
return make_response('No session exists under that ID', 500)
is_last_step = False
# @todo - should always be an existing session
if goto_website_url_first_step:
logger.debug("Going to site (requested automatically before stepping)..")
step_operation = "Goto site"
step_selector = None
step_optional_value = None
else:
# Actions - step/apply/etc, do the thing and return state
if request.method == 'POST':
# @todo - should always be an existing session
step_operation = request.form.get('operation')
step_selector = request.form.get('selector')
step_optional_value = request.form.get('optional_value')
is_last_step = strtobool(request.form.get('is_last_step'))
try:
# Run the async call_action method in the dedicated browser steps event loop
run_async_in_browser_loop(
browsersteps_sessions[browsersteps_session_id]['browserstepper'].call_action(
action_name=step_operation,
selector=step_selector,
optional_value=step_optional_value
try:
# Run the async call_action method in the dedicated browser steps event loop
run_async_in_browser_loop(
browsersteps_sessions[browsersteps_session_id]['browserstepper'].call_action(
action_name=step_operation,
selector=step_selector,
optional_value=step_optional_value
)
)
)
except Exception as e:
logger.error(f"Exception when calling step operation {step_operation} {str(e)}")
# Try to find something of value to give back to the user
return make_response(str(e).splitlines()[0], 401)
except Exception as e:
logger.error(f"Exception when calling step operation {step_operation} {str(e)}")
# Try to find something of value to give back to the user
return make_response(str(e).splitlines()[0], 401)
# if not this_session.page:
# cleanup_playwright_session()
# return make_response('Browser session ran out of time :( Please reload this page.', 401)
# Screenshots and other info only needed on requesting a step (POST)
try:
@@ -382,7 +350,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
(screenshot, xpath_data) = run_async_in_browser_loop(
browsersteps_sessions[browsersteps_session_id]['browserstepper'].get_current_state()
)
if is_last_step:
watch = datastore.data['watching'].get(uuid)
u = browsersteps_sessions[browsersteps_session_id]['browserstepper'].page.url
@@ -8,17 +8,6 @@ from changedetectionio.content_fetchers import SCREENSHOT_MAX_HEIGHT_DEFAULT
from changedetectionio.content_fetchers.base import manage_user_agent
from changedetectionio.jinja2_custom import render as jinja_render
def browser_steps_get_valid_steps(browser_steps: list):
if browser_steps is not None and len(browser_steps):
valid_steps = list(filter(
lambda s: (s['operation'] and len(s['operation']) and s['operation'] != 'Choose one'),browser_steps))
# Just incase they selected Goto site by accident with older JS
if valid_steps and valid_steps[0]['operation'] == 'Goto site':
del(valid_steps[0])
return valid_steps
return []
# Two flags, tell the JS which of the "Selector" or "Value" field should be enabled in the front end
@@ -40,13 +40,12 @@ def construct_blueprint(datastore: ChangeDetectionStore):
contents = ''
now = time.time()
try:
import asyncio
processor_module = importlib.import_module("changedetectionio.processors.text_json_diff.processor")
update_handler = processor_module.perform_site_check(datastore=datastore,
watch_uuid=uuid
)
asyncio.run(update_handler.call_browser(preferred_proxy_id=preferred_proxy))
update_handler.call_browser(preferred_proxy_id=preferred_proxy)
# title, size is len contents not len xfer
except content_fetcher_exceptions.Non200ErrorCodeReceived as e:
if e.status_code == 404:
@@ -95,13 +94,13 @@ def construct_blueprint(datastore: ChangeDetectionStore):
return results
@login_required
@check_proxies_blueprint.route("/<uuid_str:uuid>/status", methods=['GET'])
@check_proxies_blueprint.route("/<string:uuid>/status", methods=['GET'])
def get_recheck_status(uuid):
results = _recalc_check_status(uuid=uuid)
return results
@login_required
@check_proxies_blueprint.route("/<uuid_str:uuid>/start", methods=['GET'])
@check_proxies_blueprint.route("/<string:uuid>/start", methods=['GET'])
def start_check(uuid):
if not datastore.proxy_list:
@@ -9,7 +9,6 @@
<li class="tab" id=""><a href="#url-list">{{ _('URL List') }}</a></li>
<li class="tab"><a href="#distill-io">{{ _('Distill.io') }}</a></li>
<li class="tab"><a href="#xlsx">{{ _('.XLSX & Wachete') }}</a></li>
<li class="tab"><a href="{{url_for('backups.restore.restore')}}">{{ _('Backup Restore') }}</a></li>
</ul>
</div>
@@ -17,11 +16,6 @@
<form class="pure-form" action="{{url_for('imports.import_page')}}" method="POST" enctype="multipart/form-data">
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}">
<div class="tab-pane-inner" id="url-list">
<p>
{{ _('Restoring changedetection.io backups is in the') }}<a href="{{ url_for('backups.restore.restore') }}"> {{ _('backups section') }}</a>.
<br>
</p>
<div class="pure-control-group">
{{ _('Enter one URL per line, and optionally add tags for each URL after a space, delineated by comma (,):') }}
<br>
@@ -43,6 +37,9 @@
</div>
<div class="tab-pane-inner" id="distill-io">
<div class="pure-control-group">
{{ _('Copy and Paste your Distill.io watch \'export\' file, this should be a JSON file.') }}<br>
{{ _('This is') }} <i>{{ _('experimental') }}</i>, {{ _('supported fields are') }} <code>name</code>, <code>uri</code>, <code>tags</code>, <code>config:selections</code>, {{ _('the rest (including') }} <code>schedule</code>) {{ _('are ignored.') }}
@@ -52,6 +49,8 @@
{{ _('Be sure to set your default fetcher to Chrome if required.') }}<br>
</p>
</div>
<textarea name="distill-io" class="pure-input-1-2" style="width: 100%;
font-family:monospace;
white-space: pre;
@@ -115,7 +114,6 @@
</div>
</div>
<button type="submit" class="pure-button pure-input-1-2 pure-button-primary">{{ _('Import') }}</button>
</form>
</div>
@@ -15,7 +15,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q: PriorityQueue
price_data_follower_blueprint = Blueprint('price_data_follower', __name__)
@login_required
@price_data_follower_blueprint.route("/<uuid_str:uuid>/accept", methods=['GET'])
@price_data_follower_blueprint.route("/<string:uuid>/accept", methods=['GET'])
def accept(uuid):
datastore.data['watching'][uuid]['track_ldjson_price_data'] = PRICE_DATA_TRACK_ACCEPT
datastore.data['watching'][uuid]['processor'] = 'restock_diff'
@@ -25,7 +25,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q: PriorityQueue
return redirect(url_for("watchlist.index"))
@login_required
@price_data_follower_blueprint.route("/<uuid_str:uuid>/reject", methods=['GET'])
@price_data_follower_blueprint.route("/<string:uuid>/reject", methods=['GET'])
def reject(uuid):
datastore.data['watching'][uuid]['track_ldjson_price_data'] = PRICE_DATA_TRACK_REJECT
datastore.data['watching'][uuid].commit()
@@ -9,12 +9,11 @@ def construct_single_watch_routes(rss_blueprint, datastore):
datastore: The ChangeDetectionStore instance
"""
@rss_blueprint.route("/watch/<uuid_str:uuid>", methods=['GET'])
@rss_blueprint.route("/watch/<string:uuid>", methods=['GET'])
def rss_single_watch(uuid):
import time
from flask import make_response, request, Response
from flask_babel import lazy_gettext as _l
from flask import make_response, request
from feedgen.feed import FeedGenerator
from loguru import logger
@@ -43,12 +42,12 @@ def construct_single_watch_routes(rss_blueprint, datastore):
# Get the watch by UUID
watch = datastore.data['watching'].get(uuid)
if not watch:
return Response(_l("Watch with UUID %(uuid)s not found", uuid=uuid), status=404, mimetype='text/plain')
return f"Watch with UUID {uuid} not found", 404
# Check if watch has at least 2 history snapshots
dates = list(watch.history.keys())
if len(dates) < 2:
return Response(_l("Watch %(uuid)s does not have enough history snapshots to show changes (need at least 2)", uuid=uuid), status=400, mimetype='text/plain')
return f"Watch {uuid} does not have enough history snapshots to show changes (need at least 2)", 400
# Get the number of diffs to include (default: 5)
rss_diff_length = datastore.data['settings']['application'].get('rss_diff_length', 5)
+1 -1
View File
@@ -7,7 +7,7 @@ def construct_tag_routes(rss_blueprint, datastore):
datastore: The ChangeDetectionStore instance
"""
@rss_blueprint.route("/tag/<uuid_str:tag_uuid>", methods=['GET'])
@rss_blueprint.route("/tag/<string:tag_uuid>", methods=['GET'])
def rss_tag_feed(tag_uuid):
from flask import make_response, request, url_for
@@ -25,7 +25,7 @@
<li class="tab"><a href="#ui-options">{{ _('UI Options') }}</a></li>
<li class="tab"><a href="#api">{{ _('API') }}</a></li>
<li class="tab"><a href="#rss">{{ _('RSS') }}</a></li>
<li class="tab"><a href="{{ url_for('backups.create') }}">{{ _('Backups') }}</a></li>
<li class="tab"><a href="{{ url_for('backups.index') }}">{{ _('Backups') }}</a></li>
<li class="tab"><a href="#timedate">{{ _('Time & Date') }}</a></li>
<li class="tab"><a href="#proxies">{{ _('CAPTCHA & Proxies') }}</a></li>
{% if plugin_tabs %}
@@ -154,8 +154,9 @@
</span>
</div>
<div class="pure-control-group">
<br>
{{ _('Tip:') }} <a href="{{ url_for('settings.settings_page')}}#proxies">{{ _('Connect using Bright Data proxies, find out more here.') }}</a>
<br>
{{ _('Tip:') }} <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration#brightdata-proxy-support">{{ _('Connect using Bright Data and Oxylabs Proxies, find out more here.') }}</a>
</div>
</div>
@@ -351,7 +352,7 @@ nav
</div>
</div>
<p><strong>{{ _('Tip') }}</strong>: {{ _('"Residential" and "Mobile" proxy type can be more successful than "Data Center" for blocked websites.') }}</p>
<p><strong>{{ _('Tip') }}</strong>: {{ _('"Residential" and "Mobile" proxy type can be more successfull than "Data Center" for blocked websites.') }}</p>
<div class="pure-control-group" id="extra-proxies-setting">
{{ render_fieldlist_with_inline_errors(form.requests.form.extra_proxies) }}
+30 -34
View File
@@ -22,14 +22,11 @@ def construct_blueprint(datastore: ChangeDetectionStore):
tag_count = Counter(tag for watch in datastore.data['watching'].values() if watch.get('tags') for tag in watch['tags'])
from changedetectionio import processors
output = render_template("groups-overview.html",
app_rss_token=datastore.data['settings']['application'].get('rss_access_token'),
available_tags=sorted_tags,
form=add_form,
generate_tag_colors=processors.generate_processor_badge_colors,
tag_count=tag_count,
wcag_text_color=processors.wcag_text_color,
)
return output
@@ -57,7 +54,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
return redirect(url_for('tags.tags_overview_page'))
@tags_blueprint.route("/mute/<uuid_str:uuid>", methods=['GET'])
@tags_blueprint.route("/mute/<string:uuid>", methods=['GET'])
@login_optionally_required
def mute(uuid):
tag = datastore.data['settings']['application']['tags'].get(uuid)
@@ -66,13 +63,24 @@ def construct_blueprint(datastore: ChangeDetectionStore):
tag.commit()
return redirect(url_for('tags.tags_overview_page'))
@tags_blueprint.route("/delete/<uuid_str:uuid>", methods=['GET'])
@tags_blueprint.route("/delete/<string:uuid>", methods=['GET'])
@login_optionally_required
def delete(uuid):
# Delete the tag from settings immediately
if datastore.data['settings']['application']['tags'].get(uuid):
del datastore.data['settings']['application']['tags'][uuid]
# Delete tag.json file if it exists
import os
tag_dir = os.path.join(datastore.datastore_path, uuid)
tag_json = os.path.join(tag_dir, "tag.json")
if os.path.exists(tag_json):
try:
os.unlink(tag_json)
logger.info(f"Deleted tag.json for tag {uuid}")
except Exception as e:
logger.error(f"Failed to delete tag.json for tag {uuid}: {e}")
# Remove tag from all watches in background thread to avoid blocking
def remove_tag_background(tag_uuid):
"""Background thread to remove tag from watches - discarded after completion."""
@@ -93,7 +101,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
flash(gettext("Tag deleted, removing from watches in background"))
return redirect(url_for('tags.tags_overview_page'))
@tags_blueprint.route("/unlink/<uuid_str:uuid>", methods=['GET'])
@tags_blueprint.route("/unlink/<string:uuid>", methods=['GET'])
@login_optionally_required
def unlink(uuid):
# Unlink tag from all watches in background thread to avoid blocking
@@ -119,11 +127,19 @@ def construct_blueprint(datastore: ChangeDetectionStore):
@tags_blueprint.route("/delete_all", methods=['GET'])
@login_optionally_required
def delete_all():
# Delete all tag.json files
import os
for tag_uuid in list(datastore.data['settings']['application']['tags'].keys()):
# TagsDict 'del' handler will remove the dir
del datastore.data['settings']['application']['tags'][tag_uuid]
tag_dir = os.path.join(datastore.datastore_path, tag_uuid)
tag_json = os.path.join(tag_dir, "tag.json")
if os.path.exists(tag_json):
try:
os.unlink(tag_json)
except Exception as e:
logger.error(f"Failed to delete tag.json for tag {tag_uuid}: {e}")
# Clear all tags from settings immediately
datastore.data['settings']['application']['tags'] = {}
# Clear tags from all watches in background thread to avoid blocking
def clear_all_tags_background():
@@ -144,7 +160,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
flash(gettext("All tags deleted, clearing from watches in background"))
return redirect(url_for('tags.tags_overview_page'))
@tags_blueprint.route("/edit/<uuid_str:uuid>", methods=['GET'])
@tags_blueprint.route("/edit/<string:uuid>", methods=['GET'])
@login_optionally_required
def form_tag_edit(uuid):
from changedetectionio.blueprint.tags.form import group_restock_settings_form
@@ -163,21 +179,6 @@ def construct_blueprint(datastore: ChangeDetectionStore):
default_system_settings = datastore.data['settings'],
)
# Bridge API-stored processor_config_* values into the form's FormField sub-forms.
# The API stores processor_config_restock_diff in the tag dict; find the matching
# FormField by checking which one's sub-fields cover the config keys.
from wtforms.fields.form import FormField as WTFormField
for key, value in default.items():
if not key.startswith('processor_config_') or not isinstance(value, dict):
continue
for form_field in form:
if isinstance(form_field, WTFormField) and all(k in form_field.form._fields for k in value):
for sub_key, sub_value in value.items():
sub_field = form_field.form._fields.get(sub_key)
if sub_field is not None:
sub_field.data = sub_value
break
template_args = {
'data': default,
'form': form,
@@ -211,17 +212,9 @@ def construct_blueprint(datastore: ChangeDetectionStore):
template = env.from_string(template_str)
included_content = template.render(**template_args)
# Watches whose URL currently matches this tag's pattern
matching_watches = {
w_uuid: watch
for w_uuid, watch in datastore.data['watching'].items()
if default.matches_url(watch.get('url', ''))
}
output = render_template("edit-tag.html",
extra_form_content=included_content,
extra_tab_content=form.extra_tab_content() if form.extra_tab_content() else None,
matching_watches=matching_watches,
settings_application=datastore.data['settings']['application'],
**template_args
)
@@ -229,7 +222,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
return output
@tags_blueprint.route("/edit/<uuid_str:uuid>", methods=['POST'])
@tags_blueprint.route("/edit/<string:uuid>", methods=['POST'])
@login_optionally_required
def form_tag_edit_submit(uuid):
from changedetectionio.blueprint.tags.form import group_restock_settings_form
@@ -262,4 +255,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
return redirect(url_for('tags.tags_overview_page'))
@tags_blueprint.route("/delete/<string:uuid>", methods=['GET'])
def form_tag_delete(uuid):
return redirect(url_for('tags.tags_overview_page'))
return tags_blueprint
+4 -3
View File
@@ -10,11 +10,12 @@ from changedetectionio.processors.restock_diff.forms import processor_settings_f
class group_restock_settings_form(restock_settings_form):
overrides_watch = BooleanField('Activate for individual watches in this tag/group?', default=False)
url_match_pattern = StringField('Auto-apply to watches with URLs matching',
render_kw={"placeholder": "e.g. *://example.com/* or github.com/myorg"})
tag_colour = StringField('Tag colour', default='')
class SingleTag(Form):
name = StringField('Tag name', [validators.InputRequired()], render_kw={"placeholder": "Name"})
save_button = SubmitField('Save', render_kw={"class": "pure-button pure-button-primary"})
@@ -43,46 +43,6 @@
<div class="pure-control-group">
{{ render_field(form.title, placeholder="https://...", required=true, class="m-d") }}
</div>
<div class="pure-control-group">
{{ render_field(form.url_match_pattern, class="m-d") }}
<span class="pure-form-message-inline">{{ _('Automatically applies this tag to any watch whose URL matches. Supports wildcards: <code>*example.com*</code> or plain substring: <code>github.com/myorg</code>')|safe }}</span>
</div>
{% if matching_watches %}
<div class="pure-control-group">
<label>{{ _('Currently matching watches') }} ({{ matching_watches|length }})</label>
<ul class="tag-url-match-list">
{% for w_uuid, w in matching_watches.items() %}
<li><a href="{{ url_for('ui.ui_edit.edit_page', uuid=w_uuid) }}">{{ w.label }}</a></li>
{% endfor %}
</ul>
</div>
{% endif %}
<div class="pure-control-group">
<label>{{ _('Tag colour') }}</label>
<div style="display:flex; align-items:center; gap:0.75em;">
<input type="checkbox" id="use_custom_colour"
{% if data.get('tag_colour') %}checked{% endif %}>
<label for="use_custom_colour" style="margin:0">{{ _('Custom colour') }}</label>
<input type="color" id="tag_colour_picker"
value="{{ data.get('tag_colour') or '#4f8ef7' }}"
{% if not data.get('tag_colour') %}disabled{% endif %}>
<input type="hidden" name="tag_colour" id="tag_colour_hidden"
value="{{ data.get('tag_colour', '') }}">
</div>
<span class="pure-form-message-inline">{{ _('Leave unchecked to use the auto-generated colour based on the tag name.') }}</span>
</div>
<script>
(function () {
var cb = document.getElementById('use_custom_colour');
var picker = document.getElementById('tag_colour_picker');
var hidden = document.getElementById('tag_colour_hidden');
picker.addEventListener('input', function () { hidden.value = this.value; });
cb.addEventListener('change', function () {
picker.disabled = !this.checked;
hidden.value = this.checked ? picker.value : '';
});
})();
</script>
</fieldset>
</div>
@@ -3,26 +3,6 @@
{% from '_helpers.html' import render_simple_field, render_field %}
<script src="{{url_for('static_content', group='js', filename='jquery-3.6.0.min.js')}}"></script>
<script src="{{url_for('static_content', group='js', filename='modal.js')}}"></script>
<style>
{%- for uuid, tag in available_tags -%}
{%- if tag and tag.title -%}
{%- set class_name = tag.title|sanitize_tag_class -%}
{%- if tag.get('tag_colour') -%}
.watch-tag-list.tag-{{ class_name }} { background-color: {{ tag.tag_colour }}; color: {{ wcag_text_color(tag.tag_colour) }}; }
{%- else -%}
{%- set colors = generate_tag_colors(tag.title) -%}
.watch-tag-list.tag-{{ class_name }} {
background-color: {{ colors['light']['bg'] }};
color: {{ colors['light']['color'] }};
}
html[data-darkmode="true"] .watch-tag-list.tag-{{ class_name }} {
background-color: {{ colors['dark']['bg'] }};
color: {{ colors['dark']['color'] }};
}
{%- endif -%}
{%- endif -%}
{%- endfor -%}
</style>
<div class="box">
<form class="pure-form" action="{{ url_for('tags.form_tag_add') }}" method="POST" id="new-watch-form">
@@ -68,7 +48,7 @@ html[data-darkmode="true"] .watch-tag-list.tag-{{ class_name }} {
<a class="link-mute state-{{'on' if tag.notification_muted else 'off'}}" href="{{url_for('tags.mute', uuid=tag.uuid)}}"><img src="{{url_for('static_content', group='images', filename='bell-off.svg')}}" alt="Mute notifications" title="Mute notifications" class="icon icon-mute" ></a>
</td>
<td>{{ "{:,}".format(tag_count[uuid]) if uuid in tag_count else 0 }}</td>
<td class="title-col inline"> <a href="{{url_for('watchlist.index', tag=uuid) }}" class="watch-tag-list tag-{{ tag.title|sanitize_tag_class }}">{{ tag.title }}</a></td>
<td class="title-col inline"> <a href="{{url_for('watchlist.index', tag=uuid) }}">{{ tag.title }}</a></td>
<td>
<a class="pure-button pure-button-primary" href="{{ url_for('tags.form_tag_edit', uuid=uuid) }}">{{ _('Edit') }}</a>
<a href="{{ url_for('ui.form_watch_checknow', tag=uuid) }}" class="pure-button pure-button-primary" >{{ _('Recheck') }}</a>
+13 -19
View File
@@ -141,7 +141,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_pool,
# Import the login decorator
from changedetectionio.auth_decorator import login_optionally_required
@ui_blueprint.route("/clear_history/<uuid_str:uuid>", methods=['GET'])
@ui_blueprint.route("/clear_history/<string:uuid>", methods=['GET'])
@login_optionally_required
def clear_watch_history(uuid):
try:
@@ -156,9 +156,9 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_pool,
@login_optionally_required
def clear_all_history():
if request.method == 'POST':
confirmtext = request.form.get('confirmtext', '')
confirmtext = request.form.get('confirmtext')
if confirmtext.strip().lower() == gettext('clear').strip().lower():
if confirmtext == 'clear':
# Run in background thread to avoid blocking
def clear_history_background():
# Capture UUIDs first to avoid race conditions
@@ -194,9 +194,9 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_pool,
tag_limit = request.args.get('tag')
now = int(time.time())
# Mark watches as viewed - use background thread only for large watch counts
def mark_viewed_impl():
"""Mark watches as viewed - can run synchronously or in background thread."""
# Mark watches as viewed in background thread to avoid blocking
def mark_viewed_background():
"""Background thread to mark watches as viewed - discarded after completion."""
marked_count = 0
try:
for watch_uuid, watch in datastore.data['watching'].items():
@@ -209,21 +209,15 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_pool,
datastore.set_last_viewed(watch_uuid, now)
marked_count += 1
logger.info(f"Marking complete: {marked_count} watches marked as viewed")
logger.info(f"Background marking complete: {marked_count} watches marked as viewed")
except Exception as e:
logger.error(f"Error marking as viewed: {e}")
logger.error(f"Error in background mark as viewed: {e}")
# For small watch counts (< 10), run synchronously to avoid race conditions in tests
# For larger counts, use background thread to avoid blocking the UI
watch_count = len(datastore.data['watching'])
if watch_count < 10:
# Run synchronously for small watch counts
mark_viewed_impl()
else:
# Start background thread for large watch counts
thread = threading.Thread(target=mark_viewed_impl, daemon=True)
thread.start()
# Start background thread and return immediately
thread = threading.Thread(target=mark_viewed_background, daemon=True)
thread.start()
flash(gettext("Marking watches as viewed in background..."))
return redirect(url_for('watchlist.index', tag=tag_limit))
@ui_blueprint.route("/delete", methods=['GET'])
@@ -366,7 +360,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_pool,
return redirect(url_for('watchlist.index'))
@ui_blueprint.route("/share-url/<uuid_str:uuid>", methods=['GET'])
@ui_blueprint.route("/share-url/<string:uuid>", methods=['GET'])
@login_optionally_required
def form_share_put_watch(uuid):
"""Given a watch UUID, upload the info and return a share-link
+4 -4
View File
@@ -66,7 +66,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
return Markup(result)
@diff_blueprint.route("/diff/<uuid_str:uuid>", methods=['GET'])
@diff_blueprint.route("/diff/<string:uuid>", methods=['GET'])
@login_optionally_required
def diff_history_page(uuid):
"""
@@ -128,7 +128,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
redirect=redirect
)
@diff_blueprint.route("/diff/<uuid_str:uuid>/extract", methods=['GET'])
@diff_blueprint.route("/diff/<string:uuid>/extract", methods=['GET'])
@login_optionally_required
def diff_history_page_extract_GET(uuid):
"""
@@ -182,7 +182,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
redirect=redirect
)
@diff_blueprint.route("/diff/<uuid_str:uuid>/extract", methods=['POST'])
@diff_blueprint.route("/diff/<string:uuid>/extract", methods=['POST'])
@login_optionally_required
def diff_history_page_extract_POST(uuid):
"""
@@ -238,7 +238,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
redirect=redirect
)
@diff_blueprint.route("/diff/<uuid_str:uuid>/processor-asset/<string:asset_name>", methods=['GET'])
@diff_blueprint.route("/diff/<string:uuid>/processor-asset/<string:asset_name>", methods=['GET'])
@login_optionally_required
def processor_asset(uuid, asset_name):
"""
+13 -28
View File
@@ -20,13 +20,13 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
if tag_uuid in watch.get('tags', []) and (tag.get('include_filters') or tag.get('subtractive_selectors')):
return True
@edit_blueprint.route("/edit/<uuid_str:uuid>", methods=['GET', 'POST'])
@edit_blueprint.route("/edit/<string:uuid>", methods=['GET', 'POST'])
@login_optionally_required
# https://stackoverflow.com/questions/42984453/wtforms-populate-form-with-data-if-data-exists
# https://wtforms.readthedocs.io/en/3.0.x/forms/#wtforms.form.Form.populate_obj ?
def edit_page(uuid):
from changedetectionio import forms
from changedetectionio.browser_steps.browser_steps import browser_step_ui_config
from changedetectionio.blueprint.browser_steps.browser_steps import browser_step_ui_config
from changedetectionio import processors
import importlib
@@ -117,25 +117,12 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
processor_config = processor_instance.get_extra_watch_config(config_filename)
if processor_config:
from wtforms.fields.form import FormField
# Populate processor-config-* fields from JSON
for config_key, config_value in processor_config.items():
if not isinstance(config_value, dict):
continue
# Try exact API-named field first (e.g., processor_config_restock_diff)
target_field = getattr(form, f'processor_config_{config_key}', None)
# Fallback: find any FormField sub-form whose fields cover config_value keys
if target_field is None:
for form_field in form:
if isinstance(form_field, FormField) and all(k in form_field.form._fields for k in config_value):
target_field = form_field
break
if target_field is not None:
for sub_key, sub_value in config_value.items():
sub_field = target_field.form._fields.get(sub_key)
if sub_field is not None:
sub_field.data = sub_value
logger.debug(f"Loaded processor config from {config_filename}: {sub_key} = {sub_value}")
field_name = f'processor_config_{config_key}'
if hasattr(form, field_name):
getattr(form, field_name).data = config_value
logger.debug(f"Loaded processor config from {config_filename}: {field_name} = {config_value}")
except Exception as e:
logger.warning(f"Failed to load processor config: {e}")
@@ -320,12 +307,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
'using_global_webdriver_wait': not default['webdriver_delay'],
'uuid': uuid,
'watch': watch,
'capabilities': capabilities,
'auto_applied_tags': {
tag_uuid: tag
for tag_uuid, tag in datastore.data['settings']['application']['tags'].items()
if tag_uuid not in watch.get('tags', []) and tag.matches_url(watch.get('url', ''))
},
'capabilities': capabilities
}
included_content = None
@@ -345,7 +327,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
return output
@edit_blueprint.route("/edit/<uuid_str:uuid>/get-html", methods=['GET'])
@edit_blueprint.route("/edit/<string:uuid>/get-html", methods=['GET'])
@login_optionally_required
def watch_get_latest_html(uuid):
from io import BytesIO
@@ -372,7 +354,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
# Return a 500 error
abort(500)
@edit_blueprint.route("/edit/<uuid_str:uuid>/get-data-package", methods=['GET'])
@edit_blueprint.route("/edit/<string:uuid>/get-data-package", methods=['GET'])
@login_optionally_required
def watch_get_data_package(uuid):
"""Download all data for a single watch as a zip file"""
@@ -382,6 +364,9 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
from pathlib import Path
import datetime
if uuid == 'first':
uuid = list(datastore.data['watching'].keys()).pop()
watch = datastore.data['watching'].get(uuid)
if not watch:
abort(404)
@@ -423,7 +408,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
mimetype='application/zip')
# Ajax callback
@edit_blueprint.route("/edit/<uuid_str:uuid>/preview-rendered", methods=['POST'])
@edit_blueprint.route("/edit/<string:uuid>/preview-rendered", methods=['POST'])
@login_optionally_required
def watch_get_preview_rendered(uuid):
'''For when viewing the "preview" of the rendered text from inside of Edit'''
+8 -7
View File
@@ -10,8 +10,7 @@ from changedetectionio import html_tools
def construct_blueprint(datastore: ChangeDetectionStore):
preview_blueprint = Blueprint('ui_preview', __name__, template_folder="../ui/templates")
@preview_blueprint.route("/preview/<uuid_str:uuid>", methods=['GET', 'POST'])
@preview_blueprint.route("/preview/<string:uuid>", methods=['GET'])
@login_optionally_required
def preview_page(uuid):
"""
@@ -60,8 +59,12 @@ def construct_blueprint(datastore: ChangeDetectionStore):
versions = []
timestamp = None
system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver'
extra_stylesheets = [url_for('static_content', group='styles', filename='diff.css')]
is_html_webdriver = watch.fetcher_supports_screenshots
is_html_webdriver = False
if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver' or watch.get('fetch_backend', '').startswith('extra_browser_'):
is_html_webdriver = True
triggered_line_numbers = []
ignored_line_numbers = []
@@ -71,9 +74,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
flash(gettext("Preview unavailable - No fetch/check completed or triggers not reached"), "error")
else:
# So prepare the latest preview or not
preferred_version = request.values.get('version') if request.method == 'POST' else request.args.get('version')
preferred_version = request.args.get('version')
versions = list(watch.history.keys())
timestamp = versions[-1]
if preferred_version and preferred_version in versions:
@@ -124,7 +125,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
return output
@preview_blueprint.route("/preview/<uuid_str:uuid>/processor-asset/<string:asset_name>", methods=['GET'])
@preview_blueprint.route("/preview/<string:uuid>/processor-asset/<string:asset_name>", methods=['GET'])
@login_optionally_required
def processor_asset(uuid, asset_name):
"""
@@ -81,14 +81,6 @@
<div class="pure-control-group">
{{ render_field(form.tags) }}
<span class="pure-form-message-inline">{{ _('Organisational tag/group name used in the main listing page') }}</span>
{% if auto_applied_tags %}
<span class="pure-form-message-inline">
{{ _('Also automatically applied by URL pattern:') }}
{% for tag_uuid, tag in auto_applied_tags.items() %}
<a href="{{ url_for('tags.form_tag_edit', uuid=tag_uuid) }}" class="watch-tag-list tag-{{ tag.title|sanitize_tag_class }}">{{ tag.title }}</a>
{% endfor %}
</span>
{% endif %}
</div>
<div class="pure-control-group inline-radio">
{{ render_field(form.processor) }}
@@ -496,7 +488,7 @@ Math: {{ 1 + 1 }}") }}
{% if watch.history_n %}
<p>
<a href="{{url_for('ui.ui_edit.watch_get_latest_html', uuid=uuid)}}" class="pure-button button-small">{{ _('Download latest HTML snapshot') }}</a>
<a href="{{url_for('ui.ui_edit.watch_get_data_package', uuid=uuid)}}" class="pure-button button-small">{{ _('Download watch data package') }}</a>
<a href="{{url_for('ui.ui_edit.watch_get_data_package', uuid=uuid)}}" class="pure-button button-small">{{ _('Export watch data') }}</a>
</p>
{% endif %}
@@ -17,7 +17,7 @@
<script src="{{ url_for('static_content', group='js', filename='tabs.js') }}" defer></script>
{% if versions|length >= 2 %}
<div id="diff-form" style="text-align: center;">
<form class="pure-form " action="{{url_for('ui.ui_preview.preview_page', uuid=uuid)}}" method="POST">
<form class="pure-form " action="" method="POST">
<fieldset>
<label for="preview-version">{{ _('Select timestamp') }}</label> <select id="preview-version"
name="from_version"
@@ -28,7 +28,6 @@
</option>
{% endfor %}
</select>
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}">
<button type="submit" class="pure-button pure-button-primary">{{ _('Go') }}</button>
</fieldset>
@@ -81,7 +81,6 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
sorted_tags = sorted(datastore.data['settings']['application'].get('tags').items(), key=lambda x: x[1]['title'])
proxy_list = datastore.proxy_list
output = render_template(
"watch-overview.html",
active_tag=active_tag,
@@ -92,9 +91,8 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
extra_classes='has-queue' if not update_q.empty() else '',
form=form,
generate_tag_colors=processors.generate_processor_badge_colors,
wcag_text_color=processors.wcag_text_color,
guid=datastore.data['app_guid'],
has_proxies=proxy_list,
has_proxies=datastore.proxy_list,
hosted_sticky=os.getenv("SALTED_PASS", False) == False,
now_time_server=round(time.time()),
pagination=pagination,
@@ -112,16 +110,6 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
watches=sorted_watches
)
# Return freed template-building memory to the OS immediately.
# render_template allocates ~20MB of intermediate strings that are freed on return,
# but glibc keeps those pages mapped in its arenas as RSS. malloc_trim() forces
# glibc to release them, preventing RSS growth from concurrent Chrome connections.
try:
import ctypes
ctypes.CDLL('libc.so.6').malloc_trim(0)
except Exception:
pass
if session.get('share-link'):
del (session['share-link'])
@@ -71,13 +71,6 @@ document.addEventListener('DOMContentLoaded', function() {
{%- for uuid, tag in tags -%}
{%- if tag and tag.title -%}
{%- set class_name = tag.title|sanitize_tag_class -%}
{%- if tag.get('tag_colour') -%}
.button-tag.tag-{{ class_name }},
.watch-tag-list.tag-{{ class_name }} {
background-color: {{ tag.tag_colour }};
color: {{ wcag_text_color(tag.tag_colour) }};
}
{%- else -%}
{%- set colors = generate_tag_colors(tag.title) -%}
.button-tag.tag-{{ class_name }} {
background-color: {{ colors['light']['bg'] }};
@@ -99,7 +92,6 @@ html[data-darkmode="true"] .watch-tag-list.tag-{{ class_name }} {
color: {{ colors['dark']['color'] }};
}
{%- endif -%}
{%- endif -%}
{%- endfor -%}
</style>
<div class="box" id="form-quick-watch-add">
@@ -221,13 +213,12 @@ html[data-darkmode="true"] .watch-tag-list.tag-{{ class_name }} {
{%- set checking_now = is_checking_now(watch) -%}
{%- set history_n = watch.history_n -%}
{%- set favicon = watch.get_favicon_filename() -%}
{%- set error_texts = watch.compile_error_texts(has_proxies=has_proxies) -%}
{%- set system_use_url_watchlist = datastore.data['settings']['application']['ui'].get('use_page_title_in_list') -%}
{# Class settings mirrored in changedetectionio/static/js/realtime.js for the frontend #}
{%- set row_classes = [
loop.cycle('pure-table-odd', 'pure-table-even'),
'processor-' ~ watch['processor'],
'has-error' if error_texts|length > 2 else '',
'has-error' if watch.compile_error_texts()|length > 2 else '',
'paused' if watch.paused is defined and watch.paused != False else '',
'unviewed' if watch.has_unviewed else '',
'has-restock-info' if watch.has_restock_info else 'no-restock-info',
@@ -280,7 +271,7 @@ html[data-darkmode="true"] .watch-tag-list.tag-{{ class_name }} {
{% endif %}
<a class="external" target="_blank" rel="noopener" href="{{ watch.link.replace('source:','') }}">&nbsp;</a>
</span>
<div class="error-text" style="display:none;">{{ error_texts|safe }}</div>
<div class="error-text" style="display:none;">{{ watch.compile_error_texts(has_proxies=datastore.proxy_list)|safe }}</div>
{%- if watch['processor'] == 'text_json_diff' -%}
{%- if watch['has_ldjson_price_data'] and not watch['track_ldjson_price_data'] -%}
<div class="ldjson-price-track-offer">Switch to Restock & Price watch mode? <a href="{{url_for('price_data_follower.accept', uuid=watch.uuid)}}" class="pure-button button-xsmall">Yes</a> <a href="{{url_for('price_data_follower.reject', uuid=watch.uuid)}}" class="">No</a></div>
@@ -313,20 +304,11 @@ html[data-darkmode="true"] .watch-tag-list.tag-{{ class_name }} {
</span>
{%- endif -%}
{%- if watch.get('restock') and watch['restock'].get('price') -%}
{%- set restock = watch['restock'] -%}
{%- set price = restock.get('price') -%}
{%- set cur = restock.get('currency','') -%}
{%- if price is not none and (price|string)|regex_search('\d') -%}
<span class="restock-label price" title="{{ _('Price') }}">
{# @todo: make parse_currency/parse_decimal aware of the locale of the actual web page and use that instead changedetectionio/processors/restock_diff/__init__.py #}
{%- if price is number -%}{# It's a number so we can convert it to their locale' #}
{{ price|format_number_locale }} {{ cur }}<!-- as number -->
{%- else -%}{# It's totally fine if it arrives as something else, the website might be something weird in this field #}
{{ price }} {{ cur }}<!-- as string -->
{%- endif -%}
</span>
{%- if watch.get('restock') and watch['restock']['price'] != None -%}
{%- if watch['restock']['price'] != None -%}
<span class="restock-label price" title="{{ _('Price') }}">
{{ watch['restock']['price']|format_number_locale if watch['restock'].get('price') else '' }} {{ watch['restock'].get('currency','') }}
</span>
{%- endif -%}
{%- elif not watch.has_restock_info -%}
<span class="restock-label error">{{ _('No information') }}</span>
+18 -3
View File
@@ -38,6 +38,7 @@ def manage_user_agent(headers, current_ua=''):
return None
class Fetcher():
browser_connection_is_custom = None
browser_connection_url = None
@@ -162,16 +163,30 @@ class Fetcher():
"""
return {k.lower(): v for k, v in self.headers.items()}
def browser_steps_get_valid_steps(self):
if self.browser_steps is not None and len(self.browser_steps):
valid_steps = list(filter(
lambda s: (s['operation'] and len(s['operation']) and s['operation'] != 'Choose one'),
self.browser_steps))
# Just incase they selected Goto site by accident with older JS
if valid_steps and valid_steps[0]['operation'] == 'Goto site':
del(valid_steps[0])
return valid_steps
return None
async def iterate_browser_steps(self, start_url=None):
from changedetectionio.browser_steps.browser_steps import steppable_browser_interface, browser_steps_get_valid_steps
from changedetectionio.blueprint.browser_steps.browser_steps import steppable_browser_interface
from playwright._impl._errors import TimeoutError, Error
from changedetectionio.jinja2_custom import render as jinja_render
step_n = 0
if self.browser_steps:
if self.browser_steps is not None and len(self.browser_steps):
interface = steppable_browser_interface(start_url=start_url)
interface.page = self.page
valid_steps = browser_steps_get_valid_steps(self.browser_steps)
valid_steps = self.browser_steps_get_valid_steps()
for step in valid_steps:
step_n += 1
@@ -49,9 +49,6 @@ async def capture_full_page_async(page, screenshot_format='JPEG', watch_uuid=Non
if page_height > page.viewport_size['height']:
if page_height < step_size:
step_size = page_height # Incase page is bigger than default viewport but smaller than proposed step size
# Never set viewport taller than our max capture height - otherwise one screenshot chunk
# captures the whole (e.g. 8098px) page even when SCREENSHOT_MAX_HEIGHT=1000
step_size = min(step_size, SCREENSHOT_MAX_TOTAL_HEIGHT)
viewport_start = time.time()
logger.debug(f"{watch_info}Setting bigger viewport to step through large page width W{page.viewport_size['width']}xH{step_size} because page_height > viewport_size")
# Set viewport to a larger size to capture more content at once
@@ -298,7 +295,7 @@ class fetcher(Fetcher):
self.page.on("console", lambda msg: logger.debug(f"Playwright console: Watch URL: {url} {msg.type}: {msg.text} {msg.args}"))
# Re-use as much code from browser steps as possible so its the same
from changedetectionio.browser_steps.browser_steps import steppable_browser_interface
from changedetectionio.blueprint.browser_steps.browser_steps import steppable_browser_interface
browsersteps_interface = steppable_browser_interface(start_url=url)
browsersteps_interface.page = self.page
@@ -365,7 +362,7 @@ class fetcher(Fetcher):
# Wrap remaining operations in try/finally to ensure cleanup
try:
# Run Browser Steps here
if self.browser_steps:
if self.browser_steps_get_valid_steps():
try:
await self.iterate_browser_steps(start_url=url)
except BrowserStepsStepException:
@@ -75,9 +75,6 @@ async def capture_full_page(page, screenshot_format='JPEG', watch_uuid=None, loc
if page_height > page.viewport['height']:
if page_height < step_size:
step_size = page_height # Incase page is bigger than default viewport but smaller than proposed step size
# Never set viewport taller than our max capture height - otherwise one screenshot chunk
# captures the whole page even when SCREENSHOT_MAX_HEIGHT is set smaller
step_size = min(step_size, SCREENSHOT_MAX_TOTAL_HEIGHT)
viewport_start = time.time()
await page.setViewport({'width': page.viewport['width'], 'height': step_size})
viewport_time = time.time() - viewport_start
@@ -89,8 +86,8 @@ async def capture_full_page(page, screenshot_format='JPEG', watch_uuid=None, loc
# better than scrollTo incase they override it in the page
await page.evaluate(
"""(y) => {
const el = document.scrollingElement;
if (el) el.scrollTop = y;
document.documentElement.scrollTop = y;
document.body.scrollTop = y;
}""",
y
)
@@ -308,8 +305,6 @@ class fetcher(Fetcher):
await asyncio.wait_for(self.browser.close(), timeout=3.0)
except Exception as cleanup_error:
logger.error(f"[{watch_uuid}] Failed to cleanup browser after page creation failure: {cleanup_error}")
finally:
self.browser = None
raise
# Add console handler to capture console.log from favicon fetcher
@@ -461,7 +456,7 @@ class fetcher(Fetcher):
# Run Browser Steps here
# @todo not yet supported, we switch to playwright in this case
# if self.browser_steps:
# if self.browser_steps_get_valid_steps():
# self.iterate_browser_steps()
@@ -537,14 +532,6 @@ class fetcher(Fetcher):
)
except asyncio.TimeoutError:
raise (BrowserFetchTimedOut(msg=f"Browser connected but was unable to process the page in {max_time} seconds."))
finally:
# Internal cleanup on any exception/timeout - call quit() immediately
# This prevents connection leaks during exception bursts
# Worker.py's quit() call becomes a redundant safety net (idempotent)
try:
await self.quit(watch={'uuid': watch_uuid} if watch_uuid else None)
except Exception as cleanup_error:
logger.error(f"[{watch_uuid}] Error during internal quit() cleanup: {cleanup_error}")
# Plugin registration for built-in fetcher
+8 -65
View File
@@ -1,14 +1,12 @@
from loguru import logger
from urllib.parse import urljoin, urlparse
import hashlib
import os
import re
import asyncio
from functools import partial
from changedetectionio import strtobool
from changedetectionio.content_fetchers.exceptions import BrowserStepsInUnsupportedFetcher, EmptyReply, Non200ErrorCodeReceived
from changedetectionio.content_fetchers.base import Fetcher
from changedetectionio.validate_url import is_private_hostname
# "html_requests" is listed as the default fetcher in store.py!
@@ -38,7 +36,7 @@ class fetcher(Fetcher):
import requests
from requests.exceptions import ProxyError, ConnectionError, RequestException
if self.browser_steps:
if self.browser_steps_get_valid_steps():
raise BrowserStepsInUnsupportedFetcher(url=url)
proxies = {}
@@ -81,48 +79,14 @@ class fetcher(Fetcher):
if strtobool(os.getenv('ALLOW_FILE_URI', 'false')) and url.startswith('file://'):
from requests_file import FileAdapter
session.mount('file://', FileAdapter())
allow_iana_restricted = strtobool(os.getenv('ALLOW_IANA_RESTRICTED_ADDRESSES', 'false'))
try:
# Fresh DNS check at fetch time — catches DNS rebinding regardless of add-time cache.
if not allow_iana_restricted:
parsed_initial = urlparse(url)
if parsed_initial.hostname and is_private_hostname(parsed_initial.hostname):
raise Exception(f"Fetch blocked: '{url}' resolves to a private/reserved IP address. "
f"Set ALLOW_IANA_RESTRICTED_ADDRESSES=true to allow.")
r = session.request(method=request_method,
data=request_body.encode('utf-8') if type(request_body) is str else request_body,
url=url,
headers=request_headers,
timeout=timeout,
proxies=proxies,
verify=False,
allow_redirects=False)
# Manually follow redirects so each hop's resolved IP can be validated,
# preventing SSRF via an open redirect on a public host.
current_url = url
for _ in range(10):
if not r.is_redirect:
break
location = r.headers.get('Location', '')
redirect_url = urljoin(current_url, location)
if not allow_iana_restricted:
parsed_redirect = urlparse(redirect_url)
if parsed_redirect.hostname and is_private_hostname(parsed_redirect.hostname):
raise Exception(f"Redirect blocked: '{redirect_url}' resolves to a private/reserved IP address.")
current_url = redirect_url
r = session.request('GET', redirect_url,
headers=request_headers,
timeout=timeout,
proxies=proxies,
verify=False,
allow_redirects=False)
else:
raise Exception("Too many redirects")
verify=False)
except Exception as e:
msg = str(e)
if proxies and 'SOCKSHTTPSConnectionPool' in msg:
@@ -148,32 +112,10 @@ class fetcher(Fetcher):
# Default to UTF-8 for XML if no encoding found
r.encoding = 'utf-8'
else:
# No charset in HTTP header - sniff encoding in priority order matching browsers
# (WHATWG encoding sniffing algorithm):
# 1. BOM - highest confidence, check before anything else
# 2. <meta charset> in first 2kb
# 3. chardet statistical detection - last resort
# See: https://github.com/dgtlmoon/changedetection.io/issues/3952
boms = [
(b'\xef\xbb\xbf', 'utf-8-sig'),
(b'\xff\xfe', 'utf-16-le'),
(b'\xfe\xff', 'utf-16-be'),
]
bom_encoding = next((enc for bom, enc in boms if r.content.startswith(bom)), None)
if bom_encoding:
logger.info(f"URL: {url} Using encoding '{bom_encoding}' detected from BOM")
r.encoding = bom_encoding
else:
meta_charset_match = re.search(rb'<meta[^>]+charset\s*=\s*["\']?\s*([^"\'\s;>]+)', r.content[:2000], re.IGNORECASE)
if meta_charset_match:
encoding = meta_charset_match.group(1).decode('ascii', errors='ignore')
logger.info(f"URL: {url} No content-type encoding in HTTP headers - Using encoding '{encoding}' from HTML meta charset tag")
r.encoding = encoding
else:
encoding = chardet.detect(r.content)['encoding']
logger.warning(f"URL: {url} No charset in headers or meta tag, guessed encoding as '{encoding}' via chardet")
if encoding:
r.encoding = encoding
# For other content types, use chardet
encoding = chardet.detect(r.content)['encoding']
if encoding:
r.encoding = encoding
self.headers = r.headers
@@ -242,6 +184,7 @@ class fetcher(Fetcher):
)
async def quit(self, watch=None):
# In case they switched to `requests` fetcher from something else
# Then the screenshot could be old, in any case, it's not used here.
# REMOVE_REQUESTS_OLD_SCREENSHOTS - Mainly used for testing
@@ -56,10 +56,6 @@ def stitch_images_worker_raw_bytes(pipe_conn, original_page_height, capture_heig
im.close()
del images
# Clip stitched image to capture_height (chunks may overshoot by up to step_size-1 px)
if total_height > capture_height:
stitched = stitched.crop((0, 0, max_width, capture_height))
# Draw caption only if page was trimmed
if original_page_height > capture_height:
draw = ImageDraw.Draw(stitched)
@@ -104,17 +104,15 @@ class fetcher(Fetcher):
from selenium.webdriver.remote.remote_connection import RemoteConnection
from selenium.webdriver.remote.webdriver import WebDriver as RemoteWebDriver
from selenium.webdriver.remote.client_config import ClientConfig
from urllib3.util import Timeout
driver = None
try:
connection_timeout = int(os.getenv("WEBDRIVER_CONNECTION_TIMEOUT", 90))
client_config = ClientConfig(
remote_server_addr=self.browser_connection_url,
timeout=Timeout(connect=connection_timeout, total=connection_timeout)
# Create the RemoteConnection and set timeout (e.g., 30 seconds)
remote_connection = RemoteConnection(
self.browser_connection_url,
)
remote_connection = RemoteConnection(client_config=client_config)
remote_connection.set_timeout(30) # seconds
# Now create the driver with the RemoteConnection
driver = RemoteWebDriver(
command_executor=remote_connection,
options=options
-30
View File
@@ -45,36 +45,6 @@ CHANGED_INTO_PLACEMARKER_CLOSED = '@changed_into_PLACEMARKER_CLOSED'
# Compiled regex patterns for performance
WHITESPACE_NORMALIZE_RE = re.compile(r'\s+')
# Regexes built from the constants above — no brittle hardcoded strings
_EXTRACT_REMOVED_RE = re.compile(
re.escape(REMOVED_PLACEMARKER_OPEN) + r'(.*?)' + re.escape(REMOVED_PLACEMARKER_CLOSED)
+ r'|' +
re.escape(CHANGED_PLACEMARKER_OPEN) + r'(.*?)' + re.escape(CHANGED_PLACEMARKER_CLOSED)
)
_EXTRACT_ADDED_RE = re.compile(
re.escape(ADDED_PLACEMARKER_OPEN) + r'(.*?)' + re.escape(ADDED_PLACEMARKER_CLOSED)
+ r'|' +
re.escape(CHANGED_INTO_PLACEMARKER_OPEN) + r'(.*?)' + re.escape(CHANGED_INTO_PLACEMARKER_CLOSED)
)
def extract_changed_from(raw_diff: str) -> str:
"""Extract only the removed/changed-from fragments from a raw diff string.
Useful for {{diff_changed_from}} gives just the old value (e.g. old price),
not the full surrounding line. Multiple fragments joined with newlines.
"""
return '\n'.join(m.group(1) or m.group(2) for m in _EXTRACT_REMOVED_RE.finditer(raw_diff))
def extract_changed_to(raw_diff: str) -> str:
"""Extract only the added/changed-into fragments from a raw diff string.
Useful for {{diff_changed_to}} gives just the new value (e.g. new price),
not the full surrounding line. Multiple fragments joined with newlines.
"""
return '\n'.join(m.group(1) or m.group(2) for m in _EXTRACT_ADDED_RE.finditer(raw_diff))
def render_inline_word_diff(before_line: str, after_line: str, ignore_junk: bool = False, markdown_style: str = None, tokenizer: str = 'words_and_html') -> tuple[str, bool]:
"""
+36 -72
View File
@@ -4,7 +4,6 @@ import flask_login
import locale
import os
import queue
import re
import sys
import threading
import time
@@ -28,6 +27,7 @@ from flask import (
session,
url_for,
)
from flask_compress import Compress as FlaskCompress
from flask_restful import abort, Api
from flask_cors import CORS
@@ -40,7 +40,7 @@ from loguru import logger
from changedetectionio import __version__
from changedetectionio import queuedWatchMetaData
from changedetectionio.api import Watch, WatchHistory, WatchSingleHistory, WatchHistoryDiff, CreateWatch, Import, SystemInfo, Tag, Tags, Notifications, WatchFavicon, Spec
from changedetectionio.api import Watch, WatchHistory, WatchSingleHistory, WatchHistoryDiff, CreateWatch, Import, SystemInfo, Tag, Tags, Notifications, WatchFavicon
from changedetectionio.api.Search import Search
from .time_handler import is_within_schedule
from changedetectionio.languages import get_available_languages, get_language_codes, get_flag_for_locale, get_timeago_locale
@@ -69,43 +69,19 @@ socketio_server = None
# Enable CORS, especially useful for the Chrome extension to operate from anywhere
CORS(app)
from werkzeug.routing import BaseConverter, ValidationError
from uuid import UUID
class StrictUUIDConverter(BaseConverter):
# Special sentinel values allowed in addition to strict UUIDs
_ALLOWED_SENTINELS = frozenset({'first'})
def to_python(self, value: str) -> str:
if value in self._ALLOWED_SENTINELS:
return value
try:
u = UUID(value)
except ValueError as e:
raise ValidationError() from e
# Reject non-standard formats (braces, URNs, no-hyphens)
if str(u) != value.lower():
raise ValidationError()
return str(u)
def to_url(self, value) -> str:
return str(value)
# app setup (once)
app.url_map.converters["uuid_str"] = StrictUUIDConverter
# Flask-Compress handles HTTP compression, Socket.IO compression disabled to prevent memory leak.
# There's also a bug between flask compress and socketio that causes some kind of slow memory leak
# It's better to use compression on your reverse proxy (nginx etc) instead.
if strtobool(os.getenv("FLASK_ENABLE_COMPRESSION")):
from flask_compress import Compress as FlaskCompress
app.config['COMPRESS_MIN_SIZE'] = 2096
app.config['COMPRESS_MIMETYPES'] = ['text/html', 'text/css', 'text/javascript', 'application/json', 'application/javascript', 'image/svg+xml']
# Use gzip only - smaller memory footprint than zstd/brotli (4-8KB vs 200-500KB contexts)
app.config['COMPRESS_ALGORITHM'] = ['gzip']
compress = FlaskCompress()
compress.init_app(app)
compress = FlaskCompress()
compress.init_app(app)
app.config['TEMPLATES_AUTO_RELOAD'] = False
@@ -212,23 +188,14 @@ def _is_safe_valid_url(test_url):
from .validate_url import is_safe_valid_url
return is_safe_valid_url(test_url)
@app.template_global('get_html_head_extras')
def _get_html_head_extras():
from .pluggy_interface import collect_html_head_extras
return collect_html_head_extras()
@app.template_filter('format_number_locale')
def _jinja2_filter_format_number_locale(value: float) -> str:
"Formats for example 4000.10 to the local locale default of 4,000.10"
# Format the number with two decimal places (locale format string will return 6 decimal)
formatted_value = locale.format_string("%.2f", value, grouping=True)
return formatted_value
@app.template_filter('regex_search')
def _jinja2_filter_regex_search(value, pattern):
import re
return re.search(pattern, str(value)) is not None
return formatted_value
@app.template_global('is_checking_now')
def _watch_is_checking_now(watch_obj, format="%Y-%m-%d %H:%M:%S"):
@@ -393,8 +360,6 @@ def _jinja2_filter_fetcher_status_icons(fetcher_name):
return ''
_RE_SANITIZE_TAG = re.compile(r'[^a-zA-Z0-9]')
@app.template_filter('sanitize_tag_class')
def _jinja2_filter_sanitize_tag_class(tag_title):
"""Sanitize a tag title to create a valid CSS class name.
@@ -406,8 +371,9 @@ def _jinja2_filter_sanitize_tag_class(tag_title):
Returns:
str: A sanitized string suitable for use as a CSS class name
"""
import re
# Remove all non-alphanumeric characters and convert to lowercase
sanitized = _RE_SANITIZE_TAG.sub('', tag_title).lower()
sanitized = re.sub(r'[^a-zA-Z0-9]', '', tag_title).lower()
# Ensure it starts with a letter (CSS requirement)
if sanitized and not sanitized[0].isalpha():
sanitized = 'tag' + sanitized
@@ -495,21 +461,28 @@ def changedetection_app(config=None, datastore_o=None):
available_languages = get_available_languages()
language_codes = get_language_codes()
_locale_aliases = {
'zh-TW': 'zh_Hant_TW', # Traditional Chinese: browser sends zh-TW, we use zh_Hant_TW
'zh_TW': 'zh_Hant_TW', # Also handle underscore variant
}
_locale_match_list = language_codes + list(_locale_aliases.keys())
def get_locale():
# Locale aliases: map browser language codes to translation directory names
# This handles cases where browsers send standard codes (e.g., zh-TW)
# but our translations use more specific codes (e.g., zh_Hant_TW)
locale_aliases = {
'zh-TW': 'zh_Hant_TW', # Traditional Chinese: browser sends zh-TW, we use zh_Hant_TW
'zh_TW': 'zh_Hant_TW', # Also handle underscore variant
}
# 1. Try to get locale from session (user explicitly selected)
if 'locale' in session:
return session['locale']
# 2. Fall back to Accept-Language header
browser_locale = request.accept_languages.best_match(_locale_match_list)
# 3. Map browser locale to our internal locale if needed
return _locale_aliases.get(browser_locale, browser_locale)
# Get the best match from browser's Accept-Language header
browser_locale = request.accept_languages.best_match(language_codes + list(locale_aliases.keys()))
# 3. Check if we need to map the browser locale to our internal locale
if browser_locale in locale_aliases:
return locale_aliases[browser_locale]
return browser_locale
# Initialize Babel with locale selector
babel = Babel(app, locale_selector=get_locale)
@@ -561,22 +534,22 @@ def changedetection_app(config=None, datastore_o=None):
watch_api.add_resource(WatchHistoryDiff,
'/api/v1/watch/<uuid_str:uuid>/difference/<string:from_timestamp>/<string:to_timestamp>',
'/api/v1/watch/<string:uuid>/difference/<string:from_timestamp>/<string:to_timestamp>',
resource_class_kwargs={'datastore': datastore})
watch_api.add_resource(WatchSingleHistory,
'/api/v1/watch/<uuid_str:uuid>/history/<string:timestamp>',
'/api/v1/watch/<string:uuid>/history/<string:timestamp>',
resource_class_kwargs={'datastore': datastore, 'update_q': update_q})
watch_api.add_resource(WatchFavicon,
'/api/v1/watch/<uuid_str:uuid>/favicon',
'/api/v1/watch/<string:uuid>/favicon',
resource_class_kwargs={'datastore': datastore})
watch_api.add_resource(WatchHistory,
'/api/v1/watch/<uuid_str:uuid>/history',
'/api/v1/watch/<string:uuid>/history',
resource_class_kwargs={'datastore': datastore})
watch_api.add_resource(CreateWatch, '/api/v1/watch',
resource_class_kwargs={'datastore': datastore, 'update_q': update_q})
watch_api.add_resource(Watch, '/api/v1/watch/<uuid_str:uuid>',
watch_api.add_resource(Watch, '/api/v1/watch/<string:uuid>',
resource_class_kwargs={'datastore': datastore, 'update_q': update_q})
watch_api.add_resource(SystemInfo, '/api/v1/systeminfo',
@@ -589,7 +562,7 @@ def changedetection_app(config=None, datastore_o=None):
watch_api.add_resource(Tags, '/api/v1/tags',
resource_class_kwargs={'datastore': datastore})
watch_api.add_resource(Tag, '/api/v1/tag', '/api/v1/tag/<uuid_str:uuid>',
watch_api.add_resource(Tag, '/api/v1/tag', '/api/v1/tag/<string:uuid>',
resource_class_kwargs={'datastore': datastore, 'update_q': update_q})
watch_api.add_resource(Search, '/api/v1/search',
@@ -598,8 +571,6 @@ def changedetection_app(config=None, datastore_o=None):
watch_api.add_resource(Notifications, '/api/v1/notifications',
resource_class_kwargs={'datastore': datastore})
watch_api.add_resource(Spec, '/api/v1/full-spec')
@login_manager.user_loader
def user_loader(email):
user = User()
@@ -741,14 +712,8 @@ def changedetection_app(config=None, datastore_o=None):
def static_content(group, filename):
from flask import make_response
import re
# Strict sanitization: only allow a-z, 0-9, and underscore (blocks .. and other traversal)
group = re.sub(r'[^a-z0-9_-]+', '', group.lower())
filename = filename
# Additional safety: reject if sanitization resulted in empty strings
if not group or not filename:
abort(404)
group = re.sub(r'[^\w.-]+', '', group.lower())
filename = re.sub(r'[^\w.-]+', '', filename.lower())
if group == 'screenshot':
# Could be sensitive, follow password requirements
@@ -1022,16 +987,15 @@ def check_for_new_version():
import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
session = requests.Session()
session.verify = False
while not app.config.exit.is_set():
try:
r = session.post("https://changedetection.io/check-ver.php",
r = requests.post("https://changedetection.io/check-ver.php",
data={'version': __version__,
'app_guid': datastore.data['app_guid'],
'watch_count': len(datastore.data['watching'])
})
},
verify=False)
except:
pass
+10 -7
View File
@@ -7,6 +7,8 @@ from flask_babel import lazy_gettext as _l, gettext
from changedetectionio.blueprint.rss import RSS_FORMAT_TYPES, RSS_TEMPLATE_TYPE_OPTIONS, RSS_TEMPLATE_HTML_DEFAULT
from changedetectionio.conditions.form import ConditionFormRow
from changedetectionio.notification_service import NotificationContextData
from changedetectionio.processors.image_ssim_diff import SCREENSHOT_COMPARISON_THRESHOLD_OPTIONS, \
SCREENSHOT_COMPARISON_THRESHOLD_OPTIONS_DEFAULT
from changedetectionio.strtobool import strtobool
from changedetectionio import processors
@@ -35,7 +37,7 @@ from changedetectionio.widgets import TernaryNoneBooleanField
# default
# each select <option data-enabled="enabled-0-0"
from changedetectionio.browser_steps.browser_steps import browser_step_ui_config
from changedetectionio.blueprint.browser_steps.browser_steps import browser_step_ui_config
from changedetectionio import html_tools, content_fetchers
@@ -492,6 +494,7 @@ class ValidateJinja2Template(object):
Validates that a {token} is from a valid set
"""
def __call__(self, form, field):
from changedetectionio import notification
from changedetectionio.jinja2_custom import create_jinja_env
from jinja2 import BaseLoader, TemplateSyntaxError, UndefinedError
from jinja2.meta import find_undeclared_variables
@@ -608,12 +611,13 @@ class ValidateCSSJSONXPATHInput(object):
raise ValidationError("XPath not permitted in this field!")
from lxml import etree, html
import elementpath
from changedetectionio.html_tools import SafeXPath3Parser
# xpath 2.0-3.1
from elementpath.xpath3 import XPath3Parser
tree = html.fromstring("<html></html>")
line = line.replace('xpath:', '')
try:
elementpath.select(tree, line.strip(), parser=SafeXPath3Parser)
elementpath.select(tree, line.strip(), parser=XPath3Parser)
except elementpath.ElementPathError as e:
message = field.gettext('\'%s\' is not a valid XPath expression. (%s)')
raise ValidationError(message % (line, str(e)))
@@ -667,11 +671,9 @@ class ValidateCSSJSONXPATHInput(object):
# `jq` requires full compilation in windows and so isn't generally available
raise ValidationError("jq not support not found")
from changedetectionio.html_tools import validate_jq_expression
input = line.replace('jq:', '')
try:
validate_jq_expression(input)
jq.compile(input)
except (ValueError) as e:
message = field.gettext('\'%s\' is not a valid jq expression. (%s)')
@@ -818,7 +820,8 @@ class processor_text_json_diff_form(commonSettingsForm):
filter_text_removed = BooleanField(_l('Removed lines'), default=True)
trigger_text = StringListField(_l('Keyword triggers - Trigger/wait for text'), [validators.Optional(), ValidateListRegex()])
browser_steps = FieldList(FormField(SingleBrowserStep), min_entries=10)
if os.getenv("PLAYWRIGHT_DRIVER_URL"):
browser_steps = FieldList(FormField(SingleBrowserStep), min_entries=10)
text_should_not_be_present = StringListField(_l('Block change-detection while text matches'), [validators.Optional(), ValidateListRegex()])
webdriver_js_execute_code = TextAreaField(_l('Execute JavaScript before change detection'), render_kw={"rows": "5"}, validators=[validators.Optional()])
@@ -1007,7 +1010,7 @@ class globalSettingsApplicationForm(commonSettingsForm):
render_kw={"placeholder": "0.1", "style": "width: 8em;"}
)
password = SaltyPasswordField(_l('Password'), render_kw={"autocomplete": "new-password"})
password = SaltyPasswordField(_l('Password'))
pager_size = IntegerField(_l('Pager size'),
render_kw={"style": "width: 5em;"},
validators=[validators.NumberRange(min=0,
+13 -146
View File
@@ -4,7 +4,6 @@ from loguru import logger
from typing import List
import html
import json
import os
import re
# HTML added to be sure each result matching a filter (.example) gets converted to a new line by Inscriptis
@@ -14,45 +13,6 @@ PERL_STYLE_REGEX = r'^/(.*?)/([a-z]*)?$'
TITLE_RE = re.compile(r"<title[^>]*>(.*?)</title>", re.I | re.S)
META_CS = re.compile(r'<meta[^>]+charset=["\']?\s*([a-z0-9_\-:+.]+)', re.I)
# jq builtins that can leak sensitive data or cause harm when user-supplied expressions are executed.
# env/$ENV reads all process environment variables (passwords, API keys, etc.)
# include/import can read arbitrary files from disk
# input/inputs reads beyond the supplied JSON data
# debug/stderr leaks data to stderr
# halt/halt_error terminates the process (DoS)
_JQ_BLOCKED_PATTERNS = [
(re.compile(r'\benv\b'), 'env (reads environment variables)'),
(re.compile(r'\$ENV\b'), '$ENV (reads environment variables)'),
(re.compile(r'\binclude\b'), 'include (reads files from disk)'),
(re.compile(r'\bimport\b'), 'import (reads files from disk)'),
(re.compile(r'\binputs?\b'), 'input/inputs (reads beyond provided data)'),
(re.compile(r'\bdebug\b'), 'debug (leaks data to stderr)'),
(re.compile(r'\bstderr\b'), 'stderr (leaks data to stderr)'),
(re.compile(r'\bhalt(?:_error)?\b'), 'halt/halt_error (terminates the process)'),
(re.compile(r'\$__loc__\b'), '$__loc__ (leaks file path information)'),
(re.compile(r'\bbuiltins\b'), 'builtins (enumerates available functions)'),
(re.compile(r'\bmodulemeta\b'), 'modulemeta (leaks module information)'),
(re.compile(r'\$JQ_BUILD_CONFIGURATION\b'), '$JQ_BUILD_CONFIGURATION (leaks build information)'),
]
def validate_jq_expression(expression: str) -> None:
"""Raise ValueError if the jq expression uses any dangerous builtin.
User-supplied jq expressions are executed server-side. Without this check,
builtins like `env` expose every process environment variable (SALTED_PASS,
proxy credentials, API keys, etc.) as watch output.
"""
from changedetectionio.strtobool import strtobool
if strtobool(os.getenv('JQ_ALLOW_RISKY_EXPRESSIONS', 'false')):
return
for pattern, description in _JQ_BLOCKED_PATTERNS:
if pattern.search(expression):
msg = f"jq expression uses disallowed builtin: {description}"
logger.critical(f"Security: blocked jq expression containing '{description}' - expression: {expression!r}")
raise ValueError(msg)
META_CT = re.compile(r'<meta[^>]+http-equiv=["\']?content-type["\']?[^>]*content=["\'][^>]*charset=([a-z0-9_\-:+.]+)', re.I)
# 'price' , 'lowPrice', 'highPrice' are usually under here
@@ -63,59 +23,6 @@ class JSONNotFound(ValueError):
def __init__(self, msg):
ValueError.__init__(self, msg)
_DEFAULT_UNSAFE_XPATH3_FUNCTIONS = [
'unparsed-text',
'unparsed-text-lines',
'unparsed-text-available',
'doc',
'doc-available',
'json-doc',
'json-doc-available',
'collection', # XPath 2.0+: loads XML node collections from arbitrary URIs
'uri-collection', # XPath 3.0+: enumerates URIs from resource collections
'transform', # XPath 3.1: XSLT transformation (currently raises, block proactively)
'load-xquery-module', # XPath 3.1: loads XQuery modules (currently raises, block proactively)
'environment-variable',
'available-environment-variables',
]
def _build_safe_xpath3_parser():
"""Return an XPath3Parser subclass with filesystem/environment access functions removed.
XPath 3.0 includes functions that can read arbitrary files or environment variables:
- unparsed-text / unparsed-text-lines / unparsed-text-available (file read)
- doc / doc-available (XML fetch from URI)
- environment-variable / available-environment-variables (env var leakage)
Subclassing gives us an independent symbol_table copy (not shared with the parent class),
so removing entries here does not affect XPath3Parser itself.
Override the blocked list via the XPATH_BLOCKED_FUNCTIONS environment variable
(comma-separated, e.g. "unparsed-text,doc,environment-variable").
"""
import os
from elementpath.xpath3 import XPath3Parser
class SafeXPath3Parser(XPath3Parser):
pass
env_override = os.getenv('XPATH_BLOCKED_FUNCTIONS')
if env_override is not None:
blocked = [f.strip() for f in env_override.split(',') if f.strip()]
else:
blocked = _DEFAULT_UNSAFE_XPATH3_FUNCTIONS
for _fn in blocked:
SafeXPath3Parser.symbol_table.pop(_fn, None)
return SafeXPath3Parser
# Module-level singleton — built once, reused everywhere.
SafeXPath3Parser = _build_safe_xpath3_parser()
# Doesn't look like python supports forward slash auto enclosure in re.findall
# So convert it to inline flag "(?i)foobar" type configuration
@lru_cache(maxsize=100)
@@ -276,6 +183,8 @@ def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False
"""
from lxml import etree, html
import elementpath
# xpath 2.0-3.1
from elementpath.xpath3 import XPath3Parser
parser = etree.HTMLParser()
tree = None
@@ -301,7 +210,7 @@ def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False
# This allows //title to match elements in the default namespace
namespaces[''] = tree.nsmap[None]
r = elementpath.select(tree, xpath_filter.strip(), namespaces=namespaces, parser=SafeXPath3Parser)
r = elementpath.select(tree, xpath_filter.strip(), namespaces=namespaces, parser=XPath3Parser)
#@note: //title/text() now works with default namespaces (fixed by registering '' prefix)
#@note: //title/text() wont work where <title>CDATA.. (use cdata_in_document_to_text first)
@@ -326,9 +235,6 @@ def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False
else:
html_block += elementpath_tostring(element)
# Drop element references before the finally block so tree.clear() can release
# the libxml2 document immediately (elements pin the C-level doc via refcount).
del r
return html_block
finally:
# Explicitly clear the tree to free memory
@@ -424,16 +330,12 @@ def _parse_json(json_data, json_filter):
raise Exception("jq not support not found")
if json_filter.startswith("jq:"):
expr = json_filter.removeprefix("jq:")
validate_jq_expression(expr)
jq_expression = jq.compile(expr)
jq_expression = jq.compile(json_filter.removeprefix("jq:"))
match = jq_expression.input(json_data).all()
return _get_stripped_text_from_json_match(match)
if json_filter.startswith("jqraw:"):
expr = json_filter.removeprefix("jqraw:")
validate_jq_expression(expr)
jq_expression = jq.compile(expr)
jq_expression = jq.compile(json_filter.removeprefix("jqraw:"))
match = jq_expression.input(json_data).all()
return '\n'.join(str(item) for item in match)
@@ -537,25 +439,13 @@ def extract_json_as_string(content, json_filter, ensure_is_ldjson_info_type=None
except json.JSONDecodeError as e:
logger.warning(f"Error processing JSON {content[:20]}...{str(e)})")
else:
# Check for JSONP wrapper: someCallback({...}) or some.namespace({...})
# Server may claim application/json but actually return JSONP
jsonp_match = re.match(r'^\w[\w.]*\s*\((.+)\)\s*;?\s*$', content.lstrip("\ufeff").strip(), re.DOTALL)
if jsonp_match:
try:
inner = jsonp_match.group(1).strip()
logger.warning(f"Content looks like JSONP, attempting to extract inner JSON for filter '{json_filter}'")
stripped_text_from_html = _parse_json(json.loads(inner), json_filter)
except json.JSONDecodeError as e:
logger.warning(f"Error processing JSONP inner content {content[:20]}...{str(e)})")
if not stripped_text_from_html:
# Probably something else, go fish inside for it
try:
stripped_text_from_html = extract_json_blob_from_html(content=content,
ensure_is_ldjson_info_type=ensure_is_ldjson_info_type,
json_filter=json_filter)
except json.JSONDecodeError as e:
logger.warning(f"Error processing JSON while extracting JSON from HTML blob {content[:20]}...{str(e)})")
# Probably something else, go fish inside for it
try:
stripped_text_from_html = extract_json_blob_from_html(content=content,
ensure_is_ldjson_info_type=ensure_is_ldjson_info_type,
json_filter=json_filter )
except json.JSONDecodeError as e:
logger.warning(f"Error processing JSON while extracting JSON from HTML blob {content[:20]}...{str(e)})")
if not stripped_text_from_html:
# Re 265 - Just return an empty string when filter not found
@@ -671,33 +561,10 @@ def html_to_text(html_content: str, render_anchor_tag_content=False, is_rss=Fals
)
else:
parser_config = None
if is_rss:
html_content = re.sub(r'<title([\s>])', r'<h1\1', html_content)
html_content = re.sub(r'</title>', r'</h1>', html_content)
else:
# Use BS4 html.parser to strip bloat — SPA's often dump 10MB+ of CSS/JS into <head>,
# causing inscriptis to silently give up. Regex-based stripping is unsafe because tags
# can appear inside JSON data attributes with JS-escaped closing tags (e.g. <\/script>),
# causing the regex to scan past the intended close and eat real page content.
from bs4 import BeautifulSoup
soup = BeautifulSoup(html_content, 'html.parser')
# Strip tags that inscriptis cannot render as meaningful text and which can be very large.
# svg/math: produce path-data/MathML garbage; canvas/iframe/template: no inscriptis handlers.
# video/audio/picture are kept — they may contain meaningful fallback text or captions.
for tag in soup.find_all(['head', 'script', 'style', 'noscript', 'svg',
'math', 'canvas', 'iframe', 'template']):
tag.decompose()
# SPAs often use <body style="display:none"> to hide content until JS loads.
# inscriptis respects CSS display rules, so strip hiding styles from the body tag.
body_tag = soup.find('body')
if body_tag and body_tag.get('style'):
style = body_tag['style']
if re.search(r'\b(?:display\s*:\s*none|visibility\s*:\s*hidden)\b', style, re.IGNORECASE):
logger.debug(f"html_to_text: Removing hiding styles from body tag (found: '{style}')")
del body_tag['style']
html_content = str(soup)
text_content = get_text(html_content, config=parser_config)
return text_content
+10 -15
View File
@@ -28,20 +28,17 @@ def get_timeago_locale(flask_locale):
str: timeago library locale code (e.g., 'en', 'zh_CN', 'pt_PT')
"""
locale_map = {
'zh': 'zh_CN', # Chinese Simplified
'zh': 'zh_CN', # Chinese Simplified
# timeago library just hasn't been updated to use the more modern locale naming convention, before BCP 47 / RFC 5646.
'zh_TW': 'zh_TW', # Chinese Traditional (timeago uses zh_TW)
'zh_TW': 'zh_TW', # Chinese Traditional (timeago uses zh_TW)
'zh_Hant_TW': 'zh_TW', # Flask-Babel normalizes zh_TW to zh_Hant_TW, map back to timeago's zh_TW
'pt': 'pt_PT', # Portuguese (Portugal)
'pt_BR': 'pt_BR', # Portuguese (Brasil)
'sv': 'sv_SE', # Swedish
'no': 'nb_NO', # Norwegian Bokmål
'hi': 'in_HI', # Hindi
'cs': 'en', # Czech not supported by timeago, fallback to English
'ja': 'ja', # Japanese
'uk': 'uk', # Ukrainian
'en_GB': 'en', # British English - timeago uses 'en'
'en_US': 'en', # American English - timeago uses 'en'
'pt': 'pt_PT', # Portuguese (Portugal)
'sv': 'sv_SE', # Swedish
'no': 'nb_NO', # Norwegian Bokmål
'hi': 'in_HI', # Hindi
'cs': 'en', # Czech not supported by timeago, fallback to English
'en_GB': 'en', # British English - timeago uses 'en'
'en_US': 'en', # American English - timeago uses 'en'
}
return locale_map.get(flask_locale, flask_locale)
@@ -55,8 +52,7 @@ LANGUAGE_DATA = {
'ko': {'flag': 'fi fi-kr fis', 'name': '한국어'},
'cs': {'flag': 'fi fi-cz fis', 'name': 'Čeština'},
'es': {'flag': 'fi fi-es fis', 'name': 'Español'},
'pt': {'flag': 'fi fi-pt fis', 'name': 'Português (Portugal)'},
'pt_BR': {'flag': 'fi fi-br fis', 'name': 'Português (Brasil)'},
'pt': {'flag': 'fi fi-pt fis', 'name': 'Português'},
'it': {'flag': 'fi fi-it fis', 'name': 'Italiano'},
'ja': {'flag': 'fi fi-jp fis', 'name': '日本語'},
'zh': {'flag': 'fi fi-cn fis', 'name': '中文 (简体)'},
@@ -71,7 +67,6 @@ LANGUAGE_DATA = {
'tr': {'flag': 'fi fi-tr fis', 'name': 'Türkçe'},
'ar': {'flag': 'fi fi-sa fis', 'name': 'العربية'},
'hi': {'flag': 'fi fi-in fis', 'name': 'हिन्दी'},
'uk': {'flag': 'fi fi-ua fis', 'name': 'Українська'},
}
+2 -9
View File
@@ -2,7 +2,6 @@ from os import getenv
from copy import deepcopy
from changedetectionio.blueprint.rss import RSS_FORMAT_TYPES, RSS_CONTENT_FORMAT_DEFAULT
from changedetectionio.model.Tags import TagsDict
from changedetectionio.notification import (
default_notification_body,
@@ -69,7 +68,7 @@ class model(dict):
'schema_version' : 0,
'shared_diff_access': False,
'strip_ignored_lines': False,
'tags': None, # Initialized in __init__ with real datastore_path
'tags': {}, #@todo use Tag.model initialisers
'webdriver_delay': None , # Extra delay in seconds before extracting text
'ui': {
'use_page_title_in_list': True,
@@ -81,16 +80,10 @@ class model(dict):
}
}
def __init__(self, *arg, datastore_path=None, **kw):
def __init__(self, *arg, **kw):
super(model, self).__init__(*arg, **kw)
# Capture any tags data passed in before base_config overwrites the structure
existing_tags = self.get('settings', {}).get('application', {}).get('tags') or {}
# CRITICAL: deepcopy to avoid sharing mutable objects between instances
self.update(deepcopy(self.base_config))
# TagsDict requires the real datastore_path at runtime (cannot be set at class-definition time)
if datastore_path is None:
raise ValueError("App.model() requires 'datastore_path' keyword argument")
self['settings']['application']['tags'] = TagsDict(existing_tags, datastore_path=datastore_path)
def parse_headers_from_text_file(filepath):
-15
View File
@@ -46,26 +46,11 @@ class model(EntityPersistenceMixin, watch_base):
super(model, self).__init__(*arg, **kw)
self['overrides_watch'] = kw.get('default', {}).get('overrides_watch')
self['url_match_pattern'] = kw.get('default', {}).get('url_match_pattern', '')
if kw.get('default'):
self.update(kw['default'])
del kw['default']
def matches_url(self, url: str) -> bool:
"""Return True if this tag should be auto-applied to the given watch URL.
Wildcard patterns (*,?,[ ) use fnmatch; anything else is a case-insensitive
substring match. Returns False if no pattern is configured.
"""
import fnmatch
pattern = self.get('url_match_pattern', '').strip()
if not pattern or not url:
return False
if any(c in pattern for c in ('*', '?', '[')):
return fnmatch.fnmatch(url.lower(), pattern.lower())
return pattern.lower() in url.lower()
# _save_to_disk() method provided by EntityPersistenceMixin
# commit() and _get_commit_data() methods inherited from watch_base
# Tag uses default _get_commit_data() (includes all keys)
-39
View File
@@ -1,39 +0,0 @@
import os
import shutil
from pathlib import Path
from loguru import logger
_SENTINEL = object()
class TagsDict(dict):
"""Dict subclass that removes the corresponding tag.json file when a tag is deleted."""
def __init__(self, *args, datastore_path: str | os.PathLike, **kwargs) -> None:
self._datastore_path = Path(datastore_path)
super().__init__(*args, **kwargs)
def __delitem__(self, key: str) -> None:
super().__delitem__(key)
tag_dir = self._datastore_path / key
tag_json_file = tag_dir / "tag.json"
if not os.path.exists(tag_json_file):
logger.critical(f"Aborting deletion of directory '{tag_dir}' because '{tag_json_file}' does not exist.")
return
try:
shutil.rmtree(tag_dir)
logger.info(f"Deleted tag directory for tag {key!r}")
except FileNotFoundError:
pass
except OSError as e:
logger.error(f"Failed to delete tag directory for tag {key!r}: {e}")
def pop(self, key: str, default=_SENTINEL):
"""Remove and return tag, deleting its tag.json file. Raises KeyError if missing and no default given."""
if key in self:
value = self[key]
del self[key]
return value
if default is _SENTINEL:
raise KeyError(key)
return default
+32 -38
View File
@@ -43,11 +43,6 @@ from ..html_tools import TRANSLATE_WHITESPACE_TABLE
FAVICON_RESAVE_THRESHOLD_SECONDS=86400
BROTLI_COMPRESS_SIZE_THRESHOLD = int(os.getenv('SNAPSHOT_BROTLI_COMPRESSION_THRESHOLD', 1024*20))
# Module-level favicon filename cache: data_dir → basename (or None)
# Keyed by data_dir so it survives Watch object recreation, deepcopy, and concurrent requests.
# Invalidated explicitly in bump_favicon() when a new favicon is saved.
_FAVICON_FILENAME_CACHE: dict = {}
minimum_seconds_recheck_time = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 3))
mtable = {'seconds': 1, 'minutes': 60, 'hours': 3600, 'days': 86400, 'weeks': 86400 * 7}
@@ -388,25 +383,6 @@ class model(EntityPersistenceMixin, watch_base):
return self.get('fetch_backend')
@property
def fetcher_supports_screenshots(self):
"""Return True if the fetcher configured for this watch supports screenshots.
Resolves 'system' via self._datastore, then checks supports_screenshots on
the actual fetcher class. Works for built-in and plugin fetchers alike.
"""
from changedetectionio import content_fetchers
fetcher_name = self.get_fetch_backend # already handles is_pdf → html_requests
if not fetcher_name or fetcher_name == 'system':
fetcher_name = self._datastore['settings']['application'].get('fetch_backend', 'html_requests')
fetcher_class = getattr(content_fetchers, fetcher_name, None)
if fetcher_class is None:
return False
return bool(getattr(fetcher_class, 'supports_screenshots', False))
@property
def is_pdf(self):
url = str(self.get("url") or "").lower()
@@ -830,8 +806,9 @@ class model(EntityPersistenceMixin, watch_base):
with open(fname, 'wb') as f:
f.write(decoded)
# Invalidate module-level favicon filename cache for this watch
_FAVICON_FILENAME_CACHE.pop(self.data_dir, None)
# Invalidate favicon filename cache
if hasattr(self, '_favicon_filename_cache'):
delattr(self, '_favicon_filename_cache')
# A signal that could trigger the socket server to update the browser also
watch_check_update = signal('watch_favicon_bump')
@@ -846,23 +823,35 @@ class model(EntityPersistenceMixin, watch_base):
def get_favicon_filename(self) -> str | None:
"""
Find any favicon.* file in the watch data directory.
Find any favicon.* file in the current working directory
and return the contents of the newest one.
Uses a module-level cache keyed by data_dir to survive Watch object recreation,
deepcopy (which drops instance attrs), and concurrent request races.
Invalidated by bump_favicon() when a new favicon is saved.
MEMORY LEAK FIX: Cache the result to avoid repeated glob.glob() operations.
glob.glob() causes millions of fnmatch allocations when called for every watch on page load.
Returns:
str: Basename of the favicon file, or None if not found.
str: Basename of the newest favicon file, or None if not found.
"""
if self.data_dir in _FAVICON_FILENAME_CACHE:
return _FAVICON_FILENAME_CACHE[self.data_dir]
# Check cache first (prevents 26M+ allocations from repeated glob operations)
cache_key = '_favicon_filename_cache'
if hasattr(self, cache_key):
return getattr(self, cache_key)
import glob
# Search for all favicon.* files
files = glob.glob(os.path.join(self.data_dir, "favicon.*"))
fname = os.path.basename(files[0]) if files else None
_FAVICON_FILENAME_CACHE[self.data_dir] = fname
return fname
if not files:
result = None
else:
# Find the newest by modification time
newest_file = max(files, key=os.path.getmtime)
result = os.path.basename(newest_file)
# Cache the result
setattr(self, cache_key, result)
return result
def get_screenshot_as_thumbnail(self, max_age=3200):
"""Return path to a square thumbnail of the most recent screenshot.
@@ -1193,13 +1182,18 @@ class model(EntityPersistenceMixin, watch_base):
def compile_error_texts(self, has_proxies=None):
"""Compile error texts for this watch.
Accepts has_proxies parameter to ensure it works even outside app context"""
from flask import url_for, has_request_context
from flask import url_for
from markupsafe import Markup
output = [] # Initialize as list since we're using append
last_error = self.get('last_error','')
has_app_context = has_request_context()
try:
url_for('settings.settings_page')
except Exception as e:
has_app_context = False
else:
has_app_context = True
# has app+request context, we can use url_for()
if has_app_context:
-7
View File
@@ -6,8 +6,6 @@ from .persistence import EntityPersistenceMixin, _determine_entity_type
__all__ = ['EntityPersistenceMixin', 'watch_base']
from ..browser_steps.browser_steps import browser_steps_get_valid_steps
USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH = 'System default'
CONDITIONS_MATCH_LOGIC_DEFAULT = 'ALL'
@@ -366,10 +364,6 @@ class watch_base(dict):
self._mark_field_as_edited(key)
def update(self, *args, **kwargs):
if args and args[0].get('browser_steps'):
args[0]['browser_steps'] = browser_steps_get_valid_steps(args[0].get('browser_steps'))
"""Override dict.update() to track modifications to writable fields."""
# Call parent update first
super().update(*args, **kwargs)
@@ -382,7 +376,6 @@ class watch_base(dict):
for key in kwargs.keys():
self._mark_field_as_edited(key)
def pop(self, key, *args):
"""Override dict.pop() to track removal of writable fields."""
result = super().pop(key, *args)
+46 -149
View File
@@ -54,152 +54,34 @@ def _check_cascading_vars(datastore, var_name, watch):
return None
class FormattableTimestamp(str):
"""
A str subclass representing a formatted datetime. As a plain string it renders
with the default format, but can also be called with a custom format argument
in Jinja2 templates:
{{ change_datetime }} '2024-01-15 10:30:00 UTC'
{{ change_datetime(format='%Y') }} '2024'
{{ change_datetime(format='%A') }} 'Monday'
{{ change_datetime(format='%Y-%m-%d') }} '2024-01-15'
Being a str subclass means it is natively JSON serializable.
"""
_DEFAULT_FORMAT = '%Y-%m-%d %H:%M:%S %Z'
def __new__(cls, timestamp):
dt = datetime.datetime.fromtimestamp(int(timestamp), tz=pytz.UTC)
local_tz = datetime.datetime.now().astimezone().tzinfo
dt_local = dt.astimezone(local_tz)
try:
formatted = dt_local.strftime(cls._DEFAULT_FORMAT)
except Exception:
formatted = dt_local.isoformat()
instance = super().__new__(cls, formatted)
instance._dt = dt_local
return instance
def __call__(self, format=_DEFAULT_FORMAT):
try:
return self._dt.strftime(format)
except Exception:
return self._dt.isoformat()
class FormattableExtract(str):
"""
A str subclass that holds only the extracted changed fragments from a diff.
Used for {{diff_changed_from}} and {{diff_changed_to}} tokens.
{{ diff_changed_from }} old value(s) only, e.g. "$99.99"
{{ diff_changed_to }} new value(s) only, e.g. "$109.99"
Multiple changed fragments are joined with newlines.
Being a str subclass means it is natively JSON serializable.
"""
def __new__(cls, prev_snapshot, current_snapshot, extract_fn):
if prev_snapshot or current_snapshot:
from changedetectionio import diff as diff_module
raw = diff_module.render_diff(prev_snapshot, current_snapshot, word_diff=True)
extracted = extract_fn(raw)
else:
extracted = ''
instance = super().__new__(cls, extracted)
return instance
class FormattableDiff(str):
"""
A str subclass representing a rendered diff. As a plain string it renders
with the default options for that variant, but can be called with custom
arguments in Jinja2 templates:
{{ diff }} default diff output
{{ diff(lines=5) }} truncate to 5 lines
{{ diff(added_only=true) }} only show added lines
{{ diff(removed_only=true) }} only show removed lines
{{ diff(context=3) }} 3 lines of context around changes
{{ diff(word_diff=false) }} line-level diff instead of word-level
{{ diff(lines=10, added_only=true) }} combine args
{{ diff_added(lines=5) }} works on any diff_* variant too
Being a str subclass means it is natively JSON serializable.
"""
def __new__(cls, prev_snapshot, current_snapshot, **base_kwargs):
if prev_snapshot or current_snapshot:
from changedetectionio import diff as diff_module
rendered = diff_module.render_diff(prev_snapshot, current_snapshot, **base_kwargs)
else:
rendered = ''
instance = super().__new__(cls, rendered)
instance._prev = prev_snapshot
instance._current = current_snapshot
instance._base_kwargs = base_kwargs
return instance
def __call__(self, lines=None, added_only=False, removed_only=False, context=0,
word_diff=None, case_insensitive=False, ignore_junk=False):
from changedetectionio import diff as diff_module
kwargs = dict(self._base_kwargs)
if added_only:
kwargs['include_removed'] = False
if removed_only:
kwargs['include_added'] = False
if context:
kwargs['context_lines'] = int(context)
if word_diff is not None:
kwargs['word_diff'] = bool(word_diff)
if case_insensitive:
kwargs['case_insensitive'] = True
if ignore_junk:
kwargs['ignore_junk'] = True
result = diff_module.render_diff(self._prev or '', self._current or '', **kwargs)
if lines is not None:
result = '\n'.join(result.splitlines()[:int(lines)])
return result
# What is passed around as notification context, also used as the complete list of valid {{ tokens }}
class NotificationContextData(dict):
def __init__(self, initial_data=None, **kwargs):
# ValidateJinja2Template() validates against the keynames of this dict to check for valid tokens in the body (user submission)
super().__init__({
'base_url': None,
'change_datetime': FormattableTimestamp(time.time()),
'current_snapshot': None,
'diff': FormattableDiff('', ''),
'diff_clean': FormattableDiff('', '', include_change_type_prefix=False),
'diff_added': FormattableDiff('', '', include_removed=False),
'diff_added_clean': FormattableDiff('', '', include_removed=False, include_change_type_prefix=False),
'diff_full': FormattableDiff('', '', include_equal=True),
'diff_full_clean': FormattableDiff('', '', include_equal=True, include_change_type_prefix=False),
'diff_patch': FormattableDiff('', '', patch_format=True),
'diff_removed': FormattableDiff('', '', include_added=False),
'diff_removed_clean': FormattableDiff('', '', include_added=False, include_change_type_prefix=False),
'diff_changed_from': FormattableExtract('', '', extract_fn=lambda x: x),
'diff_changed_to': FormattableExtract('', '', extract_fn=lambda x: x),
'diff': None,
'diff_clean': None,
'diff_added': None,
'diff_added_clean': None,
'diff_full': None,
'diff_full_clean': None,
'diff_patch': None,
'diff_removed': None,
'diff_removed_clean': None,
'diff_url': None,
'markup_text_links_to_html_links': False, # If automatic conversion of plaintext to HTML should happen
'notification_timestamp': time.time(),
'prev_snapshot': None,
'preview_url': None,
'screenshot': None,
'triggered_text': None,
'timestamp_from': None,
'timestamp_to': None,
'triggered_text': None,
'uuid': 'XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX', # Converted to 'watch_uuid' in create_notification_parameters
'watch_mime_type': None,
'watch_tag': None,
'watch_title': None,
'watch_url': 'https://WATCH-PLACE-HOLDER/',
'watch_uuid': 'XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX', # Converted to 'watch_uuid' in create_notification_parameters
})
# Apply any initial data passed in
@@ -221,7 +103,7 @@ class NotificationContextData(dict):
So we can test the output in the notification body
"""
for key in self.keys():
if key in ['uuid', 'time', 'watch_uuid', 'change_datetime'] or key.startswith('diff'):
if key in ['uuid', 'time', 'watch_uuid']:
continue
rand_str = 'RANDOM-PLACEHOLDER-'+''.join(random.choices(string.ascii_letters + string.digits, k=12))
self[key] = rand_str
@@ -233,6 +115,24 @@ class NotificationContextData(dict):
super().__setitem__(key, value)
def timestamp_to_localtime(timestamp):
# Format the date using locale-aware formatting with timezone
dt = datetime.datetime.fromtimestamp(int(timestamp))
dt = dt.replace(tzinfo=pytz.UTC)
# Get local timezone-aware datetime
local_tz = datetime.datetime.now().astimezone().tzinfo
local_dt = dt.astimezone(local_tz)
# Format date with timezone - using strftime for locale awareness
try:
formatted_date = local_dt.strftime('%Y-%m-%d %H:%M:%S %Z')
except:
# Fallback if locale issues
formatted_date = local_dt.isoformat()
return formatted_date
def add_rendered_diff_to_notification_vars(notification_scan_text:str, prev_snapshot:str, current_snapshot:str, word_diff:bool):
"""
Efficiently renders only the diff placeholders that are actually used in the notification text.
@@ -250,12 +150,13 @@ def add_rendered_diff_to_notification_vars(notification_scan_text:str, prev_snap
Returns:
dict: Only the diff placeholders that were found in notification_scan_text, with rendered content
"""
from changedetectionio import diff
import re
from functools import lru_cache
now = time.time()
# Define base kwargs for each diff variant — these become the stored defaults
# on the FormattableDiff object, so {{ diff(lines=5) }} overrides on top of them
# Define specifications for each diff variant
diff_specs = {
'diff': {'word_diff': word_diff},
'diff_clean': {'word_diff': word_diff, 'include_change_type_prefix': False},
@@ -268,27 +169,23 @@ def add_rendered_diff_to_notification_vars(notification_scan_text:str, prev_snap
'diff_removed_clean': {'word_diff': word_diff, 'include_added': False, 'include_change_type_prefix': False},
}
from changedetectionio.diff import extract_changed_from, extract_changed_to
extract_specs = {
'diff_changed_from': extract_changed_from,
'diff_changed_to': extract_changed_to,
}
# Memoize render_diff to avoid duplicate renders with same kwargs
@lru_cache(maxsize=4)
def cached_render(kwargs_tuple):
return diff.render_diff(prev_snapshot, current_snapshot, **dict(kwargs_tuple))
ret = {}
rendered_count = 0
# Only create FormattableDiff/FormattableExtract objects for diff keys actually used in the notification text
# Only check and render diff keys that exist in NotificationContextData
for key in NotificationContextData().keys():
if not key.startswith('diff'):
continue
pattern = rf"(?<![A-Za-z0-9_]){re.escape(key)}(?![A-Za-z0-9_])"
if not re.search(pattern, notification_scan_text, re.IGNORECASE):
continue
if key in diff_specs:
ret[key] = FormattableDiff(prev_snapshot, current_snapshot, **diff_specs[key])
rendered_count += 1
elif key in extract_specs:
ret[key] = FormattableExtract(prev_snapshot, current_snapshot, extract_fn=extract_specs[key])
rendered_count += 1
if key.startswith('diff') and key in diff_specs:
# Check if this placeholder is actually used in the notification text
pattern = rf"(?<![A-Za-z0-9_]){re.escape(key)}(?![A-Za-z0-9_])"
if re.search(pattern, notification_scan_text, re.IGNORECASE):
kwargs = diff_specs[key]
# Convert dict to sorted tuple for cache key (handles duplicate kwarg combinations)
ret[key] = cached_render(tuple(sorted(kwargs.items())))
rendered_count += 1
if rendered_count:
logger.trace(f"Rendered {rendered_count} diff placeholder(s) {sorted(ret.keys())} in {time.time() - now:.3f}s")
@@ -301,7 +198,7 @@ def set_basic_notification_vars(current_snapshot, prev_snapshot, watch, triggere
'current_snapshot': current_snapshot,
'prev_snapshot': prev_snapshot,
'screenshot': watch.get_screenshot() if watch and watch.get('notification_screenshot') else None,
'change_datetime': FormattableTimestamp(timestamp_changed) if timestamp_changed else None,
'change_datetime': timestamp_to_localtime(timestamp_changed) if timestamp_changed else None,
'triggered_text': triggered_text,
'uuid': watch.get('uuid') if watch else None,
'watch_url': watch.get('url') if watch else None,
+1 -182
View File
@@ -129,109 +129,6 @@ class ChangeDetectionSpec:
"""
pass
@hookspec
def update_handler_alter(update_handler, watch, datastore):
"""Modify or wrap the update_handler before it processes a watch.
This hook is called after the update_handler (perform_site_check instance) is created
but before it calls call_browser() and run_changedetection(). Plugins can use this to:
- Wrap the handler to add logging/metrics
- Modify handler configuration
- Add custom preprocessing logic
Args:
update_handler: The perform_site_check instance that will process the watch
watch: The watch dict being processed
datastore: The application datastore
Returns:
object or None: Return a modified/wrapped handler, or None to keep the original.
If multiple plugins return handlers, they are chained in registration order.
"""
pass
@hookspec
def update_finalize(update_handler, watch, datastore, processing_exception):
"""Called after watch processing completes (success or failure).
This hook is called in the finally block after all processing is complete,
allowing plugins to perform cleanup, update metrics, or log final status.
The plugin can access update_handler.last_logging_insert_id if it was stored
during update_handler_alter, and use processing_exception to determine if
the processing succeeded or failed.
Args:
update_handler: The perform_site_check instance (may be None if creation failed)
watch: The watch dict that was processed (may be None if not loaded)
datastore: The application datastore
processing_exception: The exception from the main processing block, or None if successful.
This does NOT include cleanup exceptions - only exceptions from
the actual watch processing (fetch, diff, etc).
Returns:
None: This hook doesn't return a value
"""
pass
@hookspec
def get_html_head_extras():
"""Return HTML to inject into the <head> of every page via base.html.
Plugins can use this to add <script>, <style>, or <link> tags that should
be present on all pages. Return a raw HTML string or None.
IMPORTANT: Always use Flask's url_for() for any src/href URLs so that
sub-path deployments (nginx reverse proxy with USE_X_SETTINGS / X-Forwarded-Prefix)
work correctly. This hook is called inside a request context so url_for() is
always available.
For small amounts of CSS/JS, return them inline no file-serving needed::
from changedetectionio.pluggy_interface import hookimpl
@hookimpl
def get_html_head_extras(self):
return (
'<style>.my-module-banner { color: red; }</style>\\n'
'<script>console.log("my_module_content loaded");</script>'
)
For larger assets, register your own lightweight Flask routes in the plugin
module and point to them with url_for() so the sub-path prefix is handled
automatically::
from flask import url_for, Response
from changedetectionio.pluggy_interface import hookimpl
from changedetectionio.flask_app import app as _app
MY_CSS = ".my-module-example { color: red; }"
MY_JS = "console.log('my_module_content loaded');"
@_app.route('/my_module_content/css')
def my_module_content_css():
return Response(MY_CSS, mimetype='text/css',
headers={'Cache-Control': 'max-age=3600'})
@_app.route('/my_module_content/js')
def my_module_content_js():
return Response(MY_JS, mimetype='application/javascript',
headers={'Cache-Control': 'max-age=3600'})
@hookimpl
def get_html_head_extras(self):
css = url_for('my_module_content_css')
js = url_for('my_module_content_js')
return (
f'<link rel="stylesheet" href="{css}">\\n'
f'<script src="{js}" defer></script>'
)
Returns:
str or None: Raw HTML string to inject inside <head>, or None
"""
pass
# Set up Plugin Manager
plugin_manager = pluggy.PluginManager(PLUGIN_NAMESPACE)
@@ -602,82 +499,4 @@ def get_plugin_template_paths():
template_paths.append(templates_dir)
logger.debug(f"Added plugin template path: {templates_dir}")
return template_paths
def apply_update_handler_alter(update_handler, watch, datastore):
"""Apply update_handler_alter hooks from all plugins.
Allows plugins to wrap or modify the update_handler before it processes a watch.
Multiple plugins can chain modifications - each plugin receives the result from
the previous plugin.
Args:
update_handler: The perform_site_check instance to potentially modify
watch: The watch dict being processed
datastore: The application datastore
Returns:
object: The (potentially modified/wrapped) update_handler
"""
# Get all plugins that implement the update_handler_alter hook
results = plugin_manager.hook.update_handler_alter(
update_handler=update_handler,
watch=watch,
datastore=datastore
)
# Chain results - each plugin gets the result from the previous one
current_handler = update_handler
if results:
for result in results:
if result is not None:
logger.debug(f"Plugin modified update_handler for watch {watch.get('uuid')}")
current_handler = result
return current_handler
def apply_update_finalize(update_handler, watch, datastore, processing_exception):
"""Apply update_finalize hooks from all plugins.
Called in the finally block after watch processing completes, allowing plugins
to perform cleanup, update metrics, or log final status.
Args:
update_handler: The perform_site_check instance (may be None)
watch: The watch dict that was processed (may be None)
datastore: The application datastore
processing_exception: The exception from processing, or None if successful
Returns:
None
"""
try:
# Call all plugins that implement the update_finalize hook
plugin_manager.hook.update_finalize(
update_handler=update_handler,
watch=watch,
datastore=datastore,
processing_exception=processing_exception
)
except Exception as e:
# Don't let plugin errors crash the worker
logger.error(f"Error in update_finalize hook: {e}")
logger.exception(f"update_finalize hook exception details:")
def collect_html_head_extras():
"""Collect and combine HTML head extras from all plugins.
Called from a Flask template global so it always runs inside a request context.
This means url_for() works correctly in plugin implementations, including when the
app is deployed under a sub-path via USE_X_SETTINGS / X-Forwarded-Prefix (ProxyFix
sets SCRIPT_NAME so url_for() automatically prepends the prefix).
Returns:
str: Combined HTML string to inject inside <head>, or empty string
"""
results = plugin_manager.hook.get_html_head_extras()
parts = [r for r in results if r]
return "\n".join(parts) if parts else ""
return template_paths
-9
View File
@@ -9,15 +9,6 @@ Some suggestions for the future
- `graphical`
## API schema extension (`api.yaml`)
A processor can extend the Watch/Tag API schema by placing an `api.yaml` alongside its `__init__.py`.
Define a `components.schemas.processor_config_<name>` entry and it will be merged into `WatchBase` at startup,
making `processor_config_<name>` a valid field on all watch create/update API calls.
The fully merged spec is served live at `/api/v1/full-spec`.
See `restock_diff/api.yaml` for a working example.
## Todo
- Make each processor return a extra list of sub-processed (so you could configure a single processor in different ways)
-12
View File
@@ -341,18 +341,6 @@ def get_processor_descriptions():
return descriptions
def wcag_text_color(hex_bg: str) -> str:
"""Return #000000 or #ffffff for maximum WCAG contrast against hex_bg."""
hex_bg = hex_bg.lstrip('#')
if len(hex_bg) != 6:
return '#000000'
r, g, b = (int(hex_bg[i:i+2], 16) / 255 for i in (0, 2, 4))
def lin(c):
return c / 12.92 if c <= 0.04045 else ((c + 0.055) / 1.055) ** 2.4
L = 0.2126 * lin(r) + 0.7152 * lin(g) + 0.0722 * lin(b)
return '#000000' if L > 0.179 else '#ffffff'
def generate_processor_badge_colors(processor_name):
"""
Generate consistent colors for a processor badge based on its name.
+1 -35
View File
@@ -1,15 +1,10 @@
import asyncio
import re
import hashlib
from changedetectionio.browser_steps.browser_steps import browser_steps_get_valid_steps
from changedetectionio.content_fetchers.base import Fetcher
from changedetectionio.strtobool import strtobool
from changedetectionio.validate_url import is_private_hostname
from copy import deepcopy
from abc import abstractmethod
import os
from urllib.parse import urlparse
from loguru import logger
SCREENSHOT_FORMAT_JPEG = 'JPEG'
@@ -98,23 +93,6 @@ class difference_detection_processor():
self.last_raw_content_checksum = None
async def validate_iana_url(self):
"""Pre-flight SSRF check — runs DNS lookup in executor to avoid blocking the event loop.
Covers all fetchers (requests, playwright, puppeteer, plugins) since every fetch goes
through call_browser().
"""
if strtobool(os.getenv('ALLOW_IANA_RESTRICTED_ADDRESSES', 'false')):
return
parsed = urlparse(self.watch.link)
if not parsed.hostname:
return
loop = asyncio.get_running_loop()
if await loop.run_in_executor(None, is_private_hostname, parsed.hostname):
raise Exception(
f"Fetch blocked: '{self.watch.link}' resolves to a private/reserved IP address. "
f"Set ALLOW_IANA_RESTRICTED_ADDRESSES=true to allow."
)
async def call_browser(self, preferred_proxy_id=None):
from requests.structures import CaseInsensitiveDict
@@ -128,8 +106,6 @@ class difference_detection_processor():
"file:// type access is denied for security reasons."
)
await self.validate_iana_url()
# Requests, playwright, other browser via wss:// etc, fetch_extra_something
prefer_fetch_backend = self.watch.get('fetch_backend', 'system')
@@ -193,7 +169,7 @@ class difference_detection_processor():
)
if self.watch.has_browser_steps:
self.fetcher.browser_steps = browser_steps_get_valid_steps(self.watch.get('browser_steps', []))
self.fetcher.browser_steps = self.watch.get('browser_steps', [])
self.fetcher.browser_steps_screenshot_path = os.path.join(self.datastore.datastore_path, self.watch.get('uuid'))
# Tweak the base config with the per-watch ones
@@ -260,16 +236,6 @@ class difference_detection_processor():
# @todo .quit here could go on close object, so we can run JS if change-detected
await self.fetcher.quit(watch=self.watch)
# Sanitize lone surrogates - these can appear when servers return malformed/mixed-encoding
# content that gets decoded into surrogate characters (e.g. \udcad). Without this,
# encode('utf-8') raises UnicodeEncodeError downstream in checksums, diffs, file writes, etc.
# Covers all fetchers (requests, playwright, puppeteer, selenium) in one place.
# Also note: By this point we SHOULD know the original encoding so it can safely convert to utf-8 for the rest of the app.
# See: https://github.com/dgtlmoon/changedetection.io/issues/3952
if self.fetcher.content and isinstance(self.fetcher.content, str):
self.fetcher.content = self.fetcher.content.encode('utf-8', errors='replace').decode('utf-8')
# After init, call run_changedetection() which will do the actual change-detection
def get_extra_watch_config(self, filename):
+4 -1
View File
@@ -42,7 +42,10 @@ def render_form(watch, datastore, request, url_for, render_template, flash, redi
# Get error information for the template
screenshot_url = watch.get_screenshot()
is_html_webdriver = watch.fetcher_supports_screenshots
system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver'
is_html_webdriver = False
if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver' or watch.get('fetch_backend', '').startswith('extra_browser_'):
is_html_webdriver = True
password_enabled_and_share_is_off = False
if datastore.data['settings']['application'].get('password') or os.getenv("SALTED_PASS", False):
+1 -7
View File
@@ -100,13 +100,7 @@ class guess_stream_type():
if any(s in http_content_header for s in RSS_XML_CONTENT_TYPES):
self.is_rss = True
elif any(s in http_content_header for s in JSON_CONTENT_TYPES):
# JSONP detection: server claims application/json but content is actually JSONP (e.g. cb({...}))
# A JSONP response starts with an identifier followed by '(' - not valid JSON
if re.match(r'^\w[\w.]*\s*\(', test_content):
logger.warning(f"Content-Type header claims JSON but content looks like JSONP (starts with identifier+parenthesis) - treating as plaintext")
self.is_plaintext = True
else:
self.is_json = True
self.is_json = True
elif 'pdf' in magic_content_header:
self.is_pdf = True
# magic will call a rss document 'xml'
@@ -1,7 +1,6 @@
from babel.numbers import parse_decimal
from changedetectionio.model.Watch import model as BaseWatch
from decimal import Decimal, InvalidOperation
from typing import Union
import re
@@ -11,8 +10,6 @@ supports_browser_steps = True
supports_text_filters_and_triggers = True
supports_text_filters_and_triggers_elements = True
supports_request_type = True
_price_re = re.compile(r"Price:\s*(\d+(?:\.\d+)?)", re.IGNORECASE)
class Restock(dict):
@@ -34,7 +31,6 @@ class Restock(dict):
if standardized_value:
# Convert to float
# @todo locale needs to be the locale of the webpage
return float(parse_decimal(standardized_value, locale='en'))
return None
@@ -66,22 +62,15 @@ class Restock(dict):
super().__setitem__(key, value)
def get_price_from_history_str(history_str):
m = _price_re.search(history_str)
if not m:
return None
try:
return str(Decimal(m.group(1)))
except InvalidOperation:
return None
class Watch(BaseWatch):
def __init__(self, *arg, **kw):
super().__init__(*arg, **kw)
self['restock'] = Restock(kw['default']['restock']) if kw.get('default') and kw['default'].get('restock') else Restock()
self['restock_settings'] = kw['default']['restock_settings'] if kw.get('default',{}).get('restock_settings') else {
'follow_price_changes': True,
'in_stock_processing' : 'in_stock_only'
} #@todo update
def clear_watch(self):
super().clear_watch()
@@ -90,27 +79,13 @@ class Watch(BaseWatch):
def extra_notification_token_values(self):
values = super().extra_notification_token_values()
values['restock'] = self.get('restock', {})
values['restock']['previous_price'] = None
if self.history_n >= 2:
history = self.history
if history and len(history) >=2:
"""Unfortunately for now timestamp is stored as string key"""
sorted_keys = sorted(list(history), key=lambda x: int(x))
sorted_keys.reverse()
price_str = self.get_history_snapshot(timestamp=sorted_keys[-1])
if price_str:
values['restock']['previous_price'] = get_price_from_history_str(price_str)
return values
def extra_notification_token_placeholder_info(self):
values = super().extra_notification_token_placeholder_info()
values.append(('restock.price', "Price detected"))
values.append(('restock.in_stock', "In stock status"))
values.append(('restock.original_price', "Original price at first check"))
values.append(('restock.previous_price', "Previous price in history"))
return values
@@ -1,149 +0,0 @@
components:
schemas:
processor_config_restock_diff:
type: object
description: Configuration for the restock_diff processor (restock and price tracking)
properties:
in_stock_processing:
type: string
enum: [in_stock_only, all_changes, 'off']
default: in_stock_only
description: |
When to trigger on stock changes:
- `in_stock_only`: Only trigger on Out Of Stock -> In Stock transitions
- `all_changes`: Trigger on any availability change
- `off`: Disable stock/availability tracking
follow_price_changes:
type: boolean
default: true
description: Monitor and track price changes
price_change_min:
type: [number, 'null']
description: Trigger a notification when the price drops below this value
price_change_max:
type: [number, 'null']
description: Trigger a notification when the price rises above this value
price_change_threshold_percent:
type: [number, 'null']
minimum: 0
maximum: 100
description: Minimum price change percentage since the original price to trigger a notification
paths:
/watch:
post:
x-code-samples:
- lang: 'curl'
label: 'Restock & price tracking'
source: |
curl -X POST "http://localhost:5000/api/v1/watch" \
-H "x-api-key: YOUR_API_KEY" \
-H "Content-Type: application/json" \
-d '{
"url": "https://example.com/product",
"processor": "restock_diff",
"processor_config_restock_diff": {
"in_stock_processing": "in_stock_only",
"follow_price_changes": true,
"price_change_threshold_percent": 5
}
}'
- lang: 'Python'
label: 'Restock & price tracking'
source: |
import requests
headers = {
'x-api-key': 'YOUR_API_KEY',
'Content-Type': 'application/json'
}
data = {
'url': 'https://example.com/product',
'processor': 'restock_diff',
'processor_config_restock_diff': {
'in_stock_processing': 'in_stock_only',
'follow_price_changes': True,
'price_change_threshold_percent': 5,
}
}
response = requests.post('http://localhost:5000/api/v1/watch',
headers=headers, json=data)
print(response.json())
/watch/{uuid}:
put:
x-code-samples:
- lang: 'curl'
label: 'Update restock config'
source: |
curl -X PUT "http://localhost:5000/api/v1/watch/YOUR-UUID" \
-H "x-api-key: YOUR_API_KEY" \
-H "Content-Type: application/json" \
-d '{
"processor_config_restock_diff": {
"in_stock_processing": "all_changes",
"follow_price_changes": true,
"price_change_min": 10.00,
"price_change_max": 500.00
}
}'
- lang: 'Python'
label: 'Update restock config'
source: |
import requests
headers = {
'x-api-key': 'YOUR_API_KEY',
'Content-Type': 'application/json'
}
uuid = 'YOUR-UUID'
data = {
'processor_config_restock_diff': {
'in_stock_processing': 'all_changes',
'follow_price_changes': True,
'price_change_min': 10.00,
'price_change_max': 500.00,
}
}
response = requests.put(f'http://localhost:5000/api/v1/watch/{uuid}',
headers=headers, json=data)
print(response.text)
/tag/{uuid}:
put:
x-code-samples:
- lang: 'curl'
label: 'Set restock config on group/tag'
source: |
curl -X PUT "http://localhost:5000/api/v1/tag/YOUR-TAG-UUID" \
-H "x-api-key: YOUR_API_KEY" \
-H "Content-Type: application/json" \
-d '{
"overrides_watch": true,
"processor_config_restock_diff": {
"in_stock_processing": "in_stock_only",
"follow_price_changes": true,
"price_change_threshold_percent": 10
}
}'
- lang: 'Python'
label: 'Set restock config on group/tag'
source: |
import requests
headers = {
'x-api-key': 'YOUR_API_KEY',
'Content-Type': 'application/json'
}
tag_uuid = 'YOUR-TAG-UUID'
data = {
'overrides_watch': True,
'processor_config_restock_diff': {
'in_stock_processing': 'in_stock_only',
'follow_price_changes': True,
'price_change_threshold_percent': 10,
}
}
response = requests.put(f'http://localhost:5000/api/v1/tag/{tag_uuid}',
headers=headers, json=data)
print(response.text)
@@ -31,7 +31,7 @@ class RestockSettingsForm(Form):
follow_price_changes = BooleanField(_l('Follow price changes'), default=True)
class processor_settings_form(processor_text_json_diff_form):
processor_config_restock_diff = FormField(RestockSettingsForm)
restock_settings = FormField(RestockSettingsForm)
def extra_tab_content(self):
return _l('Restock & Price Detection')
@@ -48,34 +48,34 @@ class processor_settings_form(processor_text_json_diff_form):
output += """
{% from '_helpers.html' import render_field, render_checkbox_field, render_button %}
<script>
<script>
$(document).ready(function () {
toggleOpacity('#processor_config_restock_diff-follow_price_changes', '.price-change-minmax', true);
toggleOpacity('#restock_settings-follow_price_changes', '.price-change-minmax', true);
});
</script>
<fieldset id="restock-fieldset-price-group">
<div class="pure-control-group">
<fieldset class="pure-group inline-radio">
{{ render_field(form.processor_config_restock_diff.in_stock_processing) }}
{{ render_field(form.restock_settings.in_stock_processing) }}
</fieldset>
<fieldset class="pure-group">
{{ render_checkbox_field(form.processor_config_restock_diff.follow_price_changes) }}
{{ render_checkbox_field(form.restock_settings.follow_price_changes) }}
<span class="pure-form-message-inline">Changes in price should trigger a notification</span>
</fieldset>
<fieldset class="pure-group price-change-minmax">
{{ render_field(form.processor_config_restock_diff.price_change_min, placeholder=watch.get('restock', {}).get('price')) }}
<fieldset class="pure-group price-change-minmax">
{{ render_field(form.restock_settings.price_change_min, placeholder=watch.get('restock', {}).get('price')) }}
<span class="pure-form-message-inline">Minimum amount, Trigger a change/notification when the price drops <i>below</i> this value.</span>
</fieldset>
<fieldset class="pure-group price-change-minmax">
{{ render_field(form.processor_config_restock_diff.price_change_max, placeholder=watch.get('restock', {}).get('price')) }}
{{ render_field(form.restock_settings.price_change_max, placeholder=watch.get('restock', {}).get('price')) }}
<span class="pure-form-message-inline">Maximum amount, Trigger a change/notification when the price rises <i>above</i> this value.</span>
</fieldset>
<fieldset class="pure-group price-change-minmax">
{{ render_field(form.processor_config_restock_diff.price_change_threshold_percent) }}
{{ render_field(form.restock_settings.price_change_threshold_percent) }}
<span class="pure-form-message-inline">Price must change more than this % to trigger a change since the first check.</span><br>
<span class="pure-form-message-inline">For example, If the product is $1,000 USD originally, <strong>2%</strong> would mean it has to change more than $20 since the first check.</span><br>
</fieldset>
</fieldset>
</div>
</fieldset>
"""
@@ -437,32 +437,26 @@ class perform_site_check(difference_detection_processor):
# Only try to process restock information (like scraping for keywords) if the page was actually rendered correctly.
# Otherwise it will assume "in stock" because nothing suggesting the opposite was found
#useless
# from ...html_tools import html_to_text
# text = html_to_text(self.fetcher.content)
# logger.debug(f"Length of text after conversion: {len(text)}")
# if not len(text):
# from ...content_fetchers.exceptions import ReplyWithContentButNoText
# raise ReplyWithContentButNoText(url=watch.link,
# status_code=self.fetcher.get_last_status_code(),
# screenshot=self.fetcher.screenshot,
# html_content=self.fetcher.content,
# xpath_data=self.fetcher.xpath_data
# )
from ...html_tools import html_to_text
text = html_to_text(self.fetcher.content)
logger.debug(f"Length of text after conversion: {len(text)}")
if not len(text):
from ...content_fetchers.exceptions import ReplyWithContentButNoText
raise ReplyWithContentButNoText(url=watch.link,
status_code=self.fetcher.get_last_status_code(),
screenshot=self.fetcher.screenshot,
html_content=self.fetcher.content,
xpath_data=self.fetcher.xpath_data
)
# Which restock settings to compare against?
# Settings are stored in restock_diff.json (migrated from watch.json by update_30).
_extra_config = self.get_extra_watch_config('restock_diff.json')
restock_settings = _extra_config.get('restock_diff') or {
'follow_price_changes': True,
'in_stock_processing': 'in_stock_only',
}
restock_settings = watch.get('restock_settings', {})
# See if any tags have 'activate for individual watches in this tag/group?' enabled and use the first we find
for tag_uuid in watch.get('tags'):
tag = self.datastore.data['settings']['application']['tags'].get(tag_uuid, {})
if tag.get('overrides_watch'):
restock_settings = tag.get('processor_config_restock_diff') or {}
restock_settings = tag.get('restock_settings', {})
logger.info(f"Watch {watch.get('uuid')} - Tag '{tag.get('title')}' selected for restock settings override")
break
@@ -283,7 +283,4 @@ def query_price_availability(extracted_data):
if not result.get('availability') and 'availability' in microdata:
result['availability'] = microdata['availability']
# result['price'] could be float or str here, depending on the website, for example it might contain "1,00" commas, etc.
# using something like babel you need to know the locale of the website and even then it can be problematic
# we dont really do anything with the price data so far.. so just accept it the way it comes.
return result
@@ -36,7 +36,7 @@ def _task(watch, update_handler):
def prepare_filter_prevew(datastore, watch_uuid, form_data):
'''Used by @app.route("/edit/<uuid_str:uuid>/preview-rendered", methods=['POST'])'''
'''Used by @app.route("/edit/<string:uuid>/preview-rendered", methods=['POST'])'''
from changedetectionio import forms, html_tools
from changedetectionio.model.Watch import model as watch_model
from concurrent.futures import ThreadPoolExecutor
@@ -154,7 +154,11 @@ def render(watch, datastore, request, url_for, render_template, flash, redirect,
screenshot_url = watch.get_screenshot()
is_html_webdriver = watch.fetcher_supports_screenshots
system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver'
is_html_webdriver = False
if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver' or watch.get('fetch_backend', '').startswith('extra_browser_'):
is_html_webdriver = True
password_enabled_and_share_is_off = False
if datastore.data['settings']['application'].get('password') or os.getenv("SALTED_PASS", False):
@@ -347,7 +347,6 @@ class ContentProcessor:
def extract_text_from_html(self, html_content, stream_content_type):
"""Convert HTML to plain text."""
do_anchor = self.datastore.data["settings"]["application"].get("render_anchor_tag_content", False)
return html_tools.html_to_text(
html_content=html_content,
render_anchor_tag_content=do_anchor,
-2
View File
@@ -29,11 +29,9 @@ def register_watch_operation_handlers(socketio, datastore):
# Perform the operation
if op == 'pause':
watch.toggle_pause()
watch.commit()
logger.info(f"Socket.IO: Toggled pause for watch {uuid}")
elif op == 'mute':
watch.toggle_mute()
watch.commit()
logger.info(f"Socket.IO: Toggled mute for watch {uuid}")
elif op == 'recheck':
# Import here to avoid circular imports
+1 -24
View File
@@ -199,31 +199,8 @@ def handle_watch_update(socketio, **kwargs):
logger.error(f"Socket.IO error in handle_watch_update: {str(e)}")
def _suppress_werkzeug_ws_abrupt_disconnect_noise():
"""Patch BaseWSGIServer.log to suppress the AssertionError traceback that fires when
a browser closes a WebSocket connection mid-handshake (e.g. closing a tab).
The exception is caught inside run_wsgi and routed to self.server.log() it never
propagates out, so wrapping run_wsgi doesn't help. Patching the log method is the
only reliable intercept point. The error is cosmetic: Socket.IO already handles the
disconnect correctly via its own disconnect handler and timeout logic."""
try:
from werkzeug.serving import BaseWSGIServer
_original_log = BaseWSGIServer.log
def _filtered_log(self, type, message, *args):
if type == 'error' and 'write() before start_response' in message:
return
_original_log(self, type, message, *args)
BaseWSGIServer.log = _filtered_log
except Exception:
pass
def init_socketio(app, datastore):
"""Initialize SocketIO with the main Flask app"""
_suppress_werkzeug_ws_abrupt_disconnect_noise()
import platform
import sys
@@ -368,4 +345,4 @@ def init_socketio(app, datastore):
logger.info("Socket.IO initialized and attached to main Flask app")
logger.info(f"Socket.IO: Registered event handlers: {socketio.handlers if hasattr(socketio, 'handlers') else 'No handlers found'}")
return socketio
return socketio
+2 -2
View File
@@ -44,12 +44,12 @@ data_sanity_test () {
cd ..
TMPDIR=$(mktemp -d)
PORT_N=$((5000 + RANDOM % (6501 - 5000)))
ALLOW_IANA_RESTRICTED_ADDRESSES=true ./changedetection.py -p $PORT_N -d $TMPDIR -u "https://localhost?test-url-is-sanity=1" &
./changedetection.py -p $PORT_N -d $TMPDIR -u "https://localhost?test-url-is-sanity=1" &
PID=$!
sleep 5
kill $PID
sleep 2
ALLOW_IANA_RESTRICTED_ADDRESSES=true ./changedetection.py -p $PORT_N -d $TMPDIR &
./changedetection.py -p $PORT_N -d $TMPDIR &
PID=$!
sleep 5
# On a restart the URL should still be there
+80 -64
View File
@@ -17,6 +17,8 @@ $(document).ready(function () {
set_scale();
});
// Should always be disabled
$('#browser_steps-0-operation option[value="Goto site"]').prop("selected", "selected");
$('#browser_steps-0-operation').attr('disabled', 'disabled');
$('#browsersteps-click-start').click(function () {
$("#browsersteps-click-start").fadeOut();
@@ -43,6 +45,12 @@ $(document).ready(function () {
browsersteps_session_id = false;
apply_buttons_disabled = false;
ctx.clearRect(0, 0, c.width, c.height);
set_first_gotosite_disabled();
}
function set_first_gotosite_disabled() {
$('#browser_steps >li:first-child select').val('Goto site').attr('disabled', 'disabled');
$('#browser_steps >li:first-child').css('opacity', '0.5');
}
// Show seconds remaining until the browser interface needs to restart the session
@@ -235,54 +243,14 @@ $(document).ready(function () {
ctx.fill();
}
// Reusable AJAX function for browser step operations
function executeBrowserStep(url, data = {}) {
$('#browser-steps-ui .loader .spinner').fadeIn();
apply_buttons_disabled = true;
$('ul#browser_steps li .control .apply').css('opacity', 0.5);
$("#browsersteps-img").css('opacity', 0.65);
return $.ajax({
method: "POST",
url: url,
data: data,
statusCode: {
400: function () {
alert("There was a problem processing the request, please reload the page.");
$("#loading-status-text").hide();
$('#browser-steps-ui .loader .spinner').fadeOut();
},
401: function (data) {
alert(data.responseText);
$("#loading-status-text").hide();
$('#browser-steps-ui .loader .spinner').fadeOut();
}
}
}).done(function (data) {
xpath_data = data.xpath_data;
$('#browsersteps-img').attr('src', data.screenshot);
$('#browser-steps-ui .loader .spinner').fadeOut();
apply_buttons_disabled = false;
$("#browsersteps-img").css('opacity', 1);
$('ul#browser_steps li .control .apply').css('opacity', 1);
$("#loading-status-text").hide();
}).fail(function (data) {
console.log(data);
if (data.responseText && data.responseText.includes("Browser session expired")) {
disable_browsersteps_ui();
}
apply_buttons_disabled = false;
$("#loading-status-text").hide();
$('ul#browser_steps li .control .apply').css('opacity', 1);
$("#browsersteps-img").css('opacity', 1);
});
}
function start() {
console.log("Starting browser-steps UI");
browsersteps_session_id = false;
// @todo This setting of the first one should be done at the datalayer but wtforms doesnt wanna play nice
$('#browser_steps >li:first-child').removeClass('empty');
set_first_gotosite_disabled();
$('#browser-steps-ui .loader .spinner').show();
// Request a new session
$('.clear,.remove', $('#browser_steps >li:first-child')).hide();
$.ajax({
type: "GET",
url: browser_steps_start_url,
@@ -299,12 +267,11 @@ $(document).ready(function () {
}).done(function (data) {
$("#loading-status-text").fadeIn();
browsersteps_session_id = data.browsersteps_session_id;
// This should trigger 'Goto site'
console.log("Got startup response, requesting Goto-Site (first) step fake click");
$('#browser_steps >li:first-child .apply').click();
browser_interface_seconds_remaining = 500;
// Request goto_site operation
executeBrowserStep(
browser_steps_sync_url + "&browsersteps_session_id=" + browsersteps_session_id + "&goto_website_url_first_step=true"
);
set_first_gotosite_disabled();
}).fail(function (data) {
console.log(data);
alert('There was an error communicating with the server.');
@@ -313,6 +280,7 @@ $(document).ready(function () {
}
function disable_browsersteps_ui() {
set_first_gotosite_disabled();
$("#browser-steps-ui").css('opacity', '0.3');
$('#browsersteps-selector-canvas').off("mousemove mousedown click");
}
@@ -360,13 +328,16 @@ $(document).ready(function () {
// Add the extra buttons to the steps
$('ul#browser_steps li').each(function (i) {
var s = '<div class="control">' + '<a data-step-index=' + i + ' class="pure-button button-secondary button-green button-xsmall apply" >Apply</a>&nbsp;';
s += `<a data-step-index="${i}" class="pure-button button-secondary button-xsmall clear" >Clear</a>&nbsp;` +
`<a data-step-index="${i}" class="pure-button button-secondary button-red button-xsmall remove" >Remove</a>`;
if (i > 0) {
// The first step never gets these (Goto-site)
s += `<a data-step-index="${i}" class="pure-button button-secondary button-xsmall clear" >Clear</a>&nbsp;` +
`<a data-step-index="${i}" class="pure-button button-secondary button-red button-xsmall remove" >Remove</a>`;
// if a screenshot is available
if (browser_steps_available_screenshots.includes(i.toString())) {
var d = (browser_steps_last_error_step === i+1) ? 'before' : 'after';
s += `&nbsp;<a data-step-index="${i}" class="pure-button button-secondary button-xsmall show-screenshot" title="Show screenshot from last run" data-type="${d}">Pic</a>&nbsp;`;
// if a screenshot is available
if (browser_steps_available_screenshots.includes(i.toString())) {
var d = (browser_steps_last_error_step === i+1) ? 'before' : 'after';
s += `&nbsp;<a data-step-index="${i}" class="pure-button button-secondary button-xsmall show-screenshot" title="Show screenshot from last run" data-type="${d}">Pic</a>&nbsp;`;
}
}
s += '</div>';
$(this).append(s)
@@ -405,35 +376,80 @@ $(document).ready(function () {
});
$('ul#browser_steps li .control .apply').click(function (event) {
// sequential requests @todo refactor
if (apply_buttons_disabled) {
return;
}
var current_data = $(event.currentTarget).closest('li');
$('#browser-steps-ui .loader .spinner').fadeIn();
apply_buttons_disabled = true;
$('ul#browser_steps li .control .apply').css('opacity', 0.5);
$("#browsersteps-img").css('opacity', 0.65);
var is_last_step = 0;
var step_n = $(event.currentTarget).data('step-index');
// Determine if this is the last configured step
var is_last_step = 0;
// On the last step, we should also be getting data ready for the visual selector
$('ul#browser_steps li select').each(function (i) {
if ($(this).val() !== 'Choose one') {
is_last_step += 1;
}
});
is_last_step = (is_last_step == (step_n + 1));
if (is_last_step == (step_n + 1)) {
is_last_step = true;
} else {
is_last_step = false;
}
console.log("Requesting step via POST " + $("select[id$='operation']", current_data).first().val());
// Execute the browser step
executeBrowserStep(
browser_steps_sync_url + "&browsersteps_session_id=" + browsersteps_session_id,
{
// POST the currently clicked step form widget back and await response, redraw
$.ajax({
method: "POST",
url: browser_steps_sync_url + "&browsersteps_session_id=" + browsersteps_session_id,
data: {
'operation': $("select[id$='operation']", current_data).first().val(),
'selector': $("input[id$='selector']", current_data).first().val(),
'optional_value': $("input[id$='optional_value']", current_data).first().val(),
'step_n': step_n,
'is_last_step': is_last_step
},
statusCode: {
400: function () {
// More than likely the CSRF token was lost when the server restarted
alert("There was a problem processing the request, please reload the page.");
$("#loading-status-text").hide();
$('#browser-steps-ui .loader .spinner').fadeOut();
},
401: function (data) {
// More than likely the CSRF token was lost when the server restarted
alert(data.responseText);
$("#loading-status-text").hide();
$('#browser-steps-ui .loader .spinner').fadeOut();
}
}
);
}).done(function (data) {
// it should return the new state (selectors available and screenshot)
xpath_data = data.xpath_data;
$('#browsersteps-img').attr('src', data.screenshot);
$('#browser-steps-ui .loader .spinner').fadeOut();
apply_buttons_disabled = false;
$("#browsersteps-img").css('opacity', 1);
$('ul#browser_steps li .control .apply').css('opacity', 1);
$("#loading-status-text").hide();
set_first_gotosite_disabled();
}).fail(function (data) {
console.log(data);
if (data.responseText.includes("Browser session expired")) {
disable_browsersteps_ui();
}
apply_buttons_disabled = false;
$("#loading-status-text").hide();
$('ul#browser_steps li .control .apply').css('opacity', 1);
$("#browsersteps-img").css('opacity', 1);
});
});
$('ul#browser_steps li .control .show-screenshot').click(function (element) {
-8
View File
@@ -116,14 +116,6 @@ $(document).ready(function () {
$('#realtime-conn-error').show();
});
// Tell the server we're leaving cleanly so it can release the connection
// immediately rather than waiting for a timeout.
// Note: this only fires for voluntary closes (tab/window close, navigation away).
// Hard kills, crashes and network drops will still timeout normally on the server.
window.addEventListener('beforeunload', function () {
socket.disconnect();
});
socket.on('queue_size', function (data) {
console.log(`${data.event_timestamp} - Queue size update: ${data.q_length}`);
if(queueSizePagerInfoText) {
+1 -3
View File
@@ -102,9 +102,7 @@
}
// Navigate to search results (always redirect to watchlist home)
// Use base_path if available (for sub-path deployments like /enlighten-richerx)
const basePath = typeof base_path !== 'undefined' ? base_path : '';
window.location.href = basePath + '/?' + params.toString();
window.location.href = '/?' + params.toString();
});
}
});
+1 -1
View File
@@ -1 +1 @@
#diff-form{background:rgba(0,0,0,.05);padding:1em;border-radius:10px;margin-bottom:1em;color:#fff;font-size:.9rem;text-align:center}#diff-form label.from-to-label{width:4rem;text-decoration:none;padding:.5rem}#diff-form label.from-to-label#change-from{color:#b30000;background:#fadad7}#diff-form label.from-to-label#change-to{background:#eaf2c2;color:#406619}#diff-form #diff-style>span{display:inline-block;padding:.3em}#diff-form #diff-style>span label{font-weight:normal}#diff-form *{vertical-align:middle}body.difference-page section.content{padding-top:40px}#diff-ui{background:var(--color-background);padding:1rem;border-radius:5px}@media(min-width: 767px){#diff-ui{min-width:50%}}#diff-ui #text{font-size:11px}#diff-ui pre{white-space:break-spaces;overflow-wrap:anywhere}#diff-ui h1{display:inline;font-size:100%}#diff-ui #result{white-space:pre-wrap;word-break:break-word;overflow-wrap:break-word}#diff-ui .source{position:absolute;right:1%;top:.2em}@-moz-document url-prefix(){#diff-ui body{height:99%}}#diff-ui td#diff-col div{text-align:justify;white-space:pre-wrap}#diff-ui .ignored{background-color:#ccc;opacity:.7}#diff-ui .triggered{background-color:#1b98f8}#diff-ui .ignored.triggered{background-color:red}#diff-ui .tab-pane-inner#screenshot{text-align:center}#diff-ui .tab-pane-inner#screenshot img{max-width:99%}#diff-ui .pure-form button.reset-margin{margin:0px}#diff-ui .diff-fieldset{display:flex;align-items:center;gap:4px;flex-wrap:wrap}#diff-ui ul#highlightSnippetActions{list-style-type:none;display:flex;align-items:center;justify-content:center;gap:1.5rem;flex-wrap:wrap;padding:0;margin:0}#diff-ui ul#highlightSnippetActions li{display:flex;flex-direction:column;align-items:center;text-align:center;padding:.5rem;gap:.3rem}#diff-ui ul#highlightSnippetActions li button,#diff-ui ul#highlightSnippetActions li a{white-space:nowrap}#diff-ui ul#highlightSnippetActions span{font-size:.8rem;color:var(--color-text-input-description)}#diff-ui #cell-diff-jump-visualiser{display:flex;flex-direction:row;gap:1px;background:var(--color-background);border-radius:3px;overflow-x:hidden;position:sticky;top:0;z-index:10;padding-top:1rem;padding-bottom:1rem;justify-content:center}#diff-ui #cell-diff-jump-visualiser>div{flex:1;min-width:1px;max-width:10px;height:10px;background:var(--color-background-button-cancel);opacity:.3;border-radius:1px;transition:opacity .2s;position:relative}#diff-ui #cell-diff-jump-visualiser>div.deletion{background:#b30000;opacity:1}#diff-ui #cell-diff-jump-visualiser>div.insertion{background:#406619;opacity:1}#diff-ui #cell-diff-jump-visualiser>div.note{background:#406619;opacity:1}#diff-ui #cell-diff-jump-visualiser>div.mixed{background:linear-gradient(to right, #b30000 50%, #406619 50%);opacity:1}#diff-ui #cell-diff-jump-visualiser>div.current-position::after{content:"";position:absolute;bottom:-6px;left:50%;transform:translateX(-50%);width:0;height:0;border-left:4px solid rgba(0,0,0,0);border-right:4px solid rgba(0,0,0,0);border-bottom:4px solid var(--color-text)}#diff-ui #cell-diff-jump-visualiser>div:hover{opacity:.8;cursor:pointer}#text-diff-heading-area .snapshot-age{padding:4px;margin:.5rem 0;background-color:var(--color-background-snapshot-age);border-radius:3px;font-weight:bold;margin-bottom:4px}#text-diff-heading-area .snapshot-age.error{background-color:var(--color-error-background-snapshot-age);color:var(--color-error-text-snapshot-age)}#text-diff-heading-area .snapshot-age>*{padding-right:1rem}
#diff-form{background:rgba(0,0,0,.05);padding:1em;border-radius:10px;margin-bottom:1em;color:#fff;font-size:.9rem;text-align:center}#diff-form label.from-to-label{width:4rem;text-decoration:none;padding:.5rem}#diff-form label.from-to-label#change-from{color:#b30000;background:#fadad7}#diff-form label.from-to-label#change-to{background:#eaf2c2;color:#406619}#diff-form #diff-style>span{display:inline-block;padding:.3em}#diff-form #diff-style>span label{font-weight:normal}#diff-form *{vertical-align:middle}body.difference-page section.content{padding-top:40px}#diff-ui{background:var(--color-background);padding:1rem;border-radius:5px}@media(min-width: 767px){#diff-ui{min-width:50%}}#diff-ui #text{font-size:11px}#diff-ui pre{white-space:break-spaces}#diff-ui h1{display:inline;font-size:100%}#diff-ui #result{white-space:pre-wrap;word-break:break-word;overflow-wrap:break-word}#diff-ui .source{position:absolute;right:1%;top:.2em}@-moz-document url-prefix(){#diff-ui body{height:99%}}#diff-ui td#diff-col div{text-align:justify;white-space:pre-wrap}#diff-ui .ignored{background-color:#ccc;opacity:.7}#diff-ui .triggered{background-color:#1b98f8}#diff-ui .ignored.triggered{background-color:red}#diff-ui .tab-pane-inner#screenshot{text-align:center}#diff-ui .tab-pane-inner#screenshot img{max-width:99%}#diff-ui .pure-form button.reset-margin{margin:0px}#diff-ui .diff-fieldset{display:flex;align-items:center;gap:4px;flex-wrap:wrap}#diff-ui ul#highlightSnippetActions{list-style-type:none;display:flex;align-items:center;justify-content:center;gap:1.5rem;flex-wrap:wrap;padding:0;margin:0}#diff-ui ul#highlightSnippetActions li{display:flex;flex-direction:column;align-items:center;text-align:center;padding:.5rem;gap:.3rem}#diff-ui ul#highlightSnippetActions li button,#diff-ui ul#highlightSnippetActions li a{white-space:nowrap}#diff-ui ul#highlightSnippetActions span{font-size:.8rem;color:var(--color-text-input-description)}#diff-ui #cell-diff-jump-visualiser{display:flex;flex-direction:row;gap:1px;background:var(--color-background);border-radius:3px;overflow-x:hidden;position:sticky;top:0;z-index:10;padding-top:1rem;padding-bottom:1rem;justify-content:center}#diff-ui #cell-diff-jump-visualiser>div{flex:1;min-width:1px;max-width:10px;height:10px;background:var(--color-background-button-cancel);opacity:.3;border-radius:1px;transition:opacity .2s;position:relative}#diff-ui #cell-diff-jump-visualiser>div.deletion{background:#b30000;opacity:1}#diff-ui #cell-diff-jump-visualiser>div.insertion{background:#406619;opacity:1}#diff-ui #cell-diff-jump-visualiser>div.note{background:#406619;opacity:1}#diff-ui #cell-diff-jump-visualiser>div.mixed{background:linear-gradient(to right, #b30000 50%, #406619 50%);opacity:1}#diff-ui #cell-diff-jump-visualiser>div.current-position::after{content:"";position:absolute;bottom:-6px;left:50%;transform:translateX(-50%);width:0;height:0;border-left:4px solid rgba(0,0,0,0);border-right:4px solid rgba(0,0,0,0);border-bottom:4px solid var(--color-text)}#diff-ui #cell-diff-jump-visualiser>div:hover{opacity:.8;cursor:pointer}#text-diff-heading-area .snapshot-age{padding:4px;margin:.5rem 0;background-color:var(--color-background-snapshot-age);border-radius:3px;font-weight:bold;margin-bottom:4px}#text-diff-heading-area .snapshot-age.error{background-color:var(--color-error-background-snapshot-age);color:var(--color-error-text-snapshot-age)}#text-diff-heading-area .snapshot-age>*{padding-right:1rem}
@@ -62,7 +62,6 @@ body.difference-page {
pre {
white-space: break-spaces;
overflow-wrap: anywhere;
}
File diff suppressed because one or more lines are too long
+7 -30
View File
@@ -22,8 +22,6 @@ import uuid as uuid_builder
from loguru import logger
from blinker import signal
from ..model.Tags import TagsDict
# Try to import orjson for faster JSON serialization
try:
import orjson
@@ -123,11 +121,6 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
if 'application' in settings_data['settings']:
self.__data['settings']['application'].update(settings_data['settings']['application'])
# Use our Tags dict with cleanup helpers etc
# @todo Same for Watches
existing_tags = settings_data.get('settings', {}).get('application', {}).get('tags') or {}
self.__data['settings']['application']['tags'] = TagsDict(existing_tags, datastore_path=self.datastore_path)
# More or less for the old format which had this data in the one url-watches.json
# cant hurt to leave it here,
if 'watching' in settings_data:
@@ -203,7 +196,7 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
self.datastore_path = datastore_path
# Initialize data structure
self.__data = App.model(datastore_path=datastore_path)
self.__data = App.model()
self.json_store_path = os.path.join(self.datastore_path, "changedetection.json")
# Base definition for all watchers (deepcopy part of #569)
@@ -242,8 +235,6 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
# No datastore yet - check if this is a fresh install or legacy migration
self.init_fresh_install(include_default_watches=include_default_watches,
version_tag=version_tag)
# Maybe they copied a bunch of watch subdirs across too
self._load_state()
def init_fresh_install(self, include_default_watches, version_tag):
# Generate app_guid FIRST (required for all operations)
@@ -362,9 +353,6 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
# Deep copy settings to avoid modifying the original
settings_copy = copy.deepcopy(self.__data['settings'])
# Is saved as {uuid}/tag.json
settings_copy['application']['tags'] = {}
return {
'note': 'Settings file - watches are in {uuid}/watch.json, tags are in {uuid}/tag.json',
'app_guid': self.__data.get('app_guid'),
@@ -728,11 +716,8 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
return False
if not is_safe_valid_url(url):
from flask import has_request_context
if has_request_context():
flash(gettext('Watch protocol is not permitted or invalid URL format'), 'error')
else:
logger.error(f"add_watch: URL '{url}' is not permitted or invalid, skipping.")
flash(gettext('Watch protocol is not permitted or invalid URL format'), 'error')
return None
# Check PAGE_WATCH_LIMIT if set
@@ -980,20 +965,12 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
def get_all_tags_for_watch(self, uuid):
"""This should be in Watch model but Watch doesn't have access to datastore, not sure how to solve that yet"""
watch = self.data['watching'].get(uuid)
if not watch:
return {}
# Start with manually assigned tags
result = dictfilt(self.__data['settings']['application']['tags'], watch.get('tags', []))
# Should return a dict of full tag info linked by UUID
if watch:
return dictfilt(self.__data['settings']['application']['tags'], watch.get('tags', []))
# Additionally include any tag whose url_match_pattern matches this watch's URL
watch_url = watch.get('url', '')
if watch_url:
for tag_uuid, tag in self.__data['settings']['application']['tags'].items():
if tag_uuid not in result and tag.matches_url(watch_url):
result[tag_uuid] = tag
return result
return {}
@property
def extra_browsers(self):
+3 -55
View File
@@ -669,9 +669,7 @@ class DatastoreUpdatesMixin:
def update_26(self):
self.migrate_legacy_db_format()
# Re-run tag to JSON migration
def update_29(self):
def update_28(self):
"""
Migrate tags to individual tag.json files.
@@ -684,6 +682,8 @@ class DatastoreUpdatesMixin:
- Enables independent tag versioning/backup
- Maintains backwards compatibility (tags stay in settings too)
"""
# Force save as tag.json (not watch.json) even if object is corrupted
logger.critical("=" * 80)
logger.critical("Running migration: Individual tag persistence (update_28)")
logger.critical("Creating individual tag.json files")
@@ -702,9 +702,6 @@ class DatastoreUpdatesMixin:
failed_count = 0
for uuid, tag_data in tags.items():
if os.path.isfile(os.path.join(self.datastore_path, uuid, "tag.json")):
logger.debug(f"Tag {uuid} tag.json exists, skipping")
continue
try:
tag_data.commit()
saved_count += 1
@@ -726,52 +723,3 @@ class DatastoreUpdatesMixin:
logger.info("Future tag edits will update both locations (dual storage)")
logger.critical("=" * 80)
# write it to disk, it will be saved without ['tags'] in the JSON db because we find it from disk glob
# (left this out by accident in previous update, added tags={} in the changedetection.json save_to_disk)
self._save_settings()
def update_30(self):
"""Migrate restock_settings out of watch.json into restock_diff.json processor config file.
Previously, restock_diff processor settings (in_stock_processing, follow_price_changes, etc.)
were stored directly in the watch dict (watch.json). They now belong in a separate per-watch
processor config file (restock_diff.json) consistent with the processor_config_* API system.
For tags: restock_settings key is renamed to processor_config_restock_diff in the tag dict,
matching what the API writes when updating a tag.
Safe to re-run: skips watches that already have a restock_diff.json, skips tags that already
have processor_config_restock_diff set.
"""
import json
# --- Watches ---
for uuid, watch in self.data['watching'].items():
if watch.get('processor') != 'restock_diff':
continue
restock_settings = watch.get('restock_settings')
if not restock_settings:
continue
data_dir = watch.data_dir
if data_dir:
watch.ensure_data_dir_exists()
filepath = os.path.join(data_dir, 'restock_diff.json')
if not os.path.isfile(filepath):
with open(filepath, 'w', encoding='utf-8') as f:
json.dump({'restock_diff': restock_settings}, f, indent=2)
logger.info(f"update_30: migrated restock_settings → {filepath}")
del self.data['watching'][uuid]['restock_settings']
watch.commit()
# --- Tags ---
for tag_uuid, tag in self.data['settings']['application']['tags'].items():
restock_settings = tag.get('restock_settings')
if not restock_settings or tag.get('processor_config_restock_diff'):
continue
tag['processor_config_restock_diff'] = restock_settings
del tag['restock_settings']
tag.commit()
logger.info(f"update_30: migrated tag {tag_uuid} restock_settings → processor_config_restock_diff")
@@ -44,26 +44,13 @@
<td><code>{{ '{{preview_url}}' }}</code></td>
<td>{{ _('The URL of the preview page generated by changedetection.io.') }}</td>
</tr>
<tr>
<td><code>{{ '{{change_datetime}}' }}</code></td>
<td>{{ _('Date/time of the change, accepts format=, change_datetime(format=\'%A\')\', default is \'%Y-%m-%d %H:%M:%S %Z\'') }}</td>
</tr>
<tr>
<td><code>{{ '{{diff_url}}' }}</code></td>
<td>{{ _('The URL of the diff output for the watch.') }}</td>
</tr>
<tr>
<td><code>{{ '{{diff_url}}' }}</code></td>
<td>{{ _('The URL of the diff output for the watch.') }}</td>
</tr>
<tr>
<td><code>{{ '{{diff}}' }}</code></td>
<td>{{ _('The diff output - only changes, additions, and removals') }}<br>
<small>
{{ _('All diff variants accept') }} <code>lines=</code>, <code>context=</code>, <code>word_diff=</code>, <code>ignore_junk=</code> {{ _('args, e.g.') }}
<code>{{ '{{diff(lines=10)}}' }}</code>, <code>{{ '{{diff_added(lines=5, context=2)}}' }}</code>
</small>
</td>
<td>{{ _('The diff output - only changes, additions, and removals') }}</td>
</tr>
<tr>
<td><code>{{ '{{diff_clean}}' }}</code></td>
@@ -98,14 +85,6 @@
<td><code>{{ '{{diff_patch}}' }}</code></td>
<td>{{ _('The diff output - patch in unified format') }}</td>
</tr>
<tr>
<td><code>{{ '{{diff_changed_from}}' }}</code></td>
<td>{{ _('Only the changed words/values from the previous version — e.g. the old price. Best when a single value changes per line; multiple changed fragments are joined by newline.') }}</td>
</tr>
<tr>
<td><code>{{ '{{diff_changed_to}}' }}</code></td>
<td>{{ _('Only the changed words/values from the new version — e.g. the new price. Best when a single value changes per line; multiple changed fragments are joined by newline.') }}</td>
</tr>
<tr>
<td><code>{{ '{{current_snapshot}}' }}</code></td>
<td>{{ _('The current snapshot text contents value, useful when combined with JSON or CSS filters') }}
-4
View File
@@ -45,10 +45,6 @@
<script src="{{url_for('static_content', group='js', filename='socket.io.min.js')}}"></script>
<script src="{{url_for('static_content', group='js', filename='realtime.js')}}" defer></script>
{% endif %}
{%- set _html_head_extras = get_html_head_extras() -%}
{%- if _html_head_extras %}
{{ _html_head_extras | safe }}
{%- endif %}
</head>
<body class="{{extra_classes}}">
@@ -10,7 +10,6 @@
<li>{{ _('Trigger text is processed from the result-text that comes out of any CSS/JSON Filters for this monitor') }}</li>
<li>{{ _('Each line is processed separately (think of each line as "OR")') }}</li>
<li>{{ _('Note: Wrap in forward slash / to use regex example:') }} <code>/foo\d/</code></li>
<li>{{ _('You can also use')}} <a href="#conditions">{{ _('conditions')}}</a> - {{ _('"Page text" - with Contains, Starts With, Not Contains and many more' ) }} <code>/foo\d/</code></li>
</ul>
</span>
</div>
-46
View File
@@ -2,7 +2,6 @@
import psutil
import time
from threading import Thread
import multiprocessing
import pytest
import arrow
@@ -14,10 +13,6 @@ import sys
# When test server is slow/unresponsive, workers fail fast instead of holding UUIDs for 45s
# This prevents exponential priority growth from repeated deferrals (priority × 10 each defer)
os.environ['DEFAULT_SETTINGS_REQUESTS_TIMEOUT'] = '5'
# Test server runs on localhost (127.0.0.1) which is a private IP.
# Allow it globally so all existing tests keep working; test_ssrf_protection
# uses monkeypatch to temporarily override this for its own assertions.
os.environ['ALLOW_IANA_RESTRICTED_ADDRESSES'] = 'true'
from changedetectionio.flask_app import init_app_secret, changedetection_app
from changedetectionio.tests.util import live_server_setup, new_live_server_setup
@@ -192,34 +187,6 @@ def cleanup(datastore_path):
if os.path.isfile(f):
os.unlink(f)
def pytest_configure(config):
"""Configure pytest environment before tests run.
CRITICAL: Set multiprocessing start method to 'fork' for Python 3.14+ compatibility.
Python 3.14 changed the default start method from 'fork' to 'forkserver' on Linux.
The forkserver method requires all objects to be picklable, but pytest-flask's
LiveServer uses nested functions that can't be pickled.
Setting 'fork' explicitly:
- Maintains compatibility with Python 3.10-3.13 (where 'fork' was already default)
- Fixes Python 3.14 pickling errors
- Only affects Unix-like systems (Windows uses 'spawn' regardless)
See: https://github.com/python/cpython/issues/126831
See: https://docs.python.org/3/whatsnew/3.14.html
"""
# Only set if not already set (respects existing configuration)
if multiprocessing.get_start_method(allow_none=True) is None:
try:
# 'fork' is available on Unix-like systems (Linux, macOS)
# On Windows, this will have no effect as 'spawn' is the only option
multiprocessing.set_start_method('fork', force=False)
logger.debug("Set multiprocessing start method to 'fork' for Python 3.14+ compatibility")
except (ValueError, RuntimeError):
# Already set, not available on this platform, or context already created
pass
def pytest_addoption(parser):
"""Add custom command-line options for pytest.
@@ -364,7 +331,6 @@ def prepare_test_function(live_server, datastore_path):
# Cleanup: Clear watches and queue after test
try:
from changedetectionio.flask_app import update_q
from pathlib import Path
# Clear the queue to prevent leakage to next test
while not update_q.empty():
@@ -374,18 +340,6 @@ def prepare_test_function(live_server, datastore_path):
break
datastore.data['watching'] = {}
# Delete any old watch metadata JSON files
base_path = Path(datastore.datastore_path).resolve()
max_depth = 2
for file in base_path.rglob("*.json"):
# Calculate depth relative to base path
depth = len(file.relative_to(base_path).parts) - 1
if depth <= max_depth and file.is_file():
file.unlink()
except Exception as e:
logger.warning(f"Error during datastore cleanup: {e}")
@@ -1,83 +0,0 @@
"""Test that plugins can inject HTML into base.html <head> via get_html_head_extras hookimpl."""
import pytest
from flask import url_for, Response
from changedetectionio.pluggy_interface import hookimpl, plugin_manager
_MY_JS = "console.log('my_module_content loaded');"
_MY_CSS = ".my-module-example { color: red; }"
class _HeadExtrasPlugin:
"""Test plugin that injects tags pointing at its own Flask routes."""
@hookimpl
def get_html_head_extras(self):
css_url = url_for('test_plugin_my_module_content_css')
js_url = url_for('test_plugin_my_module_content_js')
return (
f'<link rel="stylesheet" id="test-head-extra-css" href="{css_url}">\n'
f'<script id="test-head-extra-js" src="{js_url}" defer></script>'
)
@pytest.fixture(scope='module')
def plugin_routes(live_server):
"""Register plugin asset routes once per module (Flask routes can't be added twice)."""
app = live_server.app
@app.route('/test-plugin/my_module_content/css')
def test_plugin_my_module_content_css():
return Response(_MY_CSS, mimetype='text/css',
headers={'Cache-Control': 'max-age=3600'})
@app.route('/test-plugin/my_module_content/js')
def test_plugin_my_module_content_js():
return Response(_MY_JS, mimetype='application/javascript',
headers={'Cache-Control': 'max-age=3600'})
@pytest.fixture
def head_extras_plugin(plugin_routes):
"""Register the hookimpl for one test then unregister it — function-scoped for clean isolation."""
plugin = _HeadExtrasPlugin()
plugin_manager.register(plugin, name="test_head_extras")
yield plugin
plugin_manager.unregister(name="test_head_extras")
def test_plugin_html_injected_into_head(client, live_server, measure_memory_usage, datastore_path, head_extras_plugin):
"""get_html_head_extras output must appear inside <head> in the rendered page."""
res = client.get(url_for("watchlist.index"), follow_redirects=True)
assert res.status_code == 200
assert b'id="test-head-extra-css"' in res.data, "Plugin <link> tag missing from rendered page"
assert b'id="test-head-extra-js"' in res.data, "Plugin <script> tag missing from rendered page"
head_end = res.data.find(b'</head>')
assert head_end != -1
for marker in (b'id="test-head-extra-css"', b'id="test-head-extra-js"'):
pos = res.data.find(marker)
assert pos != -1 and pos < head_end, f"{marker} must appear before </head>"
def test_plugin_js_route_returns_correct_content(client, live_server, measure_memory_usage, datastore_path, plugin_routes):
"""The plugin-registered JS route must return JS with the right Content-Type."""
res = client.get(url_for('test_plugin_my_module_content_js'))
assert res.status_code == 200
assert 'javascript' in res.content_type
assert _MY_JS.encode() in res.data
def test_plugin_css_route_returns_correct_content(client, live_server, measure_memory_usage, datastore_path, plugin_routes):
"""The plugin-registered CSS route must return CSS with the right Content-Type."""
res = client.get(url_for('test_plugin_my_module_content_css'))
assert res.status_code == 200
assert 'css' in res.content_type
assert _MY_CSS.encode() in res.data
def test_no_extras_without_plugin(client, live_server, measure_memory_usage, datastore_path):
"""With no hookimpl registered the markers must not appear (isolation check)."""
res = client.get(url_for("watchlist.index"), follow_redirects=True)
assert b'id="test-head-extra-css"' not in res.data
assert b'id="test-head-extra-js"' not in res.data
@@ -11,10 +11,10 @@ from changedetectionio.tests.util import set_original_response, set_modified_res
set_longer_modified_response, delete_all_watches
import logging
import os
# NOTE - RELIES ON mailserver as hostname running, see github build recipes
smtp_test_server = os.getenv('SMTP_TEST_MAILSERVER', 'mailserver')
smtp_test_server = 'mailserver'
ALL_MARKUP_TOKENS = ''.join(f"TOKEN: '{t}'\n{{{{{t}}}}}\n" for t in NotificationContextData().keys())
-90
View File
@@ -170,14 +170,6 @@ def test_api_simple(client, live_server, measure_memory_usage, datastore_path):
headers={'x-api-key': api_key},
)
assert b'(changed) Which is across' in res.data
assert b'Some text thats the same' in res.data
# Fetch the difference between two versions (default text format)
res = client.get(
url_for("watchhistorydiff", uuid=watch_uuid, from_timestamp='previous', to_timestamp='latest')+"?changesOnly=true",
headers={'x-api-key': api_key},
)
assert b'Some text thats the same' not in res.data
# Test htmlcolor format
res = client.get(
@@ -815,88 +807,6 @@ def test_api_import_large_background(client, live_server, measure_memory_usage,
print(f"\n✓ Successfully created {num_urls} watches in background (took {elapsed}s)")
def test_api_restock_processor_config(client, live_server, measure_memory_usage, datastore_path):
"""
Test that processor_config_restock_diff is accepted by the API for watches using
restock_diff processor, that its schema is validated (enum values, types), and that
genuinely unknown fields are rejected with an error that originates from the
OpenAPI spec validation layer.
"""
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
test_url = url_for('test_endpoint', _external=True)
# Create a watch in restock_diff mode WITH processor_config in the POST body (matches the API docs example)
res = client.post(
url_for("createwatch"),
data=json.dumps({
"url": test_url,
"processor": "restock_diff",
"title": "Restock test",
"processor_config_restock_diff": {
"in_stock_processing": "in_stock_only",
"follow_price_changes": True,
"price_change_min": 8888888.0,
}
}),
headers={'content-type': 'application/json', 'x-api-key': api_key},
follow_redirects=True
)
assert res.status_code == 201
watch_uuid = res.json.get('uuid')
assert is_valid_uuid(watch_uuid)
# Verify the value set on POST is reflected in the UI edit page (not just via PUT)
res = client.get(url_for("ui.ui_edit.edit_page", uuid=watch_uuid))
assert res.status_code == 200
assert b'8888888' in res.data, "price_change_min set via POST should appear in the UI edit form"
# Valid processor_config_restock_diff update via PUT should also be accepted
res = client.put(
url_for("watch", uuid=watch_uuid),
headers={'x-api-key': api_key, 'content-type': 'application/json'},
data=json.dumps({
"processor_config_restock_diff": {
"in_stock_processing": "all_changes",
"follow_price_changes": False,
"price_change_min": 8888888.0,
"price_change_max": 9999999.0,
}
}),
)
assert res.status_code == 200, f"Valid processor_config_restock_diff should be accepted, got: {res.data}"
# Verify the updated value is still reflected in the UI edit page
res = client.get(url_for("ui.ui_edit.edit_page", uuid=watch_uuid))
assert res.status_code == 200
assert b'8888888' in res.data, "price_change_min set via PUT should appear in the UI edit form"
# An invalid enum value inside processor_config_restock_diff should be rejected by the spec
res = client.put(
url_for("watch", uuid=watch_uuid),
headers={'x-api-key': api_key, 'content-type': 'application/json'},
data=json.dumps({
"processor_config_restock_diff": {
"in_stock_processing": "not_a_valid_enum_value"
}
}),
)
assert res.status_code == 400, "Invalid enum value in processor config should be rejected"
assert b'Validation failed' in res.data, "Rejection should come from OpenAPI spec validation layer"
# A completely unknown field should be rejected (either by OpenAPI spec validation or
# the application-level field filter — both are acceptable gatekeepers)
res = client.put(
url_for("watch", uuid=watch_uuid),
headers={'x-api-key': api_key, 'content-type': 'application/json'},
data=json.dumps({"field_that_is_not_in_the_spec_at_all": "some value"}),
)
assert res.status_code == 400, "Unknown fields should be rejected"
assert (b'Validation failed' in res.data or b'Unknown field' in res.data), \
"Rejection should come from either the OpenAPI spec validation layer or application field filter"
delete_all_watches(client)
def test_api_conflict_UI_password(client, live_server, measure_memory_usage, datastore_path):
+2 -56
View File
@@ -9,51 +9,7 @@ by testing various scenarios that should trigger validation errors.
import time
import json
from flask import url_for
from .util import live_server_setup, wait_for_all_checks, delete_all_watches
def test_openapi_merged_spec_contains_restock_fields():
"""
Unit test: verify that build_merged_spec_dict() correctly merges the
restock_diff processor api.yaml into the base spec so that
WatchBase.properties includes processor_config_restock_diff with all
expected sub-fields. No live server required.
"""
from changedetectionio.api import build_merged_spec_dict
spec = build_merged_spec_dict()
schemas = spec['components']['schemas']
# The merged schema for processor_config_restock_diff should exist
assert 'processor_config_restock_diff' in schemas, \
"processor_config_restock_diff schema missing from merged spec"
restock_schema = schemas['processor_config_restock_diff']
props = restock_schema.get('properties', {})
expected_fields = {
'in_stock_processing',
'follow_price_changes',
'price_change_min',
'price_change_max',
'price_change_threshold_percent',
}
missing = expected_fields - set(props.keys())
assert not missing, f"Missing fields in processor_config_restock_diff schema: {missing}"
# in_stock_processing must be an enum with the three valid values
enum_values = set(props['in_stock_processing'].get('enum', []))
assert enum_values == {'in_stock_only', 'all_changes', 'off'}, \
f"Unexpected enum values for in_stock_processing: {enum_values}"
# WatchBase.properties must carry a $ref to the restock schema so the
# validation middleware can enforce it on every POST/PUT to /watch
watchbase_props = schemas['WatchBase']['properties']
assert 'processor_config_restock_diff' in watchbase_props, \
"processor_config_restock_diff not wired into WatchBase.properties"
ref = watchbase_props['processor_config_restock_diff'].get('$ref', '')
assert 'processor_config_restock_diff' in ref, \
f"WatchBase.processor_config_restock_diff should $ref the schema, got: {ref}"
from .util import live_server_setup, wait_for_all_checks
def test_openapi_validation_invalid_content_type_on_create_watch(client, live_server, measure_memory_usage, datastore_path):
@@ -71,7 +27,6 @@ def test_openapi_validation_invalid_content_type_on_create_watch(client, live_se
# Should get 400 error due to OpenAPI validation failure
assert res.status_code == 400, f"Expected 400 but got {res.status_code}"
assert b"Validation failed" in res.data, "Should contain validation error message"
delete_all_watches(client)
def test_openapi_validation_missing_required_field_create_watch(client, live_server, measure_memory_usage, datastore_path):
@@ -89,7 +44,6 @@ def test_openapi_validation_missing_required_field_create_watch(client, live_ser
# Should get 400 error due to missing required field
assert res.status_code == 400, f"Expected 400 but got {res.status_code}"
assert b"Validation failed" in res.data, "Should contain validation error message"
delete_all_watches(client)
def test_openapi_validation_invalid_field_in_request_body(client, live_server, measure_memory_usage, datastore_path):
@@ -129,7 +83,6 @@ def test_openapi_validation_invalid_field_in_request_body(client, live_server, m
# Backend validation now returns "Unknown field(s):" message
assert b"Unknown field" in res.data, \
"Should contain validation error about unknown fields"
delete_all_watches(client)
def test_openapi_validation_import_wrong_content_type(client, live_server, measure_memory_usage, datastore_path):
@@ -147,7 +100,6 @@ def test_openapi_validation_import_wrong_content_type(client, live_server, measu
# Should get 400 error due to content-type mismatch
assert res.status_code == 400, f"Expected 400 but got {res.status_code}"
assert b"Validation failed" in res.data, "Should contain validation error message"
delete_all_watches(client)
def test_openapi_validation_import_correct_content_type_succeeds(client, live_server, measure_memory_usage, datastore_path):
@@ -165,7 +117,6 @@ def test_openapi_validation_import_correct_content_type_succeeds(client, live_se
# Should succeed
assert res.status_code == 200, f"Expected 200 but got {res.status_code}"
assert len(res.json) == 2, "Should import 2 URLs"
delete_all_watches(client)
def test_openapi_validation_get_requests_bypass_validation(client, live_server, measure_memory_usage, datastore_path):
@@ -190,7 +141,6 @@ def test_openapi_validation_get_requests_bypass_validation(client, live_server,
# Should return JSON with watch list (empty in this case)
assert isinstance(res.json, dict), "Should return JSON dictionary for watch list"
delete_all_watches(client)
def test_openapi_validation_create_tag_missing_required_title(client, live_server, measure_memory_usage, datastore_path):
@@ -208,13 +158,10 @@ def test_openapi_validation_create_tag_missing_required_title(client, live_serve
# Should get 400 error due to missing required field
assert res.status_code == 400, f"Expected 400 but got {res.status_code}"
assert b"Validation failed" in res.data, "Should contain validation error message"
delete_all_watches(client)
def test_openapi_validation_watch_update_allows_partial_updates(client, live_server, measure_memory_usage, datastore_path):
"""Test that watch updates allow partial updates without requiring all fields (positive test)."""
#xxx
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
# First create a valid watch
@@ -251,5 +198,4 @@ def test_openapi_validation_watch_update_allows_partial_updates(client, live_ser
)
assert res.status_code == 200
assert res.json.get('title') == 'Updated Title Only', "Title should be updated"
assert res.json.get('url') == 'https://example.com', "URL should remain unchanged"
delete_all_watches(client)
assert res.json.get('url') == 'https://example.com', "URL should remain unchanged"
-91
View File
@@ -176,97 +176,6 @@ def test_api_tags_listing(client, live_server, measure_memory_usage, datastore_p
assert res.status_code == 204
def test_api_tag_restock_processor_config(client, live_server, measure_memory_usage, datastore_path):
"""
Test that a tag/group can be created and updated with processor_config_restock_diff via the API.
Since Tag extends WatchBase, processor config fields injected into WatchBase are also valid for tags.
"""
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
set_original_response(datastore_path=datastore_path)
# Create a tag with processor_config_restock_diff in a single POST (issue #3966)
res = client.post(
url_for("tag"),
data=json.dumps({
"title": "Restock Group",
"overrides_watch": True,
"processor_config_restock_diff": {
"in_stock_processing": "in_stock_only",
"follow_price_changes": True,
"price_change_min": 7777777
}
}),
headers={'content-type': 'application/json', 'x-api-key': api_key}
)
assert res.status_code == 201, f"POST tag with restock config failed: {res.data}"
tag_uuid = res.json.get('uuid')
# Verify processor config was saved during creation (the bug: these were discarded)
res = client.get(
url_for("tag", uuid=tag_uuid),
headers={'x-api-key': api_key}
)
assert res.status_code == 200
tag_data = res.json
assert tag_data.get('overrides_watch') == True, "overrides_watch should be saved on POST"
assert tag_data.get('processor_config_restock_diff', {}).get('in_stock_processing') == 'in_stock_only', \
"processor_config_restock_diff should be saved on POST"
assert tag_data.get('processor_config_restock_diff', {}).get('price_change_min') == 7777777, \
"price_change_min should be saved on POST"
# Update tag with valid processor_config_restock_diff via PUT
res = client.put(
url_for("tag", uuid=tag_uuid),
headers={'x-api-key': api_key, 'content-type': 'application/json'},
data=json.dumps({
"overrides_watch": True,
"processor_config_restock_diff": {
"in_stock_processing": "in_stock_only",
"follow_price_changes": True,
"price_change_min": 8888888
}
})
)
assert res.status_code == 200, f"PUT tag with restock config failed: {res.data}"
# Verify the config was stored via API
res = client.get(
url_for("tag", uuid=tag_uuid),
headers={'x-api-key': api_key}
)
assert res.status_code == 200
tag_data = res.json
assert tag_data.get('overrides_watch') == True
assert tag_data.get('processor_config_restock_diff', {}).get('in_stock_processing') == 'in_stock_only'
assert tag_data.get('processor_config_restock_diff', {}).get('price_change_min') == 8888888
# Verify the value is also reflected in the UI tag edit page
res = client.get(url_for("tags.form_tag_edit", uuid=tag_uuid))
assert res.status_code == 200
assert b'8888888' in res.data, "price_change_min set via API should appear in the UI tag edit form"
# Invalid enum value should be rejected by OpenAPI spec validation
res = client.put(
url_for("tag", uuid=tag_uuid),
headers={'x-api-key': api_key, 'content-type': 'application/json'},
data=json.dumps({
"processor_config_restock_diff": {
"in_stock_processing": "not_a_valid_value"
}
})
)
assert res.status_code == 400
assert b'Validation failed' in res.data
# Clean up
res = client.delete(
url_for("tag", uuid=tag_uuid),
headers={'x-api-key': api_key}
)
assert res.status_code == 204
def test_roundtrip_API(client, live_server, measure_memory_usage, datastore_path):
"""
Test the full round trip, this way we test the default Model fits back into OpenAPI spec
+2 -34
View File
@@ -6,6 +6,8 @@ from flask import url_for
from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks, extract_rss_token_from_UI, \
extract_UUID_from_client, delete_all_watches
sleep_time_for_fetch_thread = 3
# Basic test to check inscriptus is not adding return line chars, basically works etc
def test_inscriptus():
@@ -48,15 +50,6 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
# Check this class does not appear (that we didnt see the actual source)
assert b'foobar-detection' not in res.data
# Check POST preview
res = client.post(
url_for("ui.ui_preview.preview_page", uuid="first"),
follow_redirects=True
)
# Check this class does not appear (that we didnt see the actual source)
assert b'foobar-detection' not in res.data
# Make a change
set_modified_response(datastore_path=datastore_path)
@@ -422,28 +415,3 @@ def test_plaintext_even_if_xml_content_and_can_apply_filters(client, live_server
assert b'&lt;foobar' not in res.data
res = delete_all_watches(client)
def test_last_error_cleared_on_same_checksum(client, live_server, datastore_path):
"""last_error should be cleared even when content is unchanged (checksumFromPreviousCheckWasTheSame path)"""
set_original_response(datastore_path=datastore_path)
uuid = client.application.config.get('DATASTORE').add_watch(url=url_for('test_endpoint', _external=True))
# First check - establishes baseline checksum
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
# Inject a stale last_error directly (simulates a prior failed check)
datastore = client.application.config.get('DATASTORE')
datastore.update_watch(uuid=uuid, update_obj={'last_error': 'Some previous error'})
assert datastore.data['watching'][uuid].get('last_error') == 'Some previous error'
# Second check - same content, so checksumFromPreviousCheckWasTheSame will fire
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
# last_error must be cleared even though no change was detected
assert datastore.data['watching'][uuid].get('last_error') == False
delete_all_watches(client)
+29 -154
View File
@@ -3,13 +3,14 @@
from .util import set_original_response, live_server_setup, wait_for_all_checks
from flask import url_for
import io
from zipfile import ZipFile, ZIP_DEFLATED
from zipfile import ZipFile
import re
import time
from changedetectionio.model import Watch, Tag
def test_backup(client, live_server, measure_memory_usage, datastore_path):
# live_server_setup(live_server) # Setup on conftest per function
set_original_response(datastore_path=datastore_path)
@@ -31,7 +32,7 @@ def test_backup(client, live_server, measure_memory_usage, datastore_path):
time.sleep(4)
res = client.get(
url_for("backups.create"),
url_for("backups.index"),
follow_redirects=True
)
# Can see the download link to the backup
@@ -68,9 +69,6 @@ def test_backup(client, live_server, measure_memory_usage, datastore_path):
# Check for changedetection.json (settings file)
assert 'changedetection.json' in l, "changedetection.json should be in backup"
# secret.txt must never be included — it contains the Flask session key
assert 'secret.txt' not in l, "secret.txt (Flask session key) must not be included in backup"
# Get the latest one
res = client.get(
url_for("backups.remove_backups"),
@@ -82,18 +80,37 @@ def test_backup(client, live_server, measure_memory_usage, datastore_path):
def test_watch_data_package_download(client, live_server, measure_memory_usage, datastore_path):
"""Test downloading a single watch's data as a zip package"""
import os
import json
set_original_response(datastore_path=datastore_path)
uuid = client.application.config.get('DATASTORE').add_watch(url=url_for('test_endpoint', _external=True))
tag_uuid = client.application.config.get('DATASTORE').add_tag(title="Tasty backup tag")
tag_uuid2 = client.application.config.get('DATASTORE').add_tag(title="Tasty backup tag number two")
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
# Add a watch
res = client.post(
url_for("imports.import_page"),
data={"urls": url_for('test_endpoint', _external=True)},
follow_redirects=True
)
assert b"1 Imported" in res.data
wait_for_all_checks(client)
# Get the UUID directly from the datastore
# Find the watch directories
uuid = None
for item in os.listdir(datastore_path):
item_path = os.path.join(datastore_path, item)
if os.path.isdir(item_path) and len(item) == 36: # UUID format
uuid = item
break
assert uuid is not None, "Could not find watch UUID in datastore"
# Download the watch data package
res = client.get(url_for("ui.ui_edit.watch_get_data_package", uuid=uuid))
res = client.get(
url_for("ui.ui_edit.watch_get_data_package", uuid=uuid),
follow_redirects=True
)
# Should get the right zip content type
assert res.content_type == "application/zip"
@@ -116,146 +133,4 @@ def test_watch_data_package_download(client, live_server, measure_memory_usage,
# Should contain history/snapshot files
uuid4hex_txt = re.compile(f'^{re.escape(uuid)}/.*\\.txt', re.I)
txt_files = list(filter(uuid4hex_txt.match, files))
assert len(txt_files) > 0, f"Should have at least one .txt file (history/snapshot), got: {files}"
def test_backup_restore(client, live_server, measure_memory_usage, datastore_path):
"""Test that a full backup zip can be restored — watches and tags survive a round-trip."""
set_original_response(datastore_path=datastore_path)
datastore = live_server.app.config['DATASTORE']
watch_url = url_for('test_endpoint', _external=True)
# Set up: one watch and two tags
uuid = datastore.add_watch(url=watch_url)
tag_uuid = datastore.add_tag(title="Tasty backup tag")
tag_uuid2 = datastore.add_tag(title="Tasty backup tag number two")
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
# Create a full backup
client.get(url_for("backups.request_backup"), follow_redirects=True)
time.sleep(4)
# Download the latest backup zip
res = client.get(url_for("backups.download_backup", filename="latest"), follow_redirects=True)
assert res.content_type == "application/zip"
zip_data = res.data
# Confirm the zip contains both watch.json and tag.json entries
backup = ZipFile(io.BytesIO(zip_data))
names = backup.namelist()
assert f"{uuid}/watch.json" in names, f"watch.json missing from backup: {names}"
assert f"{tag_uuid}/tag.json" in names, f"tag.json for tag 1 missing from backup: {names}"
assert f"{tag_uuid2}/tag.json" in names, f"tag.json for tag 2 missing from backup: {names}"
# --- Wipe everything ---
datastore.delete('all')
client.get(url_for("tags.delete_all"), follow_redirects=True)
assert uuid not in datastore.data['watching'], "Watch should be gone after delete"
assert tag_uuid not in datastore.data['settings']['application']['tags'], "Tag 1 should be gone after delete"
assert tag_uuid2 not in datastore.data['settings']['application']['tags'], "Tag 2 should be gone after delete"
# --- Restore from the backup zip ---
res = client.post(
url_for("backups.restore.backups_restore_start"),
data={
'zip_file': (io.BytesIO(zip_data), 'backup.zip'),
'include_groups': 'y',
'include_groups_replace_existing': 'y',
'include_watches': 'y',
'include_watches_replace_existing': 'y',
},
content_type='multipart/form-data',
follow_redirects=True
)
assert res.status_code == 200
# Wait for the thread to finish
time.sleep(2)
# --- Watch checks ---
restored_watch = datastore.data['watching'].get(uuid)
assert restored_watch is not None, f"Watch {uuid} not found after restore"
assert restored_watch['url'] == watch_url, "Restored watch URL does not match"
assert isinstance(restored_watch, Watch.model), \
f"Watch not properly rehydrated, got {type(restored_watch)}"
assert restored_watch.history_n >= 1, \
f"Restored watch should have at least 1 history entry, got {restored_watch.history_n}"
# --- Tag checks ---
restored_tags = datastore.data['settings']['application']['tags']
restored_tag = restored_tags.get(tag_uuid)
assert restored_tag is not None, f"Tag {tag_uuid} not found after restore"
assert restored_tag['title'] == "Tasty backup tag", "Restored tag 1 title does not match"
assert isinstance(restored_tag, Tag.model), \
f"Tag 1 not properly rehydrated, got {type(restored_tag)}"
restored_tag2 = restored_tags.get(tag_uuid2)
assert restored_tag2 is not None, f"Tag {tag_uuid2} not found after restore"
assert restored_tag2['title'] == "Tasty backup tag number two", "Restored tag 2 title does not match"
assert isinstance(restored_tag2, Tag.model), \
f"Tag 2 not properly rehydrated, got {type(restored_tag2)}"
def test_backup_restore_zip_slip_rejected(client, live_server, measure_memory_usage, datastore_path):
"""Zip Slip path traversal entries in a restore zip must be rejected."""
import pytest
from changedetectionio.blueprint.backups.restore import import_from_zip
# Build a zip with a path traversal entry that would escape the extraction dir
malicious_zip = io.BytesIO()
with ZipFile(malicious_zip, 'w') as zf:
zf.writestr("../escaped.txt", "ATTACKER-CONTROLLED")
malicious_zip.seek(0)
datastore = live_server.app.config['DATASTORE']
with pytest.raises(ValueError, match="Zip Slip"):
import_from_zip(
zip_stream=malicious_zip,
datastore=datastore,
include_groups=True,
include_groups_replace=True,
include_watches=True,
include_watches_replace=True,
)
def test_backup_restore_zip_bomb_rejected(client, live_server, measure_memory_usage, datastore_path):
"""A zip whose total uncompressed size exceeds the limit must be rejected.
The guard reads file_size from the zip central-directory metadata no
actual decompression happens, so this test is fast and uses minimal RAM.
100 KB of zeros compresses to ~100 bytes; monkeypatching the limit to
50 KB is enough to trigger the check without creating any large files.
"""
import pytest
import changedetectionio.blueprint.backups.restore as restore_mod
from changedetectionio.blueprint.backups.restore import import_from_zip
# ~100 KB of zeros → deflate compresses to ~100 bytes, but file_size metadata = 100 KB
bomb_zip = io.BytesIO()
with ZipFile(bomb_zip, 'w', compression=ZIP_DEFLATED) as zf:
zf.writestr("data.txt", b"\x00" * (100 * 1024))
bomb_zip.seek(0)
datastore = live_server.app.config['DATASTORE']
original_limit = restore_mod._MAX_DECOMPRESSED_BYTES
try:
restore_mod._MAX_DECOMPRESSED_BYTES = 50 * 1024 # 50 KB limit for this test
with pytest.raises(ValueError, match="decompressed size"):
import_from_zip(
zip_stream=bomb_zip,
datastore=datastore,
include_groups=True,
include_groups_replace=True,
include_watches=True,
include_watches_replace=True,
)
finally:
restore_mod._MAX_DECOMPRESSED_BYTES = original_limit
assert len(txt_files) > 0, f"Should have at least one .txt file (history/snapshot), got: {files}"
-64
View File
@@ -1,7 +1,6 @@
#!/usr/bin/env python3
# coding=utf-8
import hashlib
import time
from flask import url_for
from .util import live_server_setup, wait_for_all_checks, extract_UUID_from_client
@@ -12,69 +11,6 @@ import os
def test_surrogate_characters_in_content_are_sanitized():
"""Lone surrogates can appear in requests' r.text when a server returns malformed/mixed-encoding
content. Without sanitization, encoding to UTF-8 raises UnicodeEncodeError.
See: https://github.com/dgtlmoon/changedetection.io/issues/3952
"""
content_with_surrogate = '<html><body>Hello \udcad World</body></html>'
# Confirm the raw problem exists
with pytest.raises(UnicodeEncodeError):
content_with_surrogate.encode('utf-8')
# Our fix: sanitize after fetcher.run() in processors/base.py call_browser()
sanitized = content_with_surrogate.encode('utf-8', errors='replace').decode('utf-8')
assert 'Hello' in sanitized
assert 'World' in sanitized
assert '\udcad' not in sanitized
# Checksum computation (processors/base.py get_raw_document_checksum) must not crash
hashlib.md5(sanitized.encode('utf-8')).hexdigest()
def test_utf8_content_without_charset_header(client, live_server, datastore_path):
"""Server returns UTF-8 content but no charset in Content-Type header.
chardet can misdetect such pages as UTF-7 (Python 3.14 then produces surrogates).
Our fix tries UTF-8 first before falling back to chardet.
See: https://github.com/dgtlmoon/changedetection.io/issues/3952
"""
from .util import write_test_file_and_sync
# UTF-8 encoded content with non-ASCII chars - no charset will be in the header
html = '<html><body><p>Español</p><p>Français</p><p>日本語</p></body></html>'
write_test_file_and_sync(os.path.join(datastore_path, "endpoint-content.txt"), html.encode('utf-8'), mode='wb')
test_url = url_for('test_endpoint', content_type="text/html", _external=True)
client.application.config.get('DATASTORE').add_watch(url=test_url)
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
res = client.get(url_for("ui.ui_preview.preview_page", uuid="first"), follow_redirects=True)
# Should decode correctly as UTF-8, not produce mojibake (Español) or replacement chars
assert 'Español'.encode('utf-8') in res.data
assert 'Français'.encode('utf-8') in res.data
assert '日本語'.encode('utf-8') in res.data
def test_shiftjis_with_meta_charset(client, live_server, datastore_path):
"""Server returns Shift-JIS content with no charset in HTTP header, but the HTML
declares <meta charset="Shift-JIS">. We should use the meta tag, not chardet.
Real-world case: https://github.com/dgtlmoon/changedetection.io/issues/3952
"""
from .util import write_test_file_and_sync
japanese_text = '日本語のページ'
html = f'<html><head><meta http-equiv="Content-Type" content="text/html;charset=Shift-JIS"></head><body><p>{japanese_text}</p></body></html>'
write_test_file_and_sync(os.path.join(datastore_path, "endpoint-content.txt"), html.encode('shift_jis'), mode='wb')
test_url = url_for('test_endpoint', content_type="text/html", _external=True)
client.application.config.get('DATASTORE').add_watch(url=test_url)
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
res = client.get(url_for("ui.ui_preview.preview_page", uuid="first"), follow_redirects=True)
assert japanese_text.encode('utf-8') in res.data
def set_html_response(datastore_path):
test_return_data = """
<html><body><span class="nav_second_img_text">
@@ -6,6 +6,10 @@ from urllib.request import urlopen
from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks
import os
sleep_time_for_fetch_thread = 3
def test_check_extract_text_from_diff(client, live_server, measure_memory_usage, datastore_path):
import time
with open(os.path.join(datastore_path, "endpoint-content.txt"), "w") as f:
-73
View File
@@ -624,76 +624,3 @@ def test_session_locale_overrides_accept_language(client, live_server, measure_m
assert "".encode() in res.data, "Expected Korean '' for Minutes"
assert "小時".encode() not in res.data, "Should not have Traditional Chinese '小時' when Korean is set"
assert "分鐘".encode() not in res.data, "Should not have Traditional Chinese '分鐘' when Korean is set"
def test_clear_history_translated_confirmation(client, live_server, measure_memory_usage, datastore_path):
"""
Test that clearing snapshot history works with translated confirmation text.
Issue #3865: When the app language is set to German, the clear history
confirmation dialog shows the translated word (e.g. 'loschen') but the
backend only accepted the English word 'clear', making it impossible
to clear snapshots in non-English languages.
"""
from flask import url_for
test_url = url_for('test_endpoint', _external=True)
# Add a watch so there is history to clear
res = client.post(
url_for("imports.import_page"),
data={"urls": test_url},
follow_redirects=True
)
assert b"1 Imported" in res.data
wait_for_all_checks(client)
# Set language to German
res = client.get(
url_for("set_language", locale="de"),
follow_redirects=True
)
assert res.status_code == 200
# Verify the clear history page shows the German confirmation word
res = client.get(
url_for("ui.clear_all_history"),
follow_redirects=True
)
assert res.status_code == 200
assert "löschen".encode() in res.data, "Expected German word 'loschen' on clear history page"
# Submit the form with the German translated word
res = client.post(
url_for("ui.clear_all_history"),
data={"confirmtext": "löschen"},
follow_redirects=True
)
assert res.status_code == 200
# Should NOT show error message
assert b"Incorrect confirmation text" not in res.data, \
"German confirmation word 'loschen' should be accepted (issue #3865)"
# Switch back to English and verify English word still works
res = client.get(
url_for("set_language", locale="en_US"),
follow_redirects=True
)
res = client.post(
url_for("ui.clear_all_history"),
data={"confirmtext": "clear"},
follow_redirects=True
)
assert res.status_code == 200
assert b"Incorrect confirmation text" not in res.data, \
"English confirmation word 'clear' should still be accepted"
# Verify that missing/empty confirmtext does not crash the server
res = client.post(
url_for("ui.clear_all_history"),
data={},
follow_redirects=True
)
assert res.status_code == 200, \
"Missing confirmtext should not crash the server"
@@ -41,6 +41,7 @@ def set_modified_ignore_response(datastore_path):
def test_render_anchor_tag_content_true(client, live_server, measure_memory_usage, datastore_path):
"""Testing that the link changes are detected when
render_anchor_tag_content setting is set to true"""
sleep_time_for_fetch_thread = 3
# Give the endpoint time to spin up
time.sleep(1)
@@ -100,6 +100,7 @@ def test_normal_page_check_works_with_ignore_status_code(client, live_server, me
# Tests the whole stack works with staus codes ignored
def test_403_page_check_works_with_ignore_status_code(client, live_server, measure_memory_usage, datastore_path):
sleep_time_for_fetch_thread = 3
set_original_response(datastore_path=datastore_path)
@@ -111,7 +112,8 @@ def test_403_page_check_works_with_ignore_status_code(client, live_server, measu
uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
# Give the thread time to pick it up
time.sleep(sleep_time_for_fetch_thread)
# Goto the edit page, check our ignore option
# Add our URL to the import page
@@ -2,9 +2,10 @@
import time
from flask import url_for
from . util import live_server_setup
import os
from .util import live_server_setup, delete_all_watches, wait_for_all_checks
# Should be the same as set_original_ignore_response(datastore_path=datastore_path) but with a little more whitespacing
@@ -49,7 +50,10 @@ def set_original_ignore_response(datastore_path):
# If there was only a change in the whitespacing, then we shouldnt have a change detected
def test_check_ignore_whitespace(client, live_server, measure_memory_usage, datastore_path):
sleep_time_for_fetch_thread = 3
# Give the endpoint time to spin up
time.sleep(1)
set_original_ignore_response(datastore_path=datastore_path)
@@ -70,17 +74,17 @@ def test_check_ignore_whitespace(client, live_server, measure_memory_usage, data
uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
time.sleep(sleep_time_for_fetch_thread)
# Trigger a check
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
set_original_ignore_response_but_with_whitespace(datastore_path)
wait_for_all_checks(client)
time.sleep(sleep_time_for_fetch_thread)
# Trigger a check
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
# Give the thread time to pick it up
wait_for_all_checks(client)
time.sleep(sleep_time_for_fetch_thread)
# It should report nothing found (no new 'has-unread-changes' class)
res = client.get(url_for("watchlist.index"))
@@ -16,51 +16,6 @@ except ModuleNotFoundError:
def test_jsonp_treated_as_plaintext():
from ..processors.magic import guess_stream_type
# JSONP content (server wrongly claims application/json) should be detected as plaintext
# Callback names are arbitrary identifiers, not always 'cb'
jsonp_content = 'jQuery123456({ "version": "8.0.41", "url": "https://example.com/app.apk" })'
result = guess_stream_type(http_content_header="application/json", content=jsonp_content)
assert result.is_json is False
assert result.is_plaintext is True
# Variation with dotted callback name e.g. jQuery.cb(...)
jsonp_dotted = 'some.callback({ "version": "1.0" })'
result = guess_stream_type(http_content_header="application/json", content=jsonp_dotted)
assert result.is_json is False
assert result.is_plaintext is True
# Real JSON should still be detected as JSON
json_content = '{ "version": "8.0.41", "url": "https://example.com/app.apk" }'
result = guess_stream_type(http_content_header="application/json", content=json_content)
assert result.is_json is True
assert result.is_plaintext is False
def test_jsonp_json_filter_extraction():
from .. import html_tools
# Tough case: dotted namespace callback, trailing semicolon, deeply nested content with arrays
jsonp_content = 'weixin.update.callback({"platforms": {"android": {"variants": [{"arch": "arm64", "versionName": "8.0.68", "url": "https://example.com/app-arm64.apk"}, {"arch": "arm32", "versionName": "8.0.41", "url": "https://example.com/app-arm32.apk"}]}}});'
# Deep nested jsonpath filter into array element
text = html_tools.extract_json_as_string(jsonp_content, "json:$.platforms.android.variants[0].versionName")
assert text == '"8.0.68"'
# Filter that selects the second array element
text = html_tools.extract_json_as_string(jsonp_content, "json:$.platforms.android.variants[1].arch")
assert text == '"arm32"'
if jq_support:
text = html_tools.extract_json_as_string(jsonp_content, "jq:.platforms.android.variants[0].versionName")
assert text == '"8.0.68"'
text = html_tools.extract_json_as_string(jsonp_content, "jqraw:.platforms.android.variants[1].url")
assert text == "https://example.com/app-arm32.apk"
def test_unittest_inline_html_extract():
# So lets pretend that the JSON we want is inside some HTML
content="""
+2 -17
View File
@@ -17,7 +17,6 @@ from changedetectionio.notification import (
)
from ..diff import HTML_CHANGED_STYLE
from ..model import USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH
from ..notification_service import FormattableTimestamp
# Hard to just add more live server URLs when one test is already running (I think)
@@ -108,11 +107,7 @@ def test_check_notification(client, live_server, measure_memory_usage, datastore
"Diff Added: {{diff_added}}\n"
"Diff Removed: {{diff_removed}}\n"
"Diff Full: {{diff_full}}\n"
"Diff with args: {{diff(context=3)}}"
"Diff as Patch: {{diff_patch}}\n"
"Change datetime: {{change_datetime}}\n"
"Change datetime format: Weekday {{change_datetime(format='%A')}}\n"
"Change datetime format: {{change_datetime(format='%Y-%m-%dT%H:%M:%S%z')}}\n"
":-)",
"notification_screenshot": True,
"notification_format": 'text'}
@@ -140,6 +135,8 @@ def test_check_notification(client, live_server, measure_memory_usage, datastore
assert bytes(notification_url.encode('utf-8')) in res.data
assert bytes("New ChangeDetection.io Notification".encode('utf-8')) in res.data
## Now recheck, and it should have sent the notification
wait_for_all_checks(client)
set_modified_response(datastore_path=datastore_path)
@@ -175,23 +172,11 @@ def test_check_notification(client, live_server, measure_memory_usage, datastore
assert ":-)" in notification_submission
assert "New ChangeDetection.io Notification - {}".format(test_url) in notification_submission
assert test_url in notification_submission
assert ':-)' in notification_submission
# Check the attachment was added, and that it is a JPEG from the original PNG
notification_submission_object = json.loads(notification_submission)
assert notification_submission_object
import time
# Could be from a few seconds ago (when the notification was fired vs in this test checking), so check for any
times_possible = [str(FormattableTimestamp(int(time.time()) - i)) for i in range(15)]
assert any(t in notification_submission for t in times_possible)
txt = f"Weekday {FormattableTimestamp(int(time.time()))(format='%A')}"
assert txt in notification_submission
# We keep PNG screenshots for now
# IF THIS FAILS YOU SHOULD BE TESTING WITH ENV VAR REMOVE_REQUESTS_OLD_SCREENSHOTS=False
assert notification_submission_object['attachments'][0]['filename'] == 'last-screenshot.png'
@@ -109,7 +109,7 @@ def test_itemprop_price_change(client, live_server, measure_memory_usage, datast
set_original_response(props_markup=instock_props[0], price='120.45', datastore_path=datastore_path)
res = client.post(
url_for("ui.ui_edit.edit_page", uuid="first"),
data={"processor_config_restock_diff-follow_price_changes": "", "url": test_url, "tags": "", "headers": "", 'fetch_backend': "html_requests", "time_between_check_use_default": "y"},
data={"restock_settings-follow_price_changes": "", "url": test_url, "tags": "", "headers": "", 'fetch_backend': "html_requests", "time_between_check_use_default": "y"},
follow_redirects=True
)
assert b"Updated watch." in res.data
@@ -204,9 +204,9 @@ def _run_test_minmax_limit(client, extra_watch_edit_form, datastore_path):
def test_restock_itemprop_minmax(client, live_server, measure_memory_usage, datastore_path):
extras = {
"processor_config_restock_diff-follow_price_changes": "y",
"processor_config_restock_diff-price_change_min": 900.0,
"processor_config_restock_diff-price_change_max": 1100.10
"restock_settings-follow_price_changes": "y",
"restock_settings-price_change_min": 900.0,
"restock_settings-price_change_max": 1100.10
}
_run_test_minmax_limit(client, extra_watch_edit_form=extras, datastore_path=datastore_path)
@@ -223,9 +223,9 @@ def test_restock_itemprop_with_tag(client, live_server, measure_memory_usage, da
res = client.post(
url_for("tags.form_tag_edit_submit", uuid="first"),
data={"name": "test-tag",
"processor_config_restock_diff-follow_price_changes": "y",
"processor_config_restock_diff-price_change_min": 900.0,
"processor_config_restock_diff-price_change_max": 1100.10,
"restock_settings-follow_price_changes": "y",
"restock_settings-price_change_min": 900.0,
"restock_settings-price_change_max": 1100.10,
"overrides_watch": "y", #overrides_watch should be restock_overrides_watch
},
follow_redirects=True
@@ -258,8 +258,8 @@ def test_itemprop_percent_threshold(client, live_server, measure_memory_usage, d
res = client.post(
url_for("ui.ui_edit.edit_page", uuid="first"),
data={"processor_config_restock_diff-follow_price_changes": "y",
"processor_config_restock_diff-price_change_threshold_percent": 5.0,
data={"restock_settings-follow_price_changes": "y",
"restock_settings-price_change_threshold_percent": 5.0,
"url": test_url,
"tags": "",
"headers": "",
@@ -305,8 +305,8 @@ def test_itemprop_percent_threshold(client, live_server, measure_memory_usage, d
res = client.post(
url_for("ui.ui_edit.edit_page", uuid=uuid),
data={"processor_config_restock_diff-follow_price_changes": "y",
"processor_config_restock_diff-price_change_threshold_percent": 5.05,
data={"restock_settings-follow_price_changes": "y",
"restock_settings-price_change_threshold_percent": 5.05,
"processor": "text_json_diff",
"url": test_url,
'fetch_backend': "html_requests",
@@ -350,7 +350,6 @@ def test_change_with_notification_values(client, live_server, measure_memory_usa
res = client.get(url_for("settings.settings_page"))
assert b'{{restock.original_price}}' in res.data
assert b'{{restock.previous_price}}' in res.data
assert b'Original price at first check' in res.data
#####################
@@ -359,7 +358,7 @@ def test_change_with_notification_values(client, live_server, measure_memory_usa
url_for("settings.settings_page"),
data={"application-notification_urls": notification_url,
"application-notification_title": "title new price {{restock.price}}",
"application-notification_body": "new price {{restock.price}} previous price {{restock.previous_price}} instock {{restock.in_stock}}",
"application-notification_body": "new price {{restock.price}}",
"application-notification_format": default_notification_format,
"requests-time_between_check-minutes": 180,
'application-fetch_backend': "html_requests"},
@@ -373,6 +372,8 @@ def test_change_with_notification_values(client, live_server, measure_memory_usa
assert b"Settings updated." in res.data
set_original_response(props_markup=instock_props[0], price='960.45', datastore_path=datastore_path)
# A change in price, should trigger a change by default
set_original_response(props_markup=instock_props[0], price='1950.45', datastore_path=datastore_path)
client.get(url_for("ui.form_watch_checknow"))
@@ -383,7 +384,6 @@ def test_change_with_notification_values(client, live_server, measure_memory_usa
notification = f.read()
assert "new price 1950.45" in notification
assert "title new price 1950.45" in notification
assert "previous price 960.45" in notification
## Now test the "SEND TEST NOTIFICATION" is working
os.unlink(os.path.join(datastore_path, "notification.txt"))
@@ -467,38 +467,3 @@ def test_special_prop_examples(client, live_server, measure_memory_usage, datast
assert b'155.55' in res.data
delete_all_watches(client)
def test_itemprop_as_str(client, live_server, measure_memory_usage, datastore_path):
test_return_data = f"""<html>
<body>
Some initial text<br>
<p>Which is across multiple lines</p>
<span itemprop="offers" itemscope itemtype="http://schema.org/Offer">
<meta content="767.55" itemprop="price"/>
<meta content="EUR" itemprop="priceCurrency"/>
<meta content="InStock" itemprop="availability"/>
<meta content="https://www.123-test.dk" itemprop="url"/>
</span>
</body>
</html>
"""
with open(os.path.join(datastore_path, "endpoint-content.txt"), "w") as f:
f.write(test_return_data)
test_url = url_for('test_endpoint', _external=True)
client.post(
url_for("ui.ui_views.form_quick_watch_add"),
data={"url": test_url, "tags": 'restock tests', 'processor': 'restock_diff'},
follow_redirects=True
)
client.get(url_for("ui.form_watch_checknow"))
wait_for_all_checks(client)
res = client.get(url_for("watchlist.index"))
assert b'767.55' in res.data
-266
View File
@@ -1,5 +1,4 @@
import os
import pytest
from flask import url_for
@@ -25,31 +24,6 @@ def set_original_response(datastore_path):
f.write(test_return_data)
return None
def test_favicon(client, live_server, measure_memory_usage, datastore_path):
# Attempt to fetch it, make sure that works
SVG_BASE64 = 'PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgMCAxIDEiLz4='
uuid = client.application.config.get('DATASTORE').add_watch(url='https://localhost')
live_server.app.config['DATASTORE'].data['watching'][uuid].bump_favicon(url="favicon-set-type.svg",
favicon_base_64=SVG_BASE64
)
res = client.get(url_for('static_content', group='favicon', filename=uuid))
assert res.status_code == 200
assert len(res.data) > 10
res = client.get(url_for('static_content', group='..', filename='__init__.py'))
assert res.status_code != 200
res = client.get(url_for('static_content', group='.', filename='../__init__.py'))
assert res.status_code != 200
# Traverse by filename protection
res = client.get(url_for('static_content', group='js', filename='../styles/styles.css'))
assert res.status_code != 200
def test_bad_access(client, live_server, measure_memory_usage, datastore_path):
res = client.post(
@@ -504,243 +478,3 @@ def test_logout_with_redirect(client, live_server, measure_memory_usage, datasto
# Cleanup
del client.application.config['DATASTORE'].data['settings']['application']['password']
def test_static_directory_traversal(client, live_server, measure_memory_usage, datastore_path):
"""
Test that the static file serving route properly blocks directory traversal attempts.
This tests the fix for GHSA-9jj8-v89v-xjvw (CVE pending).
The vulnerability was in /static/<group>/<filename> where the sanitization regex
allowed dots, enabling "../" traversal to read application source files.
The fix changed the regex from r'[^\w.-]+' to r'[^a-z0-9_]+' which blocks dots.
"""
# Test 1: Direct .. traversal attempt (URL-encoded)
res = client.get(
"/static/%2e%2e/flask_app.py",
follow_redirects=False
)
# Should be blocked (404 or 403)
assert res.status_code in [404, 403], f"Expected 404/403, got {res.status_code}"
# Should NOT contain application source code
assert b"def static_content" not in res.data
assert b"changedetection_app" not in res.data
# Test 2: Direct .. traversal attempt (unencoded)
res = client.get(
"/static/../flask_app.py",
follow_redirects=False
)
assert res.status_code in [404, 403], f"Expected 404/403, got {res.status_code}"
assert b"def static_content" not in res.data
# Test 3: Multiple dots traversal
res = client.get(
"/static/..../flask_app.py",
follow_redirects=False
)
assert res.status_code in [404, 403], f"Expected 404/403, got {res.status_code}"
assert b"def static_content" not in res.data
# Test 4: Try to access other application files
for filename in ["__init__.py", "datastore.py", "store.py"]:
res = client.get(
f"/static/%2e%2e/{filename}",
follow_redirects=False
)
assert res.status_code in [404, 403], f"File {filename} should be blocked"
# Should not contain Python code indicators
assert b"import" not in res.data or b"# Test" in res.data # Allow "1 Imported" etc
# Test 5: Verify legitimate static files still work
# Note: We can't test actual files without knowing what exists,
# but we can verify the sanitization doesn't break valid groups
res = client.get(
"/static/images/test.png", # Will 404 if file doesn't exist, but won't traverse
follow_redirects=False
)
# Should get 404 (file not found) not 403 (blocked)
# This confirms the group name "images" is valid
assert res.status_code == 404
# Test 6: Ensure hyphens and dots are blocked in group names
res = client.get(
"/static/../../../etc/passwd",
follow_redirects=False
)
assert res.status_code in [404, 403]
assert b"root:" not in res.data
# Test 7: Test that underscores still work (they're allowed)
res = client.get(
"/static/visual_selector_data/test.json",
follow_redirects=False
)
# visual_selector_data is a real group, but requires auth
# Should get 403 (not authenticated) or 404 (file not found), not a path traversal
assert res.status_code in [403, 404]
def test_ssrf_private_ip_blocked(client, live_server, monkeypatch, measure_memory_usage, datastore_path):
"""
SSRF protection: IANA-reserved/private IP addresses are blocked at fetch-time, not add-time.
Watches targeting private/reserved IPs can be *added* freely; the block happens when the
fetcher actually tries to reach the URL (via validate_iana_url() in call_browser()).
Covers:
1. is_private_hostname() correctly classifies all reserved ranges
2. is_safe_valid_url() ALLOWS private-IP URLs at add-time (IANA check moved to fetch-time)
3. ALLOW_IANA_RESTRICTED_ADDRESSES has no effect on add-time; it only controls fetch-time
4. UI form accepts private-IP URLs at add-time without error
5. Requests fetcher blocks fetch-time DNS rebinding (fresh check on every fetch)
6. Requests fetcher blocks redirects that lead to a private IP (open-redirect bypass)
conftest.py sets ALLOW_IANA_RESTRICTED_ADDRESSES=true globally so the test
server (localhost) keeps working for all other tests. monkeypatch temporarily
overrides it to 'false' here, and is automatically restored after the test.
"""
from unittest.mock import patch, MagicMock
from changedetectionio.validate_url import is_safe_valid_url, is_private_hostname
monkeypatch.setenv('ALLOW_IANA_RESTRICTED_ADDRESSES', 'false')
# ------------------------------------------------------------------
# 1. is_private_hostname() — unit tests across all reserved ranges
# ------------------------------------------------------------------
private_hosts = [
'127.0.0.1', # loopback
'10.0.0.1', # RFC 1918
'172.16.0.1', # RFC 1918
'192.168.1.1', # RFC 1918
'169.254.169.254', # link-local / AWS metadata endpoint
'::1', # IPv6 loopback
'fc00::1', # IPv6 unique local
'fe80::1', # IPv6 link-local
]
for host in private_hosts:
assert is_private_hostname(host), f"{host} should be identified as private/reserved"
for host in ['8.8.8.8', '1.1.1.1']:
assert not is_private_hostname(host), f"{host} should be identified as public"
# ------------------------------------------------------------------
# 2. is_safe_valid_url() ALLOWS private-IP URLs at add-time
# IANA check is no longer done here — it moved to fetch-time validate_iana_url()
# ------------------------------------------------------------------
private_ip_urls = [
'http://127.0.0.1/',
'http://10.0.0.1/',
'http://172.16.0.1/',
'http://192.168.1.1/',
'http://169.254.169.254/',
'http://169.254.169.254/latest/meta-data/iam/security-credentials/',
'http://[::1]/',
'http://[fc00::1]/',
'http://[fe80::1]/',
]
for url in private_ip_urls:
assert is_safe_valid_url(url), f"{url} should be allowed by is_safe_valid_url (IANA check is at fetch-time)"
# ------------------------------------------------------------------
# 3. ALLOW_IANA_RESTRICTED_ADDRESSES does not affect add-time validation
# It only controls fetch-time blocking inside validate_iana_url()
# ------------------------------------------------------------------
monkeypatch.setenv('ALLOW_IANA_RESTRICTED_ADDRESSES', 'true')
assert is_safe_valid_url('http://127.0.0.1/'), \
"Private IP should be allowed at add-time regardless of ALLOW_IANA_RESTRICTED_ADDRESSES"
monkeypatch.setenv('ALLOW_IANA_RESTRICTED_ADDRESSES', 'false')
assert is_safe_valid_url('http://127.0.0.1/'), \
"Private IP should be allowed at add-time regardless of ALLOW_IANA_RESTRICTED_ADDRESSES"
# ------------------------------------------------------------------
# 4. UI form accepts private-IP URLs at add-time
# The watch is created; the SSRF block fires later at fetch-time
# ------------------------------------------------------------------
for url in ['http://127.0.0.1/', 'http://169.254.169.254/latest/meta-data/']:
res = client.post(
url_for('ui.ui_views.form_quick_watch_add'),
data={'url': url, 'tags': ''},
follow_redirects=True
)
assert b'Watch protocol is not permitted or invalid URL format' not in res.data, \
f"UI should accept {url} at add-time (SSRF is blocked at fetch-time)"
# ------------------------------------------------------------------
# 5. Fetch-time DNS-rebinding check in the requests fetcher
# Simulates: URL passed add-time validation with a public IP, but
# by fetch time DNS has been rebound to a private IP.
# ------------------------------------------------------------------
from changedetectionio.content_fetchers.requests import fetcher as RequestsFetcher
f = RequestsFetcher()
with patch('changedetectionio.content_fetchers.requests.is_private_hostname', return_value=True):
with pytest.raises(Exception, match='private/reserved'):
f._run_sync(
url='http://example.com/',
timeout=5,
request_headers={},
request_body=None,
request_method='GET',
)
# ------------------------------------------------------------------
# 6. Redirect-to-private-IP blocked (open-redirect SSRF bypass)
# Public host returns a 302 pointing at an IANA-reserved address.
# ------------------------------------------------------------------
mock_redirect = MagicMock()
mock_redirect.is_redirect = True
mock_redirect.status_code = 302
mock_redirect.headers = {'Location': 'http://169.254.169.254/latest/meta-data/'}
def _private_only_for_redirect(hostname):
# Initial host is "public"; the redirect target is private
return hostname in {'169.254.169.254', '10.0.0.1', '172.16.0.1',
'192.168.0.1', '127.0.0.1', '::1'}
with patch('changedetectionio.content_fetchers.requests.is_private_hostname',
side_effect=_private_only_for_redirect):
with patch('requests.Session.request', return_value=mock_redirect):
with pytest.raises(Exception, match='Redirect blocked'):
f._run_sync(
url='http://example.com/',
timeout=5,
request_headers={},
request_body=None,
request_method='GET',
)
def test_unresolvable_hostname_is_allowed(client, live_server, monkeypatch):
"""
Unresolvable hostnames must NOT be blocked at add-time when ALLOW_IANA_RESTRICTED_ADDRESSES=false.
DNS failure (gaierror) at add-time does not mean the URL resolves to a private IP
the domain may simply be offline or not yet live. Blocking it would be a false positive.
The real DNS-rebinding protection happens at fetch-time in call_browser().
"""
from changedetectionio.validate_url import is_safe_valid_url
monkeypatch.setenv('ALLOW_IANA_RESTRICTED_ADDRESSES', 'false')
url = 'http://this-host-does-not-exist-xyz987.invalid/some/path'
# Should pass URL validation despite being unresolvable
assert is_safe_valid_url(url), \
"Unresolvable hostname should pass is_safe_valid_url — DNS failure is not a private-IP signal"
# Should be accepted via the UI form and appear in the watch list
res = client.post(
url_for('ui.ui_views.form_quick_watch_add'),
data={'url': url, 'tags': ''},
follow_redirects=True
)
assert b'Watch protocol is not permitted or invalid URL format' not in res.data, \
"UI should not reject a URL just because its hostname is unresolvable"
res = client.get(url_for('watchlist.index'))
assert b'this-host-does-not-exist-xyz987.invalid' in res.data, \
"Unresolvable hostname watch should appear in the watch overview list"

Some files were not shown because too many files have changed in this diff Show More