mirror of
https://github.com/dgtlmoon/changedetection.io.git
synced 2026-04-29 22:37:09 +00:00
Compare commits
91 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| a89c30f882 | |||
| c6744f6969 | |||
| 01eb8f629a | |||
| faa7fa88cd | |||
| 3123bf0016 | |||
| fcadda5f09 | |||
| dc157cccd5 | |||
| 8018742c67 | |||
| e41b33269f | |||
| d4d6bb2872 | |||
| 45fb262386 | |||
| 1058debc12 | |||
| 61b41b0b16 | |||
| efe3afd383 | |||
| 84d26640cc | |||
| 2349344d9e | |||
| bdc2916c07 | |||
| 4fd477a60c | |||
| dc8b387f40 | |||
| 2149a6fe3b | |||
| f77d2bac6d | |||
| 75ecd1b793 | |||
| 4fe2a67839 | |||
| 5bbbe37436 | |||
| 83d7ce0fcf | |||
| 6bea9909ec | |||
| 1aabf967ef | |||
| 30dc4ac23b | |||
| 2658f81f02 | |||
| 674d863a21 | |||
| 0b9cfcdf09 | |||
| fd820c9330 | |||
| e02a1824c5 | |||
| 5911b7fe7a | |||
| a239480272 | |||
| fceb3cf39f | |||
| 7f631268dd | |||
| 8cc04ca7c5 | |||
| 4dec1e017b | |||
| 9d1743adbe | |||
| f34d806b09 | |||
| c22335ed01 | |||
| 0042f0c36a | |||
| 55e14cf394 | |||
| 308ccb5841 | |||
| 978e17acf6 | |||
| 73c29d1fa0 | |||
| b3eb88b6d2 | |||
| aa73ce2ee6 | |||
| 0cbf345e84 | |||
| d65e08e7c8 | |||
| 10f233a939 | |||
| 52911d699f | |||
| 7e886e0c56 | |||
| 151e603af7 | |||
| 7311af4b58 | |||
| af193e8d7a | |||
| 9e2acadb7e | |||
| 48da93b4ec | |||
| 0c1adc8906 | |||
| 9e5a0a0209 | |||
| 9b96689072 | |||
| 5e5674f48d | |||
| 272e68ad2e | |||
| 01e06979d8 | |||
| e45c77d51d | |||
| bee1130c6e | |||
| 5f8448d0e2 | |||
| 9438d38dc6 | |||
| d0c66758c2 | |||
| 9e8a9d5907 | |||
| 7449be39fb | |||
| e9f3d0bce4 | |||
| 2abc8aa9b4 | |||
| 69b70a2a07 | |||
| 0c42bcb8d6 | |||
| 091c708a28 | |||
| 084be9c990 | |||
| 6db1085337 | |||
| 66553e106d | |||
| 5b01dbd9f8 | |||
| c86f214fc3 | |||
| 32149640d9 | |||
| 15f16455fc | |||
| 15cdfac9d9 | |||
| 04de397916 | |||
| 4643082c5b | |||
| 3b2b74e62d | |||
| 68354cf53d | |||
| 3e364e0eba | |||
| 06ea29bfc7 |
@@ -15,7 +15,6 @@ on:
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
- dev
|
||||
|
||||
jobs:
|
||||
metadata:
|
||||
@@ -93,17 +92,28 @@ jobs:
|
||||
version: latest
|
||||
driver-opts: image=moby/buildkit:master
|
||||
|
||||
# dev branch -> :dev container tag
|
||||
# master branch -> :dev container tag
|
||||
- name: Docker meta :dev
|
||||
if: ${{ github.ref == 'refs/heads/master' && github.event_name != 'release' }}
|
||||
uses: docker/metadata-action@v5
|
||||
id: meta_dev
|
||||
with:
|
||||
images: |
|
||||
${{ secrets.DOCKER_HUB_USERNAME }}/changedetection.io
|
||||
ghcr.io/${{ github.repository }}
|
||||
tags: |
|
||||
type=raw,value=dev
|
||||
|
||||
- name: Build and push :dev
|
||||
id: docker_build
|
||||
if: ${{ github.ref == 'refs/heads/dev' }}
|
||||
if: ${{ github.ref == 'refs/heads/master' && github.event_name != 'release' }}
|
||||
uses: docker/build-push-action@v6
|
||||
with:
|
||||
context: ./
|
||||
file: ./Dockerfile
|
||||
push: true
|
||||
tags: |
|
||||
${{ secrets.DOCKER_HUB_USERNAME }}/changedetection.io:dev,ghcr.io/${{ github.repository }}:dev
|
||||
tags: ${{ steps.meta_dev.outputs.tags }}
|
||||
labels: ${{ steps.meta_dev.outputs.labels }}
|
||||
platforms: linux/amd64,linux/arm64,linux/arm/v7,linux/arm/v8
|
||||
cache-from: type=gha
|
||||
cache-to: type=gha,mode=max
|
||||
@@ -142,6 +152,7 @@ jobs:
|
||||
file: ./Dockerfile
|
||||
push: true
|
||||
tags: ${{ steps.meta.outputs.tags }}
|
||||
labels: ${{ steps.meta.outputs.labels }}
|
||||
platforms: linux/amd64,linux/arm64,linux/arm/v7,linux/arm/v8
|
||||
cache-from: type=gha
|
||||
cache-to: type=gha,mode=max
|
||||
|
||||
@@ -61,8 +61,8 @@ jobs:
|
||||
|
||||
# --- API test ---
|
||||
# This also means that the docs/api-spec.yml was shipped and could be read
|
||||
test -f /tmp/url-watches.json
|
||||
API_KEY=$(jq -r '.. | .api_access_token? // empty' /tmp/url-watches.json)
|
||||
test -f /tmp/changedetection.json
|
||||
API_KEY=$(jq -r '.. | .api_access_token? // empty' /tmp/changedetection.json)
|
||||
echo Test API KEY is $API_KEY
|
||||
curl -X POST "http://127.0.0.1:10000/api/v1/watch" \
|
||||
-H "x-api-key: ${API_KEY}" \
|
||||
|
||||
@@ -37,10 +37,29 @@ jobs:
|
||||
${{ runner.os }}-pip-py${{ env.PYTHON_VERSION }}-
|
||||
${{ runner.os }}-pip-
|
||||
|
||||
- name: Get current date for cache key
|
||||
id: date
|
||||
run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
|
||||
- name: Build changedetection.io container for testing under Python ${{ env.PYTHON_VERSION }}
|
||||
uses: docker/build-push-action@v6
|
||||
with:
|
||||
context: ./
|
||||
file: ./Dockerfile
|
||||
build-args: |
|
||||
PYTHON_VERSION=${{ env.PYTHON_VERSION }}
|
||||
LOGGER_LEVEL=TRACE
|
||||
tags: test-changedetectionio
|
||||
load: true
|
||||
cache-from: type=gha,scope=build-${{ github.ref_name }}-py${{ env.PYTHON_VERSION }}-${{ hashFiles('requirements.txt', 'Dockerfile') }}-${{ steps.date.outputs.date }}
|
||||
cache-to: type=gha,mode=max,scope=build-${{ github.ref_name }}-py${{ env.PYTHON_VERSION }}-${{ hashFiles('requirements.txt', 'Dockerfile') }}-${{ steps.date.outputs.date }}
|
||||
|
||||
- name: Verify build
|
||||
run: |
|
||||
echo "---- Building for Python ${{ env.PYTHON_VERSION }} -----"
|
||||
docker build --build-arg PYTHON_VERSION=${{ env.PYTHON_VERSION }} --build-arg LOGGER_LEVEL=TRACE -t test-changedetectionio .
|
||||
echo "---- Built for Python ${{ env.PYTHON_VERSION }} -----"
|
||||
docker run test-changedetectionio bash -c 'pip list'
|
||||
|
||||
- name: We should be Python ${{ env.PYTHON_VERSION }} ...
|
||||
@@ -84,6 +103,7 @@ jobs:
|
||||
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_watch_model'
|
||||
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_jinja2_security'
|
||||
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_semver'
|
||||
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_html_to_text'
|
||||
|
||||
# Basic pytest tests with ancillary services
|
||||
basic-tests:
|
||||
@@ -110,6 +130,32 @@ jobs:
|
||||
docker network inspect changedet-network >/dev/null 2>&1 || docker network create changedet-network
|
||||
docker run --name test-cdio-basic-tests --network changedet-network test-changedetectionio bash -c 'cd changedetectionio && ./run_basic_tests.sh'
|
||||
|
||||
- name: Test CLI options
|
||||
run: |
|
||||
docker network inspect changedet-network >/dev/null 2>&1 || docker network create changedet-network
|
||||
docker run --name test-cdio-cli-opts --network changedet-network test-changedetectionio bash -c 'changedetectionio/test_cli_opts.sh' &> cli-opts-output.txt
|
||||
echo "=== CLI Options Test Output ==="
|
||||
cat cli-opts-output.txt
|
||||
|
||||
- name: CLI Memory Test
|
||||
run: |
|
||||
echo "=== Checking CLI batch mode memory usage ==="
|
||||
# Extract RSS memory value from output
|
||||
RSS_MB=$(grep -oP "Memory consumption before worker shutdown: RSS=\K[\d.]+" cli-opts-output.txt | head -1 || echo "0")
|
||||
echo "RSS Memory: ${RSS_MB} MB"
|
||||
|
||||
# Check if RSS is less than 100MB
|
||||
if [ -n "$RSS_MB" ]; then
|
||||
if (( $(echo "$RSS_MB < 100" | bc -l) )); then
|
||||
echo "✓ Memory usage is acceptable: ${RSS_MB} MB < 100 MB"
|
||||
else
|
||||
echo "✗ Memory usage too high: ${RSS_MB} MB >= 100 MB"
|
||||
exit 1
|
||||
fi
|
||||
else
|
||||
echo "⚠ Could not extract memory usage, skipping check"
|
||||
fi
|
||||
|
||||
- name: Extract memory report and logs
|
||||
if: always()
|
||||
uses: ./.github/actions/extract-memory-report
|
||||
@@ -124,6 +170,13 @@ jobs:
|
||||
name: test-cdio-basic-tests-output-py${{ env.PYTHON_VERSION }}
|
||||
path: output-logs
|
||||
|
||||
- name: Store CLI test output
|
||||
if: always()
|
||||
uses: actions/upload-artifact@v6
|
||||
with:
|
||||
name: test-cdio-cli-opts-output-py${{ env.PYTHON_VERSION }}
|
||||
path: cli-opts-output.txt
|
||||
|
||||
# Playwright tests
|
||||
playwright-tests:
|
||||
runs-on: ubuntu-latest
|
||||
@@ -342,6 +395,29 @@ jobs:
|
||||
cd changedetectionio
|
||||
./run_custom_browser_url_tests.sh
|
||||
|
||||
processor-plugin-tests:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
timeout-minutes: 20
|
||||
env:
|
||||
PYTHON_VERSION: ${{ inputs.python-version }}
|
||||
steps:
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
|
||||
- name: Load Docker image
|
||||
run: |
|
||||
docker load -i /tmp/test-changedetectionio.tar
|
||||
|
||||
- name: Basic processor plugin registration and checks
|
||||
run: |
|
||||
docker run -e EXTRA_PACKAGES=changedetection.io-osint-processor test-changedetectionio bash -c 'cd changedetectionio;pytest -vvv -s tests/plugins/test_processor.py::test_check_plugin_processor'
|
||||
|
||||
# Container startup tests
|
||||
container-tests:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
@@ -29,3 +29,4 @@ test-datastore/
|
||||
|
||||
# Memory consumption log
|
||||
test-memory.log
|
||||
tests/logs/
|
||||
|
||||
+15
@@ -78,6 +78,12 @@ RUN --mount=type=cache,id=pip,sharing=locked,target=/tmp/pip-cache \
|
||||
# Final image stage
|
||||
FROM python:${PYTHON_VERSION}-slim-bookworm
|
||||
LABEL org.opencontainers.image.source="https://github.com/dgtlmoon/changedetection.io"
|
||||
LABEL org.opencontainers.image.url="https://changedetection.io"
|
||||
LABEL org.opencontainers.image.documentation="https://changedetection.io/tutorials"
|
||||
LABEL org.opencontainers.image.title="changedetection.io"
|
||||
LABEL org.opencontainers.image.description="Self-hosted web page change monitoring and notification service"
|
||||
LABEL org.opencontainers.image.licenses="Apache-2.0"
|
||||
LABEL org.opencontainers.image.vendor="changedetection.io"
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
libxslt1.1 \
|
||||
@@ -132,6 +138,15 @@ ENV LOGGER_LEVEL="$LOGGER_LEVEL"
|
||||
ENV LC_ALL=en_US.UTF-8
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Copy and set up entrypoint script for installing extra packages
|
||||
COPY docker-entrypoint.sh /docker-entrypoint.sh
|
||||
RUN chmod +x /docker-entrypoint.sh
|
||||
|
||||
# Set entrypoint to handle EXTRA_PACKAGES env var
|
||||
ENTRYPOINT ["/docker-entrypoint.sh"]
|
||||
|
||||
# Default command (can be overridden in docker-compose.yml)
|
||||
CMD ["python", "./changedetection.py", "-d", "/datastore"]
|
||||
|
||||
|
||||
|
||||
@@ -9,6 +9,7 @@ recursive-include changedetectionio/notification *
|
||||
recursive-include changedetectionio/processors *
|
||||
recursive-include changedetectionio/realtime *
|
||||
recursive-include changedetectionio/static *
|
||||
recursive-include changedetectionio/store *
|
||||
recursive-include changedetectionio/templates *
|
||||
recursive-include changedetectionio/tests *
|
||||
recursive-include changedetectionio/translations *
|
||||
@@ -16,6 +17,7 @@ recursive-include changedetectionio/widgets *
|
||||
prune changedetectionio/static/package-lock.json
|
||||
prune changedetectionio/static/styles/node_modules
|
||||
prune changedetectionio/static/styles/package-lock.json
|
||||
include changedetectionio/favicon_utils.py
|
||||
include changedetection.py
|
||||
include requirements.txt
|
||||
include README-pip.md
|
||||
|
||||
@@ -0,0 +1,5 @@
|
||||
[python: **.py]
|
||||
keywords = _:1,_l:1,gettext:1
|
||||
|
||||
[jinja2: **/templates/**.html]
|
||||
encoding = utf-8
|
||||
+401
-51
@@ -2,23 +2,24 @@
|
||||
|
||||
# Read more https://github.com/dgtlmoon/changedetection.io/wiki
|
||||
# Semver means never use .01, or 00. Should be .1.
|
||||
__version__ = '0.52.4'
|
||||
__version__ = '0.52.9'
|
||||
|
||||
from changedetectionio.strtobool import strtobool
|
||||
from json.decoder import JSONDecodeError
|
||||
|
||||
from loguru import logger
|
||||
import getopt
|
||||
import logging
|
||||
import os
|
||||
import getopt
|
||||
import platform
|
||||
import signal
|
||||
|
||||
import sys
|
||||
import threading
|
||||
import time
|
||||
|
||||
# Eventlet completely removed - using threading mode for SocketIO
|
||||
# This provides better Python 3.12+ compatibility and eliminates eventlet/asyncio conflicts
|
||||
from changedetectionio import store
|
||||
from changedetectionio.flask_app import changedetection_app
|
||||
from loguru import logger
|
||||
# Note: store and changedetection_app are imported inside main() to avoid
|
||||
# initialization before argument parsing (allows --help to work without loading everything)
|
||||
|
||||
# ==============================================================================
|
||||
# Multiprocessing Configuration - CRITICAL for Thread Safety
|
||||
@@ -41,9 +42,10 @@ from loguru import logger
|
||||
#
|
||||
# IMPLEMENTATION:
|
||||
# 1. Explicit contexts everywhere (primary protection):
|
||||
# - Watch.py: ctx = multiprocessing.get_context('spawn')
|
||||
# - playwright.py: ctx = multiprocessing.get_context('spawn')
|
||||
# - puppeteer.py: ctx = multiprocessing.get_context('spawn')
|
||||
# - isolated_opencv.py: ctx = multiprocessing.get_context('spawn')
|
||||
# - isolated_libvips.py: ctx = multiprocessing.get_context('spawn')
|
||||
#
|
||||
# 2. Global default (defense-in-depth, below):
|
||||
# - Safety net if future code forgets explicit context
|
||||
@@ -82,15 +84,26 @@ def get_version():
|
||||
def sigshutdown_handler(_signo, _stack_frame):
|
||||
name = signal.Signals(_signo).name
|
||||
logger.critical(f'Shutdown: Got Signal - {name} ({_signo}), Fast shutdown initiated')
|
||||
|
||||
|
||||
# Set exit flag immediately to stop all loops
|
||||
app.config.exit.set()
|
||||
datastore.stop_thread = True
|
||||
|
||||
|
||||
# Log memory consumption before shutting down workers (cross-platform)
|
||||
try:
|
||||
import psutil
|
||||
process = psutil.Process()
|
||||
mem_info = process.memory_info()
|
||||
rss_mb = mem_info.rss / 1024 / 1024
|
||||
vms_mb = mem_info.vms / 1024 / 1024
|
||||
logger.info(f"Memory consumption before worker shutdown: RSS={rss_mb:,.2f} MB, VMS={vms_mb:,.2f} MB")
|
||||
except Exception as e:
|
||||
logger.warning(f"Could not retrieve memory stats: {str(e)}")
|
||||
|
||||
# Shutdown workers and queues immediately
|
||||
try:
|
||||
from changedetectionio import worker_handler
|
||||
worker_handler.shutdown_workers()
|
||||
from changedetectionio import worker_pool
|
||||
worker_pool.shutdown_workers()
|
||||
except Exception as e:
|
||||
logger.error(f"Error shutting down workers: {str(e)}")
|
||||
|
||||
@@ -99,9 +112,9 @@ def sigshutdown_handler(_signo, _stack_frame):
|
||||
from changedetectionio.flask_app import update_q, notification_q
|
||||
update_q.close()
|
||||
notification_q.close()
|
||||
logger.debug("Janus queues closed successfully")
|
||||
logger.debug("Queues closed successfully")
|
||||
except Exception as e:
|
||||
logger.critical(f"CRITICAL: Failed to close janus queues: {e}")
|
||||
logger.critical(f"CRITICAL: Failed to close queues: {e}")
|
||||
|
||||
# Shutdown socketio server fast
|
||||
from changedetectionio.flask_app import socketio_server
|
||||
@@ -111,31 +124,81 @@ def sigshutdown_handler(_signo, _stack_frame):
|
||||
except Exception as e:
|
||||
logger.error(f"Error shutting down Socket.IO server: {str(e)}")
|
||||
|
||||
# Save data quickly
|
||||
try:
|
||||
datastore.sync_to_json()
|
||||
logger.success('Fast sync to disk complete.')
|
||||
except Exception as e:
|
||||
logger.error(f"Error syncing to disk: {str(e)}")
|
||||
|
||||
# With immediate persistence, all data is already saved
|
||||
logger.success('All data already persisted (immediate commits enabled).')
|
||||
|
||||
sys.exit()
|
||||
|
||||
def print_help():
|
||||
"""Print help text for command line options"""
|
||||
print('Usage: changedetection.py [options]')
|
||||
print('')
|
||||
print('Standard options:')
|
||||
print(' -s SSL enable')
|
||||
print(' -h HOST Listen host (default: 0.0.0.0)')
|
||||
print(' -p PORT Listen port (default: 5000)')
|
||||
print(' -d PATH Datastore path')
|
||||
print(' -l LEVEL Log level (TRACE, DEBUG, INFO, SUCCESS, WARNING, ERROR, CRITICAL)')
|
||||
print(' -c Cleanup unused snapshots')
|
||||
print(' -C Create datastore directory if it doesn\'t exist')
|
||||
print(' -P true/false Set all watches paused (true) or active (false)')
|
||||
print('')
|
||||
print('Add URLs on startup:')
|
||||
print(' -u URL Add URL to watch (can be used multiple times)')
|
||||
print(' -u0 \'JSON\' Set options for first -u URL (e.g. \'{"processor":"text_json_diff"}\')')
|
||||
print(' -u1 \'JSON\' Set options for second -u URL (0-indexed)')
|
||||
print(' -u2 \'JSON\' Set options for third -u URL, etc.')
|
||||
print(' Available options: processor, fetch_backend, headers, method, etc.')
|
||||
print(' See model/Watch.py for all available options')
|
||||
print('')
|
||||
print('Recheck on startup:')
|
||||
print(' -r all Queue all watches for recheck on startup')
|
||||
print(' -r UUID,... Queue specific watches (comma-separated UUIDs)')
|
||||
print(' -r all N Queue all watches, wait for completion, repeat N times')
|
||||
print(' -r UUID,... N Queue specific watches, wait for completion, repeat N times')
|
||||
print('')
|
||||
print('Batch mode:')
|
||||
print(' -b Run in batch mode (process queue then exit)')
|
||||
print(' Useful for CI/CD, cron jobs, or one-time checks')
|
||||
print(' NOTE: Batch mode checks if Flask is running and aborts if port is in use')
|
||||
print(' Use -p PORT to specify a different port if needed')
|
||||
print('')
|
||||
|
||||
def main():
|
||||
global datastore
|
||||
global app
|
||||
|
||||
# Early help/version check before any initialization
|
||||
if '--help' in sys.argv or '-help' in sys.argv:
|
||||
print_help()
|
||||
sys.exit(0)
|
||||
|
||||
if '--version' in sys.argv or '-v' in sys.argv:
|
||||
print(f'changedetection.io {__version__}')
|
||||
sys.exit(0)
|
||||
|
||||
# Import heavy modules after help/version checks to keep startup fast for those flags
|
||||
from changedetectionio import store
|
||||
from changedetectionio.flask_app import changedetection_app
|
||||
|
||||
datastore_path = None
|
||||
do_cleanup = False
|
||||
# Optional URL to watch since start
|
||||
default_url = None
|
||||
# Set a default logger level
|
||||
logger_level = 'DEBUG'
|
||||
include_default_watches = True
|
||||
all_paused = None # None means don't change, True/False to set
|
||||
|
||||
host = os.environ.get("LISTEN_HOST", "0.0.0.0").strip()
|
||||
port = int(os.environ.get('PORT', 5000))
|
||||
ssl_mode = False
|
||||
|
||||
# Lists for multiple URLs and their options
|
||||
urls_to_add = []
|
||||
url_options = {} # Key: index (0-based), Value: dict of options
|
||||
recheck_watches = None # None, 'all', or list of UUIDs
|
||||
recheck_repeat_count = 1 # Number of times to repeat recheck cycle
|
||||
batch_mode = False # Run once then exit when queue is empty
|
||||
|
||||
# On Windows, create and use a default path.
|
||||
if os.name == 'nt':
|
||||
datastore_path = os.path.expandvars(r'%APPDATA%\changedetection.io')
|
||||
@@ -144,10 +207,68 @@ def main():
|
||||
# Must be absolute so that send_from_directory doesnt try to make it relative to backend/
|
||||
datastore_path = os.path.join(os.getcwd(), "../datastore")
|
||||
|
||||
# Pre-process arguments to extract -u, -u<N>, and -r options before getopt
|
||||
# This allows unlimited -u0, -u1, -u2, ... options without predefining them
|
||||
cleaned_argv = ['changedetection.py'] # Start with program name
|
||||
i = 1
|
||||
while i < len(sys.argv):
|
||||
arg = sys.argv[i]
|
||||
|
||||
# Handle -u (add URL)
|
||||
if arg == '-u' and i + 1 < len(sys.argv):
|
||||
urls_to_add.append(sys.argv[i + 1])
|
||||
i += 2
|
||||
continue
|
||||
|
||||
# Handle -u<N> (set options for URL at index N)
|
||||
if arg.startswith('-u') and len(arg) > 2 and arg[2:].isdigit():
|
||||
idx = int(arg[2:])
|
||||
if i + 1 < len(sys.argv):
|
||||
try:
|
||||
import json
|
||||
url_options[idx] = json.loads(sys.argv[i + 1])
|
||||
except json.JSONDecodeError as e:
|
||||
print(f'Error: Invalid JSON for {arg}: {sys.argv[i + 1]}')
|
||||
print(f'JSON decode error: {e}')
|
||||
sys.exit(2)
|
||||
i += 2
|
||||
continue
|
||||
|
||||
# Handle -r (recheck watches)
|
||||
if arg == '-r' and i + 1 < len(sys.argv):
|
||||
recheck_arg = sys.argv[i + 1]
|
||||
if recheck_arg.lower() == 'all':
|
||||
recheck_watches = 'all'
|
||||
else:
|
||||
# Parse comma-separated list of UUIDs
|
||||
recheck_watches = [uuid.strip() for uuid in recheck_arg.split(',') if uuid.strip()]
|
||||
|
||||
# Check for optional repeat count as third argument
|
||||
if i + 2 < len(sys.argv) and sys.argv[i + 2].isdigit():
|
||||
recheck_repeat_count = int(sys.argv[i + 2])
|
||||
if recheck_repeat_count < 1:
|
||||
print(f'Error: Repeat count must be at least 1, got {recheck_repeat_count}')
|
||||
sys.exit(2)
|
||||
i += 3
|
||||
else:
|
||||
i += 2
|
||||
continue
|
||||
|
||||
# Handle -b (batch mode - run once and exit)
|
||||
if arg == '-b':
|
||||
batch_mode = True
|
||||
i += 1
|
||||
continue
|
||||
|
||||
# Keep other arguments for getopt
|
||||
cleaned_argv.append(arg)
|
||||
i += 1
|
||||
|
||||
try:
|
||||
opts, args = getopt.getopt(sys.argv[1:], "6Ccsd:h:p:l:u:", "port")
|
||||
except getopt.GetoptError:
|
||||
print('backend.py -s SSL enable -h [host] -p [port] -d [datastore path] -u [default URL to watch] -l [debug level - TRACE, DEBUG(default), INFO, SUCCESS, WARNING, ERROR, CRITICAL]')
|
||||
opts, args = getopt.getopt(cleaned_argv[1:], "6Ccsd:h:p:l:P:", "port")
|
||||
except getopt.GetoptError as e:
|
||||
print_help()
|
||||
print(f'Error: {e}')
|
||||
sys.exit(2)
|
||||
|
||||
create_datastore_dir = False
|
||||
@@ -172,10 +293,6 @@ def main():
|
||||
if opt == '-d':
|
||||
datastore_path = arg
|
||||
|
||||
if opt == '-u':
|
||||
default_url = arg
|
||||
include_default_watches = False
|
||||
|
||||
# Cleanup (remove text files that arent in the index)
|
||||
if opt == '-c':
|
||||
do_cleanup = True
|
||||
@@ -187,6 +304,18 @@ def main():
|
||||
if opt == '-l':
|
||||
logger_level = int(arg) if arg.isdigit() else arg.upper()
|
||||
|
||||
if opt == '-P':
|
||||
try:
|
||||
all_paused = bool(strtobool(arg))
|
||||
except ValueError:
|
||||
print(f'Error: Invalid value for -P option: {arg}')
|
||||
print('Expected: true, false, yes, no, 1, or 0')
|
||||
sys.exit(2)
|
||||
|
||||
# If URLs are provided, don't include default watches
|
||||
if urls_to_add:
|
||||
include_default_watches = False
|
||||
|
||||
|
||||
logger.success(f"changedetection.io version {get_version()} starting.")
|
||||
# Launch using SocketIO run method for proper integration (if enabled)
|
||||
@@ -223,11 +352,16 @@ def main():
|
||||
logging.getLogger('pyppeteer.connection.Connection').setLevel(logging.WARNING)
|
||||
|
||||
# isnt there some @thingy to attach to each route to tell it, that this route needs a datastore
|
||||
app_config = {'datastore_path': datastore_path}
|
||||
app_config = {
|
||||
'datastore_path': datastore_path,
|
||||
'batch_mode': batch_mode,
|
||||
'recheck_watches': recheck_watches,
|
||||
'recheck_repeat_count': recheck_repeat_count
|
||||
}
|
||||
|
||||
if not os.path.isdir(app_config['datastore_path']):
|
||||
if create_datastore_dir:
|
||||
os.mkdir(app_config['datastore_path'])
|
||||
os.makedirs(app_config['datastore_path'], exist_ok=True)
|
||||
else:
|
||||
logger.critical(
|
||||
f"ERROR: Directory path for the datastore '{app_config['datastore_path']}'"
|
||||
@@ -244,15 +378,209 @@ def main():
|
||||
logger.critical(str(e))
|
||||
return
|
||||
|
||||
# Apply all_paused setting if specified via CLI
|
||||
if all_paused is not None:
|
||||
datastore.data['settings']['application']['all_paused'] = all_paused
|
||||
logger.info(f"Setting all watches paused: {all_paused}")
|
||||
|
||||
# Inject datastore into plugins that need access to settings
|
||||
from changedetectionio.pluggy_interface import inject_datastore_into_plugins
|
||||
inject_datastore_into_plugins(datastore)
|
||||
|
||||
if default_url:
|
||||
datastore.add_watch(url = default_url)
|
||||
# Step 1: Add URLs with their options (if provided via -u flags)
|
||||
added_watch_uuids = []
|
||||
if urls_to_add:
|
||||
logger.info(f"Adding {len(urls_to_add)} URL(s) from command line")
|
||||
for idx, url in enumerate(urls_to_add):
|
||||
extras = url_options.get(idx, {})
|
||||
if extras:
|
||||
logger.debug(f"Adding watch {idx}: {url} with options: {extras}")
|
||||
else:
|
||||
logger.debug(f"Adding watch {idx}: {url}")
|
||||
|
||||
new_uuid = datastore.add_watch(url=url, extras=extras)
|
||||
if new_uuid:
|
||||
added_watch_uuids.append(new_uuid)
|
||||
logger.success(f"Added watch: {url} (UUID: {new_uuid})")
|
||||
else:
|
||||
logger.error(f"Failed to add watch: {url}")
|
||||
|
||||
app = changedetection_app(app_config, datastore)
|
||||
|
||||
# Step 2: Queue newly added watches (if -u was provided in batch mode)
|
||||
# This must happen AFTER app initialization so update_q is available
|
||||
if batch_mode and added_watch_uuids:
|
||||
from changedetectionio.flask_app import update_q
|
||||
from changedetectionio import queuedWatchMetaData, worker_pool
|
||||
|
||||
logger.info(f"Batch mode: Queuing {len(added_watch_uuids)} newly added watches")
|
||||
for watch_uuid in added_watch_uuids:
|
||||
try:
|
||||
worker_pool.queue_item_async_safe(
|
||||
update_q,
|
||||
queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid})
|
||||
)
|
||||
logger.debug(f"Queued newly added watch: {watch_uuid}")
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to queue watch {watch_uuid}: {e}")
|
||||
|
||||
# Step 3: Queue watches for recheck (if -r was provided)
|
||||
# This must happen AFTER app initialization so update_q is available
|
||||
if recheck_watches is not None:
|
||||
from changedetectionio.flask_app import update_q
|
||||
from changedetectionio import queuedWatchMetaData, worker_pool
|
||||
|
||||
watches_to_queue = []
|
||||
if recheck_watches == 'all':
|
||||
# Queue all watches, excluding those already queued in batch mode
|
||||
all_watches = list(datastore.data['watching'].keys())
|
||||
if batch_mode and added_watch_uuids:
|
||||
# Exclude newly added watches that were already queued in batch mode
|
||||
watches_to_queue = [uuid for uuid in all_watches if uuid not in added_watch_uuids]
|
||||
logger.info(f"Queuing {len(watches_to_queue)} existing watches for recheck ({len(added_watch_uuids)} newly added watches already queued)")
|
||||
else:
|
||||
watches_to_queue = all_watches
|
||||
logger.info(f"Queuing all {len(watches_to_queue)} watches for recheck")
|
||||
else:
|
||||
# Queue specific UUIDs
|
||||
watches_to_queue = recheck_watches
|
||||
logger.info(f"Queuing {len(watches_to_queue)} specific watches for recheck")
|
||||
|
||||
queued_count = 0
|
||||
for watch_uuid in watches_to_queue:
|
||||
if watch_uuid in datastore.data['watching']:
|
||||
try:
|
||||
worker_pool.queue_item_async_safe(
|
||||
update_q,
|
||||
queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid})
|
||||
)
|
||||
queued_count += 1
|
||||
logger.debug(f"Queued watch for recheck: {watch_uuid}")
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to queue watch {watch_uuid}: {e}")
|
||||
else:
|
||||
logger.warning(f"Watch UUID not found in datastore: {watch_uuid}")
|
||||
|
||||
logger.success(f"Successfully queued {queued_count} watches for recheck")
|
||||
|
||||
# Step 4: Setup batch mode monitor (if -b was provided)
|
||||
if batch_mode:
|
||||
from changedetectionio.flask_app import update_q
|
||||
|
||||
# Safety check: Ensure Flask app is not already running on this port
|
||||
# Batch mode should never run alongside the web server
|
||||
import socket
|
||||
test_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
|
||||
|
||||
try:
|
||||
# Try to bind to the configured host:port (no SO_REUSEADDR - strict check)
|
||||
test_socket.bind((host, port))
|
||||
test_socket.close()
|
||||
logger.debug(f"Batch mode: Port {port} is available (Flask app not running)")
|
||||
except OSError as e:
|
||||
test_socket.close()
|
||||
# errno 98 = EADDRINUSE (Linux)
|
||||
# errno 48 = EADDRINUSE (macOS)
|
||||
# errno 10048 = WSAEADDRINUSE (Windows)
|
||||
if e.errno in (48, 98, 10048) or "Address already in use" in str(e) or "already in use" in str(e).lower():
|
||||
logger.critical(f"ERROR: Batch mode cannot run - port {port} is already in use")
|
||||
logger.critical(f"The Flask web server appears to be running on {host}:{port}")
|
||||
logger.critical(f"Batch mode is designed for standalone operation (CI/CD, cron jobs, etc.)")
|
||||
logger.critical(f"Please either stop the Flask web server, or use a different port with -p PORT")
|
||||
sys.exit(1)
|
||||
else:
|
||||
# Some other socket error - log but continue (might be network configuration issue)
|
||||
logger.warning(f"Port availability check failed with unexpected error: {e}")
|
||||
logger.warning(f"Continuing with batch mode anyway - be aware of potential conflicts")
|
||||
|
||||
def queue_watches_for_recheck(datastore, iteration):
|
||||
"""Helper function to queue watches for recheck"""
|
||||
watches_to_queue = []
|
||||
if recheck_watches == 'all':
|
||||
all_watches = list(datastore.data['watching'].keys())
|
||||
if batch_mode and added_watch_uuids and iteration == 1:
|
||||
# Only exclude newly added watches on first iteration
|
||||
watches_to_queue = [uuid for uuid in all_watches if uuid not in added_watch_uuids]
|
||||
else:
|
||||
watches_to_queue = all_watches
|
||||
logger.info(f"Batch mode (iteration {iteration}): Queuing all {len(watches_to_queue)} watches")
|
||||
elif recheck_watches:
|
||||
watches_to_queue = recheck_watches
|
||||
logger.info(f"Batch mode (iteration {iteration}): Queuing {len(watches_to_queue)} specific watches")
|
||||
|
||||
queued_count = 0
|
||||
for watch_uuid in watches_to_queue:
|
||||
if watch_uuid in datastore.data['watching']:
|
||||
try:
|
||||
worker_pool.queue_item_async_safe(
|
||||
update_q,
|
||||
queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid})
|
||||
)
|
||||
queued_count += 1
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to queue watch {watch_uuid}: {e}")
|
||||
else:
|
||||
logger.warning(f"Watch UUID not found in datastore: {watch_uuid}")
|
||||
logger.success(f"Batch mode (iteration {iteration}): Successfully queued {queued_count} watches")
|
||||
return queued_count
|
||||
|
||||
def batch_mode_monitor():
|
||||
"""Monitor queue and workers, shutdown or repeat when work is complete"""
|
||||
import time
|
||||
|
||||
# Track iterations if repeat mode is enabled
|
||||
current_iteration = 1
|
||||
total_iterations = recheck_repeat_count if recheck_watches and recheck_repeat_count > 1 else 1
|
||||
|
||||
if total_iterations > 1:
|
||||
logger.info(f"Batch mode: Will repeat recheck {total_iterations} times")
|
||||
else:
|
||||
logger.info("Batch mode: Waiting for all queued items to complete...")
|
||||
|
||||
# Wait a bit for workers to start processing
|
||||
time.sleep(3)
|
||||
|
||||
try:
|
||||
while current_iteration <= total_iterations:
|
||||
logger.info(f"Batch mode: Waiting for iteration {current_iteration}/{total_iterations} to complete...")
|
||||
|
||||
# Use the shared wait_for_all_checks function
|
||||
completed = worker_pool.wait_for_all_checks(update_q, timeout=300)
|
||||
|
||||
if not completed:
|
||||
logger.warning(f"Batch mode: Iteration {current_iteration} timed out after 300 seconds")
|
||||
|
||||
logger.success(f"Batch mode: Iteration {current_iteration}/{total_iterations} completed")
|
||||
|
||||
# Check if we need to repeat
|
||||
if current_iteration < total_iterations:
|
||||
logger.info(f"Batch mode: Starting iteration {current_iteration + 1}...")
|
||||
current_iteration += 1
|
||||
|
||||
# Re-queue watches for next iteration
|
||||
queue_watches_for_recheck(datastore, current_iteration)
|
||||
|
||||
# Brief pause before continuing
|
||||
time.sleep(2)
|
||||
else:
|
||||
# All iterations complete
|
||||
logger.success(f"Batch mode: All {total_iterations} iterations completed, initiating shutdown")
|
||||
# Trigger shutdown
|
||||
import os, signal
|
||||
os.kill(os.getpid(), signal.SIGTERM)
|
||||
return
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Batch mode monitor error: {e}")
|
||||
logger.error(f"Initiating emergency shutdown")
|
||||
import os, signal
|
||||
os.kill(os.getpid(), signal.SIGTERM)
|
||||
|
||||
# Start monitor in background thread
|
||||
monitor_thread = threading.Thread(target=batch_mode_monitor, daemon=True, name="BatchModeMonitor")
|
||||
monitor_thread.start()
|
||||
logger.info("Batch mode enabled: Will exit after all queued items are processed")
|
||||
|
||||
# Get the SocketIO instance from the Flask app (created in flask_app.py)
|
||||
from changedetectionio.flask_app import socketio_server
|
||||
global socketio
|
||||
@@ -286,7 +614,9 @@ def main():
|
||||
return dict(right_sticky="v{}".format(datastore.data['version_tag']),
|
||||
new_version_available=app.config['NEW_VERSION_AVAILABLE'],
|
||||
has_password=datastore.data['settings']['application']['password'] != False,
|
||||
socket_io_enabled=datastore.data['settings']['application']['ui'].get('socket_io_enabled', True)
|
||||
socket_io_enabled=datastore.data['settings']['application']['ui'].get('socket_io_enabled', True),
|
||||
all_paused=datastore.data['settings']['application'].get('all_paused', False),
|
||||
all_muted=datastore.data['settings']['application'].get('all_muted', False)
|
||||
)
|
||||
|
||||
# Monitored websites will not receive a Referer header when a user clicks on an outgoing link.
|
||||
@@ -308,23 +638,43 @@ def main():
|
||||
if os.getenv('USE_X_SETTINGS'):
|
||||
logger.info("USE_X_SETTINGS is ENABLED")
|
||||
from werkzeug.middleware.proxy_fix import ProxyFix
|
||||
app.wsgi_app = ProxyFix(app.wsgi_app, x_prefix=1, x_host=1)
|
||||
app.wsgi_app = ProxyFix(
|
||||
app.wsgi_app,
|
||||
x_for=1, # X-Forwarded-For (client IP)
|
||||
x_proto=1, # X-Forwarded-Proto (http/https)
|
||||
x_host=1, # X-Forwarded-Host (original host)
|
||||
x_port=1, # X-Forwarded-Port (original port)
|
||||
x_prefix=1 # X-Forwarded-Prefix (URL prefix)
|
||||
)
|
||||
|
||||
|
||||
# SocketIO instance is already initialized in flask_app.py
|
||||
if socketio_server:
|
||||
if ssl_mode:
|
||||
logger.success(f"SSL mode enabled, attempting to start with '{ssl_cert_file}' and '{ssl_privkey_file}' in {os.getcwd()}")
|
||||
socketio.run(app, host=host, port=int(port), debug=False,
|
||||
ssl_context=(ssl_cert_file, ssl_privkey_file), allow_unsafe_werkzeug=True)
|
||||
else:
|
||||
socketio.run(app, host=host, port=int(port), debug=False, allow_unsafe_werkzeug=True)
|
||||
# In batch mode, skip starting the HTTP server - just keep workers running
|
||||
if batch_mode:
|
||||
logger.info("Batch mode: Skipping HTTP server startup, workers will process queue")
|
||||
logger.info("Batch mode: Main thread will wait for shutdown signal")
|
||||
# Keep main thread alive until batch monitor triggers shutdown
|
||||
try:
|
||||
while True:
|
||||
time.sleep(1)
|
||||
except KeyboardInterrupt:
|
||||
logger.info("Batch mode: Keyboard interrupt received")
|
||||
pass
|
||||
else:
|
||||
# Run Flask app without Socket.IO if disabled
|
||||
logger.info("Starting Flask app without Socket.IO server")
|
||||
if ssl_mode:
|
||||
logger.success(f"SSL mode enabled, attempting to start with '{ssl_cert_file}' and '{ssl_privkey_file}' in {os.getcwd()}")
|
||||
app.run(host=host, port=int(port), debug=False,
|
||||
ssl_context=(ssl_cert_file, ssl_privkey_file))
|
||||
# Normal mode: Start HTTP server
|
||||
# SocketIO instance is already initialized in flask_app.py
|
||||
if socketio_server:
|
||||
if ssl_mode:
|
||||
logger.success(f"SSL mode enabled, attempting to start with '{ssl_cert_file}' and '{ssl_privkey_file}' in {os.getcwd()}")
|
||||
socketio.run(app, host=host, port=int(port), debug=False,
|
||||
ssl_context=(ssl_cert_file, ssl_privkey_file), allow_unsafe_werkzeug=True)
|
||||
else:
|
||||
socketio.run(app, host=host, port=int(port), debug=False, allow_unsafe_werkzeug=True)
|
||||
else:
|
||||
app.run(host=host, port=int(port), debug=False)
|
||||
# Run Flask app without Socket.IO if disabled
|
||||
logger.info("Starting Flask app without Socket.IO server")
|
||||
if ssl_mode:
|
||||
logger.success(f"SSL mode enabled, attempting to start with '{ssl_cert_file}' and '{ssl_privkey_file}' in {os.getcwd()}")
|
||||
app.run(host=host, port=int(port), debug=False,
|
||||
ssl_context=(ssl_cert_file, ssl_privkey_file))
|
||||
else:
|
||||
app.run(host=host, port=int(port), debug=False)
|
||||
|
||||
@@ -67,7 +67,7 @@ class Notifications(Resource):
|
||||
|
||||
clean_urls = [url.strip() for url in notification_urls if isinstance(url, str)]
|
||||
self.datastore.data['settings']['application']['notification_urls'] = clean_urls
|
||||
self.datastore.needs_write = True
|
||||
self.datastore.commit()
|
||||
|
||||
return {'notification_urls': clean_urls}, 200
|
||||
|
||||
@@ -95,7 +95,7 @@ class Notifications(Resource):
|
||||
abort(400, message="No matching notification URLs found.")
|
||||
|
||||
self.datastore.data['settings']['application']['notification_urls'] = notification_urls
|
||||
self.datastore.needs_write = True
|
||||
self.datastore.commit()
|
||||
|
||||
return 'OK', 204
|
||||
|
||||
|
||||
@@ -1,8 +1,10 @@
|
||||
from changedetectionio import queuedWatchMetaData
|
||||
from changedetectionio import worker_handler
|
||||
from changedetectionio import worker_pool
|
||||
from flask_expects_json import expects_json
|
||||
from flask_restful import abort, Resource
|
||||
from loguru import logger
|
||||
|
||||
import threading
|
||||
from flask import request
|
||||
from . import auth
|
||||
|
||||
@@ -28,24 +30,44 @@ class Tag(Resource):
|
||||
abort(404, message=f'No tag exists with the UUID of {uuid}')
|
||||
|
||||
if request.args.get('recheck'):
|
||||
# Recheck all, including muted
|
||||
# Get most overdue first
|
||||
i=0
|
||||
# Recheck all watches with this tag, including muted
|
||||
# First collect watches to queue
|
||||
watches_to_queue = []
|
||||
for k in sorted(self.datastore.data['watching'].items(), key=lambda item: item[1].get('last_checked', 0)):
|
||||
watch_uuid = k[0]
|
||||
watch = k[1]
|
||||
if not watch['paused'] and tag['uuid'] not in watch['tags']:
|
||||
continue
|
||||
worker_handler.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid}))
|
||||
i+=1
|
||||
if not watch['paused'] and tag['uuid'] in watch['tags']:
|
||||
watches_to_queue.append(watch_uuid)
|
||||
|
||||
return f"OK, {i} watches queued", 200
|
||||
# If less than 20 watches, queue synchronously for immediate feedback
|
||||
if len(watches_to_queue) < 20:
|
||||
for watch_uuid in watches_to_queue:
|
||||
worker_pool.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid}))
|
||||
return {'status': f'OK, queued {len(watches_to_queue)} watches for rechecking'}, 200
|
||||
else:
|
||||
# 20+ watches - queue in background thread to avoid blocking API response
|
||||
def queue_watches_background():
|
||||
"""Background thread to queue watches - discarded after completion."""
|
||||
try:
|
||||
for watch_uuid in watches_to_queue:
|
||||
worker_pool.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid}))
|
||||
logger.info(f"Background queueing complete for tag {tag['uuid']}: {len(watches_to_queue)} watches queued")
|
||||
except Exception as e:
|
||||
logger.error(f"Error in background queueing for tag {tag['uuid']}: {e}")
|
||||
|
||||
# Start background thread and return immediately
|
||||
thread = threading.Thread(target=queue_watches_background, daemon=True, name=f"QueueTag-{tag['uuid'][:8]}")
|
||||
thread.start()
|
||||
|
||||
return {'status': f'OK, queueing {len(watches_to_queue)} watches in background'}, 202
|
||||
|
||||
if request.args.get('muted', '') == 'muted':
|
||||
self.datastore.data['settings']['application']['tags'][uuid]['notification_muted'] = True
|
||||
self.datastore.commit()
|
||||
return "OK", 200
|
||||
elif request.args.get('muted', '') == 'unmuted':
|
||||
self.datastore.data['settings']['application']['tags'][uuid]['notification_muted'] = False
|
||||
self.datastore.commit()
|
||||
return "OK", 200
|
||||
|
||||
return tag
|
||||
@@ -59,11 +81,13 @@ class Tag(Resource):
|
||||
|
||||
# Delete the tag, and any tag reference
|
||||
del self.datastore.data['settings']['application']['tags'][uuid]
|
||||
|
||||
self.datastore.commit()
|
||||
|
||||
# Remove tag from all watches
|
||||
for watch_uuid, watch in self.datastore.data['watching'].items():
|
||||
if watch.get('tags') and uuid in watch['tags']:
|
||||
watch['tags'].remove(uuid)
|
||||
watch.commit()
|
||||
|
||||
return 'OK', 204
|
||||
|
||||
@@ -76,8 +100,18 @@ class Tag(Resource):
|
||||
if not tag:
|
||||
abort(404, message='No tag exists with the UUID of {}'.format(uuid))
|
||||
|
||||
# Validate notification_urls if provided
|
||||
if 'notification_urls' in request.json:
|
||||
from wtforms import ValidationError
|
||||
from changedetectionio.api.Notifications import validate_notification_urls
|
||||
try:
|
||||
notification_urls = request.json.get('notification_urls', [])
|
||||
validate_notification_urls(notification_urls)
|
||||
except ValidationError as e:
|
||||
return str(e), 400
|
||||
|
||||
tag.update(request.json)
|
||||
self.datastore.needs_write_urgent = True
|
||||
self.datastore.commit()
|
||||
|
||||
return "OK", 200
|
||||
|
||||
|
||||
+160
-91
@@ -1,10 +1,12 @@
|
||||
import os
|
||||
import threading
|
||||
|
||||
from changedetectionio.validate_url import is_safe_valid_url
|
||||
from changedetectionio.favicon_utils import get_favicon_mime_type
|
||||
|
||||
from . import auth
|
||||
from changedetectionio import queuedWatchMetaData, strtobool
|
||||
from changedetectionio import worker_handler
|
||||
from changedetectionio import worker_pool
|
||||
from flask import request, make_response, send_from_directory
|
||||
from flask_expects_json import expects_json
|
||||
from flask_restful import abort, Resource
|
||||
@@ -64,43 +66,46 @@ class Watch(Resource):
|
||||
@validate_openapi_request('getWatch')
|
||||
def get(self, uuid):
|
||||
"""Get information about a single watch, recheck, pause, or mute."""
|
||||
import time
|
||||
from copy import deepcopy
|
||||
watch = None
|
||||
for _ in range(20):
|
||||
try:
|
||||
watch = deepcopy(self.datastore.data['watching'].get(uuid))
|
||||
break
|
||||
except RuntimeError:
|
||||
# Incase dict changed, try again
|
||||
time.sleep(0.01)
|
||||
|
||||
if not watch:
|
||||
# Get watch reference first (for pause/mute operations)
|
||||
watch_obj = self.datastore.data['watching'].get(uuid)
|
||||
if not watch_obj:
|
||||
abort(404, message='No watch exists with the UUID of {}'.format(uuid))
|
||||
|
||||
# Create a dict copy for JSON response (with lock for thread safety)
|
||||
# This is much faster than deepcopy and doesn't copy the datastore reference
|
||||
# WARNING: dict() is a SHALLOW copy - nested dicts are shared with original!
|
||||
# Only safe because we only ADD scalar properties (line 97-101), never modify nested dicts
|
||||
# If you need to modify nested dicts, use: from copy import deepcopy; watch = deepcopy(dict(watch_obj))
|
||||
with self.datastore.lock:
|
||||
watch = dict(watch_obj)
|
||||
|
||||
if request.args.get('recheck'):
|
||||
worker_handler.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
worker_pool.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
return "OK", 200
|
||||
if request.args.get('paused', '') == 'paused':
|
||||
self.datastore.data['watching'].get(uuid).pause()
|
||||
watch_obj.pause()
|
||||
watch_obj.commit()
|
||||
return "OK", 200
|
||||
elif request.args.get('paused', '') == 'unpaused':
|
||||
self.datastore.data['watching'].get(uuid).unpause()
|
||||
watch_obj.unpause()
|
||||
watch_obj.commit()
|
||||
return "OK", 200
|
||||
if request.args.get('muted', '') == 'muted':
|
||||
self.datastore.data['watching'].get(uuid).mute()
|
||||
watch_obj.mute()
|
||||
watch_obj.commit()
|
||||
return "OK", 200
|
||||
elif request.args.get('muted', '') == 'unmuted':
|
||||
self.datastore.data['watching'].get(uuid).unmute()
|
||||
watch_obj.unmute()
|
||||
watch_obj.commit()
|
||||
return "OK", 200
|
||||
|
||||
# Return without history, get that via another API call
|
||||
# Properties are not returned as a JSON, so add the required props manually
|
||||
watch['history_n'] = watch.history_n
|
||||
watch['history_n'] = watch_obj.history_n
|
||||
# attr .last_changed will check for the last written text snapshot on change
|
||||
watch['last_changed'] = watch.last_changed
|
||||
watch['viewed'] = watch.viewed
|
||||
watch['link'] = watch.link,
|
||||
watch['last_changed'] = watch_obj.last_changed
|
||||
watch['viewed'] = watch_obj.viewed
|
||||
watch['link'] = watch_obj.link,
|
||||
|
||||
return watch
|
||||
|
||||
@@ -125,72 +130,57 @@ class Watch(Resource):
|
||||
|
||||
if request.json.get('proxy'):
|
||||
plist = self.datastore.proxy_list
|
||||
if not request.json.get('proxy') in plist:
|
||||
return "Invalid proxy choice, currently supported proxies are '{}'".format(', '.join(plist)), 400
|
||||
if not plist or request.json.get('proxy') not in plist:
|
||||
proxy_list_str = ', '.join(plist) if plist else 'none configured'
|
||||
return f"Invalid proxy choice, currently supported proxies are '{proxy_list_str}'", 400
|
||||
|
||||
# Validate time_between_check when not using defaults
|
||||
validation_error = validate_time_between_check_required(request.json)
|
||||
if validation_error:
|
||||
return validation_error, 400
|
||||
|
||||
# XSS etc protection
|
||||
if request.json.get('url') and not is_safe_valid_url(request.json.get('url')):
|
||||
return "Invalid URL", 400
|
||||
# Validate notification_urls if provided
|
||||
if 'notification_urls' in request.json:
|
||||
from wtforms import ValidationError
|
||||
from changedetectionio.api.Notifications import validate_notification_urls
|
||||
try:
|
||||
notification_urls = request.json.get('notification_urls', [])
|
||||
validate_notification_urls(notification_urls)
|
||||
except ValidationError as e:
|
||||
return str(e), 400
|
||||
|
||||
# XSS etc protection - validate URL if it's being updated
|
||||
if 'url' in request.json:
|
||||
new_url = request.json.get('url')
|
||||
|
||||
# URL must be a non-empty string
|
||||
if new_url is None:
|
||||
return "URL cannot be null", 400
|
||||
|
||||
if not isinstance(new_url, str):
|
||||
return "URL must be a string", 400
|
||||
|
||||
if not new_url.strip():
|
||||
return "URL cannot be empty or whitespace only", 400
|
||||
|
||||
if not is_safe_valid_url(new_url.strip()):
|
||||
return "Invalid or unsupported URL format. URL must use http://, https://, or ftp:// protocol", 400
|
||||
|
||||
# Handle processor-config-* fields separately (save to JSON, not datastore)
|
||||
from changedetectionio import processors
|
||||
processor_config_data = {}
|
||||
regular_data = {}
|
||||
|
||||
for key, value in request.json.items():
|
||||
if key.startswith('processor_config_'):
|
||||
config_key = key.replace('processor_config_', '')
|
||||
if value: # Only save non-empty values
|
||||
processor_config_data[config_key] = value
|
||||
else:
|
||||
regular_data[key] = value
|
||||
# Make a mutable copy of request.json for modification
|
||||
json_data = dict(request.json)
|
||||
|
||||
# Extract and remove processor config fields from json_data
|
||||
processor_config_data = processors.extract_processor_config_from_form_data(json_data)
|
||||
|
||||
# Update watch with regular (non-processor-config) fields
|
||||
watch.update(regular_data)
|
||||
watch.update(json_data)
|
||||
watch.commit()
|
||||
|
||||
# Save processor config to JSON file if any config data exists
|
||||
if processor_config_data:
|
||||
try:
|
||||
processor_name = request.json.get('processor', watch.get('processor'))
|
||||
if processor_name:
|
||||
# Create a processor instance to access config methods
|
||||
from changedetectionio.processors import difference_detection_processor
|
||||
processor_instance = difference_detection_processor(self.datastore, uuid)
|
||||
# Use processor name as filename so each processor keeps its own config
|
||||
config_filename = f'{processor_name}.json'
|
||||
processor_instance.update_extra_watch_config(config_filename, processor_config_data)
|
||||
logger.debug(f"API: Saved processor config to {config_filename}: {processor_config_data}")
|
||||
|
||||
# Call optional edit_hook if processor has one
|
||||
try:
|
||||
import importlib
|
||||
edit_hook_module_name = f'changedetectionio.processors.{processor_name}.edit_hook'
|
||||
|
||||
try:
|
||||
edit_hook = importlib.import_module(edit_hook_module_name)
|
||||
logger.debug(f"API: Found edit_hook module for {processor_name}")
|
||||
|
||||
if hasattr(edit_hook, 'on_config_save'):
|
||||
logger.info(f"API: Calling edit_hook.on_config_save for {processor_name}")
|
||||
# Call hook and get updated config
|
||||
updated_config = edit_hook.on_config_save(watch, processor_config_data, self.datastore)
|
||||
# Save updated config back to file
|
||||
processor_instance.update_extra_watch_config(config_filename, updated_config)
|
||||
logger.info(f"API: Edit hook updated config: {updated_config}")
|
||||
else:
|
||||
logger.debug(f"API: Edit hook module found but no on_config_save function")
|
||||
except ModuleNotFoundError:
|
||||
logger.debug(f"API: No edit_hook module for processor {processor_name} (this is normal)")
|
||||
except Exception as hook_error:
|
||||
logger.error(f"API: Edit hook error (non-fatal): {hook_error}", exc_info=True)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"API: Failed to save processor config: {e}")
|
||||
# Save processor config to JSON file
|
||||
processors.save_processor_config(self.datastore, uuid, processor_config_data)
|
||||
|
||||
return "OK", 200
|
||||
|
||||
@@ -231,6 +221,10 @@ class WatchSingleHistory(Resource):
|
||||
if timestamp == 'latest':
|
||||
timestamp = list(watch.history.keys())[-1]
|
||||
|
||||
# Validate that the timestamp exists in history
|
||||
if timestamp not in watch.history:
|
||||
abort(404, message=f"No history snapshot found for timestamp '{timestamp}'")
|
||||
|
||||
if request.args.get('html'):
|
||||
content = watch.get_fetched_html(timestamp)
|
||||
if content:
|
||||
@@ -379,16 +373,9 @@ class WatchFavicon(Resource):
|
||||
|
||||
favicon_filename = watch.get_favicon_filename()
|
||||
if favicon_filename:
|
||||
try:
|
||||
import magic
|
||||
mime = magic.from_file(
|
||||
os.path.join(watch.watch_data_dir, favicon_filename),
|
||||
mime=True
|
||||
)
|
||||
except ImportError:
|
||||
# Fallback, no python-magic
|
||||
import mimetypes
|
||||
mime, encoding = mimetypes.guess_type(favicon_filename)
|
||||
# Use cached MIME type detection
|
||||
filepath = os.path.join(watch.watch_data_dir, favicon_filename)
|
||||
mime = get_favicon_mime_type(filepath)
|
||||
|
||||
response = make_response(send_from_directory(watch.watch_data_dir, favicon_filename))
|
||||
response.headers['Content-type'] = mime
|
||||
@@ -418,16 +405,33 @@ class CreateWatch(Resource):
|
||||
|
||||
if json_data.get('proxy'):
|
||||
plist = self.datastore.proxy_list
|
||||
if not json_data.get('proxy') in plist:
|
||||
return "Invalid proxy choice, currently supported proxies are '{}'".format(', '.join(plist)), 400
|
||||
if not plist or json_data.get('proxy') not in plist:
|
||||
proxy_list_str = ', '.join(plist) if plist else 'none configured'
|
||||
return f"Invalid proxy choice, currently supported proxies are '{proxy_list_str}'", 400
|
||||
|
||||
# Validate time_between_check when not using defaults
|
||||
validation_error = validate_time_between_check_required(json_data)
|
||||
if validation_error:
|
||||
return validation_error, 400
|
||||
|
||||
# Validate notification_urls if provided
|
||||
if 'notification_urls' in json_data:
|
||||
from wtforms import ValidationError
|
||||
from changedetectionio.api.Notifications import validate_notification_urls
|
||||
try:
|
||||
notification_urls = json_data.get('notification_urls', [])
|
||||
validate_notification_urls(notification_urls)
|
||||
except ValidationError as e:
|
||||
return str(e), 400
|
||||
|
||||
# Handle processor-config-* fields separately (save to JSON, not watch)
|
||||
from changedetectionio import processors
|
||||
|
||||
extras = copy.deepcopy(json_data)
|
||||
|
||||
# Extract and remove processor config fields from extras
|
||||
processor_config_data = processors.extract_processor_config_from_form_data(extras)
|
||||
|
||||
# Because we renamed 'tag' to 'tags' but don't want to change the API (can do this in v2 of the API)
|
||||
tags = None
|
||||
if extras.get('tag'):
|
||||
@@ -437,10 +441,25 @@ class CreateWatch(Resource):
|
||||
del extras['url']
|
||||
|
||||
new_uuid = self.datastore.add_watch(url=url, extras=extras, tag=tags)
|
||||
|
||||
# Save processor config to separate JSON file
|
||||
if new_uuid and processor_config_data:
|
||||
processors.save_processor_config(self.datastore, new_uuid, processor_config_data)
|
||||
if new_uuid:
|
||||
worker_handler.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid}))
|
||||
# Dont queue because the scheduler will check that it hasnt been checked before anyway
|
||||
# worker_pool.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid}))
|
||||
return {'uuid': new_uuid}, 201
|
||||
else:
|
||||
# Check if it was a limit issue
|
||||
page_watch_limit = os.getenv('PAGE_WATCH_LIMIT')
|
||||
if page_watch_limit:
|
||||
try:
|
||||
page_watch_limit = int(page_watch_limit)
|
||||
current_watch_count = len(self.datastore.data['watching'])
|
||||
if current_watch_count >= page_watch_limit:
|
||||
return f"Watch limit reached ({current_watch_count}/{page_watch_limit} watches). Cannot add more watches.", 429
|
||||
except ValueError:
|
||||
pass
|
||||
return "Invalid or unsupported URL", 400
|
||||
|
||||
@auth.check_token
|
||||
@@ -468,8 +487,58 @@ class CreateWatch(Resource):
|
||||
}
|
||||
|
||||
if request.args.get('recheck_all'):
|
||||
for uuid in self.datastore.data['watching'].keys():
|
||||
worker_handler.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
return {'status': "OK"}, 200
|
||||
# Collect all watches to queue
|
||||
watches_to_queue = self.datastore.data['watching'].keys()
|
||||
|
||||
# If less than 20 watches, queue synchronously for immediate feedback
|
||||
if len(watches_to_queue) < 20:
|
||||
# Get already queued/running UUIDs once (efficient)
|
||||
queued_uuids = set(self.update_q.get_queued_uuids())
|
||||
running_uuids = set(worker_pool.get_running_uuids())
|
||||
|
||||
# Filter out watches that are already queued or running
|
||||
watches_to_queue_filtered = [
|
||||
uuid for uuid in watches_to_queue
|
||||
if uuid not in queued_uuids and uuid not in running_uuids
|
||||
]
|
||||
|
||||
# Queue only the filtered watches
|
||||
for uuid in watches_to_queue_filtered:
|
||||
worker_pool.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
|
||||
# Provide feedback about skipped watches
|
||||
skipped_count = len(watches_to_queue) - len(watches_to_queue_filtered)
|
||||
if skipped_count > 0:
|
||||
return {'status': f'OK, queued {len(watches_to_queue_filtered)} watches for rechecking ({skipped_count} already queued or running)'}, 200
|
||||
else:
|
||||
return {'status': f'OK, queued {len(watches_to_queue_filtered)} watches for rechecking'}, 200
|
||||
else:
|
||||
# 20+ watches - queue in background thread to avoid blocking API response
|
||||
# Capture queued/running state before background thread
|
||||
queued_uuids = set(self.update_q.get_queued_uuids())
|
||||
running_uuids = set(worker_pool.get_running_uuids())
|
||||
|
||||
def queue_all_watches_background():
|
||||
"""Background thread to queue all watches - discarded after completion."""
|
||||
try:
|
||||
queued_count = 0
|
||||
skipped_count = 0
|
||||
for uuid in watches_to_queue:
|
||||
# Check if already queued or running (state captured at start)
|
||||
if uuid not in queued_uuids and uuid not in running_uuids:
|
||||
worker_pool.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
queued_count += 1
|
||||
else:
|
||||
skipped_count += 1
|
||||
|
||||
logger.info(f"Background queueing complete: {queued_count} watches queued, {skipped_count} skipped (already queued/running)")
|
||||
except Exception as e:
|
||||
logger.error(f"Error in background queueing all watches: {e}")
|
||||
|
||||
# Start background thread and return immediately
|
||||
thread = threading.Thread(target=queue_all_watches_background, daemon=True, name="QueueAllWatches-Background")
|
||||
thread.start()
|
||||
|
||||
return {'status': f'OK, queueing {len(watches_to_queue)} watches in background'}, 202
|
||||
|
||||
return list, 200
|
||||
@@ -12,9 +12,17 @@ schema = api_schema.build_watch_json_schema(watch_base_config)
|
||||
schema_create_watch = copy.deepcopy(schema)
|
||||
schema_create_watch['required'] = ['url']
|
||||
del schema_create_watch['properties']['last_viewed']
|
||||
# Allow processor_config_* fields (handled separately in endpoint)
|
||||
schema_create_watch['patternProperties'] = {
|
||||
'^processor_config_': {'type': ['string', 'number', 'boolean', 'object', 'array', 'null']}
|
||||
}
|
||||
|
||||
schema_update_watch = copy.deepcopy(schema)
|
||||
schema_update_watch['additionalProperties'] = False
|
||||
# Allow processor_config_* fields (handled separately in endpoint)
|
||||
schema_update_watch['patternProperties'] = {
|
||||
'^processor_config_': {'type': ['string', 'number', 'boolean', 'object', 'array', 'null']}
|
||||
}
|
||||
|
||||
# Tag schema is also based on watch_base since Tag inherits from it
|
||||
schema_tag = copy.deepcopy(schema)
|
||||
|
||||
@@ -27,11 +27,23 @@ def create_backup(datastore_path, watches: dict):
|
||||
compression=zipfile.ZIP_DEFLATED,
|
||||
compresslevel=8) as zipObj:
|
||||
|
||||
# Add the index
|
||||
zipObj.write(os.path.join(datastore_path, "url-watches.json"), arcname="url-watches.json")
|
||||
# Add the settings file (supports both formats)
|
||||
# New format: changedetection.json
|
||||
changedetection_json = os.path.join(datastore_path, "changedetection.json")
|
||||
if os.path.isfile(changedetection_json):
|
||||
zipObj.write(changedetection_json, arcname="changedetection.json")
|
||||
logger.debug("Added changedetection.json to backup")
|
||||
|
||||
# Add the flask app secret
|
||||
zipObj.write(os.path.join(datastore_path, "secret.txt"), arcname="secret.txt")
|
||||
# Legacy format: url-watches.json (for backward compatibility)
|
||||
url_watches_json = os.path.join(datastore_path, "url-watches.json")
|
||||
if os.path.isfile(url_watches_json):
|
||||
zipObj.write(url_watches_json, arcname="url-watches.json")
|
||||
logger.debug("Added url-watches.json to backup")
|
||||
|
||||
# Add the flask app secret (if it exists)
|
||||
secret_file = os.path.join(datastore_path, "secret.txt")
|
||||
if os.path.isfile(secret_file):
|
||||
zipObj.write(secret_file, arcname="secret.txt")
|
||||
|
||||
# Add any data in the watch data directory.
|
||||
for uuid, w in watches.items():
|
||||
@@ -90,8 +102,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
flash(gettext("Maximum number of backups reached, please remove some"), "error")
|
||||
return redirect(url_for('backups.index'))
|
||||
|
||||
# Be sure we're written fresh
|
||||
datastore.sync_to_json()
|
||||
# With immediate persistence, all data is already saved
|
||||
zip_thread = threading.Thread(
|
||||
target=create_backup,
|
||||
args=(datastore.datastore_path, datastore.data.get("watching")),
|
||||
|
||||
@@ -3,10 +3,10 @@
|
||||
{% from '_helpers.html' import render_simple_field, render_field %}
|
||||
<div class="edit-form">
|
||||
<div class="box-wrap inner">
|
||||
<h4>{{ _('Backups') }}</h4>
|
||||
<h2>{{ _('Backups') }}</h2>
|
||||
{% if backup_running %}
|
||||
<p>
|
||||
<strong>{{ _('A backup is running!') }}</strong>
|
||||
<span class="spinner"></span> <strong>{{ _('A backup is running!') }}</strong>
|
||||
</p>
|
||||
{% endif %}
|
||||
<p>
|
||||
|
||||
@@ -1,13 +1,8 @@
|
||||
from flask import Blueprint, request, redirect, url_for, flash, render_template
|
||||
from loguru import logger
|
||||
|
||||
from changedetectionio.store import ChangeDetectionStore
|
||||
from changedetectionio.auth_decorator import login_optionally_required
|
||||
from changedetectionio import worker_handler
|
||||
from changedetectionio.blueprint.imports.importer import (
|
||||
import_url_list,
|
||||
import_distill_io_json,
|
||||
import_xlsx_wachete,
|
||||
import_xlsx_custom
|
||||
)
|
||||
|
||||
def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMetaData):
|
||||
import_blueprint = Blueprint('imports', __name__, template_folder="templates")
|
||||
@@ -17,15 +12,27 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
def import_page():
|
||||
remaining_urls = []
|
||||
from changedetectionio import forms
|
||||
|
||||
#
|
||||
if request.method == 'POST':
|
||||
# from changedetectionio import worker_pool
|
||||
|
||||
from changedetectionio.blueprint.imports.importer import (
|
||||
import_url_list,
|
||||
import_distill_io_json,
|
||||
import_xlsx_wachete,
|
||||
import_xlsx_custom
|
||||
)
|
||||
|
||||
# URL List import
|
||||
if request.values.get('urls') and len(request.values.get('urls').strip()):
|
||||
# Import and push into the queue for immediate update check
|
||||
from changedetectionio import processors
|
||||
importer_handler = import_url_list()
|
||||
importer_handler.run(data=request.values.get('urls'), flash=flash, datastore=datastore, processor=request.values.get('processor', 'text_json_diff'))
|
||||
for uuid in importer_handler.new_uuids:
|
||||
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
importer_handler.run(data=request.values.get('urls'), flash=flash, datastore=datastore, processor=request.values.get('processor', processors.get_default_processor()))
|
||||
logger.debug(f"Imported {len(importer_handler.new_uuids)} new UUIDs")
|
||||
# Dont' add to queue because scheduler can see that they haven't been checked and will add them to the queue
|
||||
# for uuid in importer_handler.new_uuids:
|
||||
# worker_pool.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
|
||||
if len(importer_handler.remaining_data) == 0:
|
||||
return redirect(url_for('watchlist.index'))
|
||||
@@ -37,8 +44,10 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
# Import and push into the queue for immediate update check
|
||||
d_importer = import_distill_io_json()
|
||||
d_importer.run(data=request.values.get('distill-io'), flash=flash, datastore=datastore)
|
||||
for uuid in d_importer.new_uuids:
|
||||
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
# Dont' add to queue because scheduler can see that they haven't been checked and will add them to the queue
|
||||
# for uuid in importer_handler.new_uuids:
|
||||
# worker_pool.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
|
||||
|
||||
# XLSX importer
|
||||
if request.files and request.files.get('xlsx_file'):
|
||||
@@ -60,8 +69,10 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
w_importer.import_profile = map
|
||||
w_importer.run(data=file, flash=flash, datastore=datastore)
|
||||
|
||||
for uuid in w_importer.new_uuids:
|
||||
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
# Dont' add to queue because scheduler can see that they haven't been checked and will add them to the queue
|
||||
# for uuid in importer_handler.new_uuids:
|
||||
# worker_pool.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
|
||||
|
||||
# Could be some remaining, or we could be on GET
|
||||
form = forms.importForm(formdata=request.form if request.method == 'POST' else None)
|
||||
|
||||
@@ -62,7 +62,7 @@ class import_url_list(Importer):
|
||||
extras = None
|
||||
if processor:
|
||||
extras = {'processor': processor}
|
||||
new_uuid = datastore.add_watch(url=url.strip(), tag=tags, write_to_disk_now=False, extras=extras)
|
||||
new_uuid = datastore.add_watch(url=url.strip(), tag=tags, save_immediately=False, extras=extras)
|
||||
|
||||
if new_uuid:
|
||||
# Straight into the queue.
|
||||
@@ -129,7 +129,7 @@ class import_distill_io_json(Importer):
|
||||
new_uuid = datastore.add_watch(url=d['uri'].strip(),
|
||||
tag=",".join(d.get('tags', [])),
|
||||
extras=extras,
|
||||
write_to_disk_now=False)
|
||||
save_immediately=False)
|
||||
|
||||
if new_uuid:
|
||||
# Straight into the queue.
|
||||
@@ -204,7 +204,7 @@ class import_xlsx_wachete(Importer):
|
||||
new_uuid = datastore.add_watch(url=data['url'].strip(),
|
||||
extras=extras,
|
||||
tag=data.get('folder'),
|
||||
write_to_disk_now=False)
|
||||
save_immediately=False)
|
||||
if new_uuid:
|
||||
# Straight into the queue.
|
||||
self.new_uuids.append(new_uuid)
|
||||
@@ -287,7 +287,7 @@ class import_xlsx_custom(Importer):
|
||||
new_uuid = datastore.add_watch(url=url,
|
||||
extras=extras,
|
||||
tag=tags,
|
||||
write_to_disk_now=False)
|
||||
save_immediately=False)
|
||||
if new_uuid:
|
||||
# Straight into the queue.
|
||||
self.new_uuids.append(new_uuid)
|
||||
|
||||
@@ -4,7 +4,7 @@ from flask import Blueprint, flash, redirect, url_for
|
||||
from flask_login import login_required
|
||||
from changedetectionio.store import ChangeDetectionStore
|
||||
from changedetectionio import queuedWatchMetaData
|
||||
from changedetectionio import worker_handler
|
||||
from changedetectionio import worker_pool
|
||||
from queue import PriorityQueue
|
||||
|
||||
PRICE_DATA_TRACK_ACCEPT = 'accepted'
|
||||
@@ -20,13 +20,15 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q: PriorityQueue
|
||||
datastore.data['watching'][uuid]['track_ldjson_price_data'] = PRICE_DATA_TRACK_ACCEPT
|
||||
datastore.data['watching'][uuid]['processor'] = 'restock_diff'
|
||||
datastore.data['watching'][uuid].clear_watch()
|
||||
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
datastore.data['watching'][uuid].commit()
|
||||
worker_pool.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
return redirect(url_for("watchlist.index"))
|
||||
|
||||
@login_required
|
||||
@price_data_follower_blueprint.route("/<string:uuid>/reject", methods=['GET'])
|
||||
def reject(uuid):
|
||||
datastore.data['watching'][uuid]['track_ldjson_price_data'] = PRICE_DATA_TRACK_REJECT
|
||||
datastore.data['watching'][uuid].commit()
|
||||
return redirect(url_for("watchlist.index"))
|
||||
|
||||
|
||||
|
||||
@@ -37,6 +37,8 @@ def construct_single_watch_routes(rss_blueprint, datastore):
|
||||
|
||||
rss_content_format = datastore.data['settings']['application'].get('rss_content_format')
|
||||
|
||||
if uuid == 'first':
|
||||
uuid = list(datastore.data['watching'].keys()).pop()
|
||||
# Get the watch by UUID
|
||||
watch = datastore.data['watching'].get(uuid)
|
||||
if not watch:
|
||||
|
||||
@@ -74,26 +74,33 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
del (app_update['password'])
|
||||
|
||||
datastore.data['settings']['application'].update(app_update)
|
||||
|
||||
|
||||
# Handle dynamic worker count adjustment
|
||||
old_worker_count = datastore.data['settings']['requests'].get('workers', 1)
|
||||
new_worker_count = form.data['requests'].get('workers', 1)
|
||||
|
||||
|
||||
datastore.data['settings']['requests'].update(form.data['requests'])
|
||||
|
||||
datastore.commit()
|
||||
|
||||
# Adjust worker count if it changed
|
||||
if new_worker_count != old_worker_count:
|
||||
from changedetectionio import worker_handler
|
||||
from changedetectionio import worker_pool
|
||||
from changedetectionio.flask_app import update_q, notification_q, app, datastore as ds
|
||||
|
||||
result = worker_handler.adjust_async_worker_count(
|
||||
|
||||
# Check CPU core availability and warn if worker count is high
|
||||
cpu_count = os.cpu_count()
|
||||
if cpu_count and new_worker_count >= (cpu_count * 0.9):
|
||||
flash(gettext("Warning: Worker count ({}) is close to or exceeds available CPU cores ({})").format(
|
||||
new_worker_count, cpu_count), 'warning')
|
||||
|
||||
result = worker_pool.adjust_async_worker_count(
|
||||
new_count=new_worker_count,
|
||||
update_q=update_q,
|
||||
notification_q=notification_q,
|
||||
app=app,
|
||||
datastore=ds
|
||||
)
|
||||
|
||||
|
||||
if result['status'] == 'success':
|
||||
flash(gettext("Worker count adjusted: {}").format(result['message']), 'notice')
|
||||
elif result['status'] == 'not_supported':
|
||||
@@ -103,13 +110,11 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
|
||||
if not os.getenv("SALTED_PASS", False) and len(form.application.form.password.encrypted_password):
|
||||
datastore.data['settings']['application']['password'] = form.application.form.password.encrypted_password
|
||||
datastore.needs_write_urgent = True
|
||||
datastore.commit()
|
||||
flash(gettext("Password protection enabled."), 'notice')
|
||||
flask_login.logout_user()
|
||||
return redirect(url_for('watchlist.index'))
|
||||
|
||||
datastore.needs_write_urgent = True
|
||||
|
||||
# Also save plugin settings from the same form submission
|
||||
plugin_tabs_list = get_plugin_settings_tabs()
|
||||
for tab in plugin_tabs_list:
|
||||
@@ -175,7 +180,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
def settings_reset_api_key():
|
||||
secret = secrets.token_hex(16)
|
||||
datastore.data['settings']['application']['api_access_token'] = secret
|
||||
datastore.needs_write_urgent = True
|
||||
datastore.commit()
|
||||
flash(gettext("API Key was regenerated."))
|
||||
return redirect(url_for('settings.settings_page')+'#api')
|
||||
|
||||
@@ -187,4 +192,32 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
logs=notification_debug_log if len(notification_debug_log) else ["Notification logs are empty - no notifications sent yet."])
|
||||
return output
|
||||
|
||||
@settings_blueprint.route("/toggle-all-paused", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def toggle_all_paused():
|
||||
current_state = datastore.data['settings']['application'].get('all_paused', False)
|
||||
datastore.data['settings']['application']['all_paused'] = not current_state
|
||||
datastore.commit()
|
||||
|
||||
if datastore.data['settings']['application']['all_paused']:
|
||||
flash(gettext("Automatic scheduling paused - checks will not be queued."), 'notice')
|
||||
else:
|
||||
flash(gettext("Automatic scheduling resumed - checks will be queued normally."), 'notice')
|
||||
|
||||
return redirect(url_for('watchlist.index'))
|
||||
|
||||
@settings_blueprint.route("/toggle-all-muted", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def toggle_all_muted():
|
||||
current_state = datastore.data['settings']['application'].get('all_muted', False)
|
||||
datastore.data['settings']['application']['all_muted'] = not current_state
|
||||
datastore.commit()
|
||||
|
||||
if datastore.data['settings']['application']['all_muted']:
|
||||
flash(gettext("All notifications muted."), 'notice')
|
||||
else:
|
||||
flash(gettext("All notifications unmuted."), 'notice')
|
||||
|
||||
return redirect(url_for('watchlist.index'))
|
||||
|
||||
return settings_blueprint
|
||||
@@ -25,6 +25,7 @@
|
||||
<li class="tab"><a href="#ui-options">{{ _('UI Options') }}</a></li>
|
||||
<li class="tab"><a href="#api">{{ _('API') }}</a></li>
|
||||
<li class="tab"><a href="#rss">{{ _('RSS') }}</a></li>
|
||||
<li class="tab"><a href="{{ url_for('backups.index') }}">{{ _('Backups') }}</a></li>
|
||||
<li class="tab"><a href="#timedate">{{ _('Time & Date') }}</a></li>
|
||||
<li class="tab"><a href="#proxies">{{ _('CAPTCHA & Proxies') }}</a></li>
|
||||
{% if plugin_tabs %}
|
||||
@@ -53,33 +54,50 @@
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.application.form.filter_failure_notification_threshold_attempts, class="filter_failure_notification_threshold_attempts") }}
|
||||
<span class="pure-form-message-inline">After this many consecutive times that the CSS/xPath filter is missing, send a notification
|
||||
<span class="pure-form-message-inline">{{ _('After this many consecutive times that the CSS/xPath filter is missing, send a notification') }}
|
||||
<br>
|
||||
Set to <strong>0</strong> to disable
|
||||
{{ _('Set to') }} <strong>0</strong> {{ _('to disable') }}
|
||||
</span>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.application.form.history_snapshot_max_length, class="history_snapshot_max_length") }}
|
||||
<span class="pure-form-message-inline">{{ _('Limit collection of history snapshots for each watch to this number of history items.') }}
|
||||
<br>
|
||||
{{ _('Set to empty to disable / no limit') }}
|
||||
</span>
|
||||
</div>
|
||||
|
||||
<div class="pure-control-group">
|
||||
{% if not hide_remove_pass %}
|
||||
{% if current_user.is_authenticated %}
|
||||
{{ render_button(form.application.form.removepassword_button) }}
|
||||
{% else %}
|
||||
{{ render_field(form.application.form.password) }}
|
||||
<span class="pure-form-message-inline">Password protection for your changedetection.io application.</span>
|
||||
<span class="pure-form-message-inline">{{ _('Password protection for your changedetection.io application.') }}</span>
|
||||
{% endif %}
|
||||
{% else %}
|
||||
<span class="pure-form-message-inline">Password is locked.</span>
|
||||
<span class="pure-form-message-inline">{{ _('Password is locked.') }}</span>
|
||||
{% endif %}
|
||||
</div>
|
||||
|
||||
<div class="pure-control-group">
|
||||
{{ render_checkbox_field(form.application.form.shared_diff_access, class="shared_diff_access") }}
|
||||
<span class="pure-form-message-inline">Allow access to the watch change history page when password is enabled (Good for sharing the diff page)
|
||||
</span>
|
||||
<span class="pure-form-message-inline">{{ _('Allow access to the watch change history page when password is enabled (Good for sharing the diff page)') }}</span>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_checkbox_field(form.application.form.empty_pages_are_a_change) }}
|
||||
<span class="pure-form-message-inline">When a request returns no content, or the HTML does not contain any text, is this considered a change?</span>
|
||||
<span class="pure-form-message-inline">{{ _('When a request returns no content, or the HTML does not contain any text, is this considered a change?') }}</span>
|
||||
</div>
|
||||
{% if form.requests.proxy %}
|
||||
<div>
|
||||
<br>
|
||||
<div class="inline-radio">
|
||||
{{ render_field(form.requests.form.proxy, class="fetch-backend-proxy") }}
|
||||
<span class="pure-form-message-inline">{{ _('Choose a default proxy for all watches') }}</span>
|
||||
</div>
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
</fieldset>
|
||||
</div>
|
||||
|
||||
@@ -90,8 +108,8 @@
|
||||
<div class="pure-control-group" id="notification-base-url">
|
||||
{{ render_field(form.application.form.base_url, class="m-d") }}
|
||||
<span class="pure-form-message-inline">
|
||||
Base URL used for the <code>{{ '{{ base_url }}' }}</code> token in notification links.<br>
|
||||
Default value is the system environment variable '<code>BASE_URL</code>' - <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Configurable-BASE_URL-setting">read more here</a>.
|
||||
{{ _('Base URL used for the') }} <code>{{ '{{ base_url }}' }}</code> {{ _('token in notification links.') }}<br>
|
||||
{{ _('Default value is the system environment variable') }} '<code>BASE_URL</code>' - <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Configurable-BASE_URL-setting">{{ _('read more here') }}</a>.
|
||||
</span>
|
||||
</div>
|
||||
</div>
|
||||
@@ -100,15 +118,15 @@
|
||||
<div class="pure-control-group inline-radio">
|
||||
{{ render_field(form.application.form.fetch_backend, class="fetch-backend") }}
|
||||
<span class="pure-form-message-inline">
|
||||
<p>Use the <strong>Basic</strong> method (default) where your watched sites don't need Javascript to render.</p>
|
||||
<p>The <strong>Chrome/Javascript</strong> method requires a network connection to a running WebDriver+Chrome server, set by the ENV var 'WEBDRIVER_URL'. </p>
|
||||
<p>{{ _('Use the') }} <strong>{{ _('Basic') }}</strong> {{ _('method (default) where your watched sites don\'t need Javascript to render.') }}</p>
|
||||
<p>{{ _('The') }} <strong>{{ _('Chrome/Javascript') }}</strong> {{ _('method requires a network connection to a running WebDriver+Chrome server, set by the ENV var') }} 'WEBDRIVER_URL'. </p>
|
||||
</span>
|
||||
</div>
|
||||
<fieldset class="pure-group" id="webdriver-override-options" data-visible-for="application-fetch_backend=html_webdriver">
|
||||
<div class="pure-form-message-inline">
|
||||
<strong>If you're having trouble waiting for the page to be fully rendered (text missing etc), try increasing the 'wait' time here.</strong>
|
||||
<strong>{{ _('If you\'re having trouble waiting for the page to be fully rendered (text missing etc), try increasing the \'wait\' time here.') }}</strong>
|
||||
<br>
|
||||
This will wait <i>n</i> seconds before extracting the text.
|
||||
{{ _('This will wait') }} <i>n</i> {{ _('seconds before extracting the text.') }}
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.application.form.webdriver_delay) }}
|
||||
@@ -117,27 +135,27 @@
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.requests.form.workers) }}
|
||||
{% set worker_info = get_worker_status_info() %}
|
||||
<span class="pure-form-message-inline">Number of concurrent workers to process watches. More workers = faster processing but higher memory usage.<br>
|
||||
Currently running: <strong>{{ worker_info.count }}</strong> operational {{ worker_info.type }} workers{% if worker_info.active_workers > 0 %} ({{ worker_info.active_workers }} actively processing){% endif %}.</span>
|
||||
<span class="pure-form-message-inline">{{ _('Number of concurrent workers to process watches. More workers = faster processing but higher memory usage.') }}<br>
|
||||
{{ _('Currently running:') }} <strong>{{ worker_info.count }}</strong> {{ _('operational') }} {{ worker_info.type }} {{ _('workers') }}{% if worker_info.active_workers > 0 %} ({{ worker_info.active_workers }} {{ _('actively processing') }}){% endif %}.</span>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.requests.form.jitter_seconds, class="jitter_seconds") }}
|
||||
<span class="pure-form-message-inline">Example - 3 seconds random jitter could trigger up to 3 seconds earlier or up to 3 seconds later</span>
|
||||
<span class="pure-form-message-inline">{{ _('Example - 3 seconds random jitter could trigger up to 3 seconds earlier or up to 3 seconds later') }}</span>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.requests.form.timeout) }}
|
||||
<span class="pure-form-message-inline">For regular plain requests (not chrome based), maximum number of seconds until timeout, 1-999.</span><br>
|
||||
<span class="pure-form-message-inline">{{ _('For regular plain requests (not chrome based), maximum number of seconds until timeout, 1-999.') }}</span><br>
|
||||
</div>
|
||||
<div class="pure-control-group inline-radio">
|
||||
{{ render_field(form.requests.form.default_ua) }}
|
||||
<span class="pure-form-message-inline">
|
||||
Applied to all requests.<br><br>
|
||||
Note: Simply changing the User-Agent often does not defeat anti-robot technologies, it's important to consider <a href="https://changedetection.io/tutorial/what-are-main-types-anti-robot-mechanisms">all of the ways that the browser is detected</a>.
|
||||
{{ _('Applied to all requests.') }}<br><br>
|
||||
{{ _('Note: Simply changing the User-Agent often does not defeat anti-robot technologies, it\'s important to consider') }} <a href="https://changedetection.io/tutorial/what-are-main-types-anti-robot-mechanisms">{{ _('all of the ways that the browser is detected') }}</a>.
|
||||
</span>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
<br>
|
||||
Tip: <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration#brightdata-proxy-support">Connect using Bright Data and Oxylabs Proxies, find out more here.</a>
|
||||
{{ _('Tip:') }} <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration#brightdata-proxy-support">{{ _('Connect using Bright Data and Oxylabs Proxies, find out more here.') }}</a>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
@@ -146,15 +164,15 @@
|
||||
|
||||
<fieldset class="pure-group">
|
||||
{{ render_checkbox_field(form.application.form.ignore_whitespace) }}
|
||||
<span class="pure-form-message-inline">Ignore whitespace, tabs and new-lines/line-feeds when considering if a change was detected.<br>
|
||||
<i>Note:</i> Changing this will change the status of your existing watches, possibly trigger alerts etc.
|
||||
<span class="pure-form-message-inline">{{ _('Ignore whitespace, tabs and new-lines/line-feeds when considering if a change was detected.') }}<br>
|
||||
<i>{{ _('Note:') }}</i> {{ _('Changing this will change the status of your existing watches, possibly trigger alerts etc.') }}
|
||||
</span>
|
||||
</fieldset>
|
||||
<fieldset class="pure-group">
|
||||
{{ render_checkbox_field(form.application.form.render_anchor_tag_content) }}
|
||||
<span class="pure-form-message-inline">Render anchor tag content, default disabled, when enabled renders links as <code>(link text)[https://somesite.com]</code>
|
||||
<span class="pure-form-message-inline">{{ _('Render anchor tag content, default disabled, when enabled renders links as') }} <code>(link text)[https://somesite.com]</code>
|
||||
<br>
|
||||
<i>Note:</i> Changing this could affect the content of your existing watches, possibly trigger alerts etc.
|
||||
<i>{{ _('Note:') }}</i> {{ _('Changing this could affect the content of your existing watches, possibly trigger alerts etc.') }}
|
||||
</span>
|
||||
</fieldset>
|
||||
<fieldset class="pure-group">
|
||||
@@ -165,9 +183,9 @@ nav
|
||||
//*[contains(text(), 'Advertisement')]") }}
|
||||
<span class="pure-form-message-inline">
|
||||
<ul>
|
||||
<li> Remove HTML element(s) by CSS and XPath selectors before text conversion. </li>
|
||||
<li> Don't paste HTML here, use only CSS and XPath selectors </li>
|
||||
<li> Add multiple elements, CSS or XPath selectors per line to ignore multiple parts of the HTML. </li>
|
||||
<li> {{ _('Remove HTML element(s) by CSS and XPath selectors before text conversion.') }} </li>
|
||||
<li> {{ _('Don\'t paste HTML here, use only CSS and XPath selectors') }} </li>
|
||||
<li> {{ _('Add multiple elements, CSS or XPath selectors per line to ignore multiple parts of the HTML.') }} </li>
|
||||
</ul>
|
||||
</span>
|
||||
</fieldset>
|
||||
@@ -175,50 +193,50 @@ nav
|
||||
{{ render_field(form.application.form.global_ignore_text, rows=5, placeholder="Some text to ignore in a line
|
||||
/some.regex\d{2}/ for case-INsensitive regex
|
||||
") }}
|
||||
<span class="pure-form-message-inline">Note: This is applied globally in addition to the per-watch rules.</span><br>
|
||||
<span class="pure-form-message-inline">{{ _('Note: This is applied globally in addition to the per-watch rules.') }}</span><br>
|
||||
<span class="pure-form-message-inline">
|
||||
<ul>
|
||||
<li>Matching text will be <strong>ignored</strong> in the text snapshot (you can still see it but it wont trigger a change)</li>
|
||||
<li>Note: This is applied globally in addition to the per-watch rules.</li>
|
||||
<li>Each line processed separately, any line matching will be ignored (removed before creating the checksum)</li>
|
||||
<li>Regular Expression support, wrap the entire line in forward slash <code>/regex/</code></li>
|
||||
<li>Changing this will affect the comparison checksum which may trigger an alert</li>
|
||||
<li>{{ _('Matching text will be') }} <strong>{{ _('ignored') }}</strong> {{ _('in the text snapshot (you can still see it but it wont trigger a change)') }}</li>
|
||||
<li>{{ _('Note: This is applied globally in addition to the per-watch rules.') }}</li>
|
||||
<li>{{ _('Each line processed separately, any line matching will be ignored (removed before creating the checksum)') }}</li>
|
||||
<li>{{ _('Regular Expression support, wrap the entire line in forward slash') }} <code>/regex/</code></li>
|
||||
<li>{{ _('Changing this will affect the comparison checksum which may trigger an alert') }}</li>
|
||||
</ul>
|
||||
</span>
|
||||
</fieldset>
|
||||
<fieldset class="pure-group">
|
||||
{{ render_checkbox_field(form.application.form.strip_ignored_lines) }}
|
||||
<span class="pure-form-message-inline">Remove any text that appears in the "Ignore text" from the output (otherwise its just ignored for change-detection)<br>
|
||||
<i>Note:</i> Changing this will change the status of your existing watches, possibly trigger alerts etc.
|
||||
<span class="pure-form-message-inline">{{ _('Remove any text that appears in the "Ignore text" from the output (otherwise its just ignored for change-detection)') }}<br>
|
||||
<i>{{ _('Note:') }}</i> {{ _('Changing this will change the status of your existing watches, possibly trigger alerts etc.') }}
|
||||
</span>
|
||||
</fieldset>
|
||||
</div>
|
||||
|
||||
<div class="tab-pane-inner" id="api">
|
||||
<h4>API Access</h4>
|
||||
<p>Drive your changedetection.io via API, More about <a href="https://changedetection.io/docs/api_v1/index.html">API access and examples here</a>.</p>
|
||||
<h4>{{ _('API Access') }}</h4>
|
||||
<p>{{ _('Drive your changedetection.io via API, More about') }} <a href="https://changedetection.io/docs/api_v1/index.html">{{ _('API access and examples here') }}</a>.</p>
|
||||
|
||||
<div class="pure-control-group">
|
||||
{{ render_checkbox_field(form.application.form.api_access_token_enabled) }}
|
||||
<div class="pure-form-message-inline">Restrict API access limit by using <code>x-api-key</code> header - required for the Chrome Extension to work</div><br>
|
||||
<div class="pure-form-message-inline"><br>API Key <span id="api-key">{{api_key}}</span>
|
||||
<span style="display:none;" id="api-key-copy" >copy</span>
|
||||
<div class="pure-form-message-inline">{{ _('Restrict API access limit by using') }} <code>x-api-key</code> {{ _('header - required for the Chrome Extension to work') }}</div><br>
|
||||
<div class="pure-form-message-inline"><br>{{ _('API Key') }} <span id="api-key">{{api_key}}</span>
|
||||
<span style="display:none;" id="api-key-copy" >{{ _('copy') }}</span>
|
||||
</div>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
<a href="{{url_for('settings.settings_reset_api_key')}}" class="pure-button button-small button-cancel">Regenerate API key</a>
|
||||
<a href="{{url_for('settings.settings_reset_api_key')}}" class="pure-button button-small button-cancel">{{ _('Regenerate API key') }}</a>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
<h4>Chrome Extension</h4>
|
||||
<p>Easily add any web-page to your changedetection.io installation from within Chrome.</p>
|
||||
<strong>Step 1</strong> Install the extension, <strong>Step 2</strong> Navigate to this page,
|
||||
<strong>Step 3</strong> Open the extension from the toolbar and click "<i>Sync API Access</i>"
|
||||
<h4>{{ _('Chrome Extension') }}</h4>
|
||||
<p>{{ _('Easily add any web-page to your changedetection.io installation from within Chrome.') }}</p>
|
||||
<strong>{{ _('Step 1') }}</strong> {{ _('Install the extension,') }} <strong>{{ _('Step 2') }}</strong> {{ _('Navigate to this page,') }}
|
||||
<strong>{{ _('Step 3') }}</strong> {{ _('Open the extension from the toolbar and click') }} "<i>{{ _('Sync API Access') }}</i>"
|
||||
<p>
|
||||
<a id="chrome-extension-link"
|
||||
title="Try our new Chrome Extension!"
|
||||
title="{{ _('Try our new Chrome Extension!') }}"
|
||||
href="https://chromewebstore.google.com/detail/changedetectionio-website/kefcfmgmlhmankjmnbijimhofdjekbop">
|
||||
<img alt="Chrome store icon" src="{{ url_for('static_content', group='images', filename='google-chrome-icon.png') }}" >
|
||||
Chrome Webstore
|
||||
<img alt="{{ _('Chrome store icon') }}" src="{{ url_for('static_content', group='images', filename='google-chrome-icon.png') }}" >
|
||||
{{ _('Chrome Webstore') }}
|
||||
</a>
|
||||
</p>
|
||||
</div>
|
||||
@@ -229,20 +247,20 @@ nav
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.application.form.rss_diff_length) }}
|
||||
<span class="pure-form-message-inline">Maximum number of history snapshots to include in the watch specific RSS feed.</span>
|
||||
<span class="pure-form-message-inline">{{ _('Maximum number of history snapshots to include in the watch specific RSS feed.') }}</span>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_checkbox_field(form.application.form.rss_reader_mode) }}
|
||||
<span class="pure-form-message-inline">For watching other RSS feeds - When watching RSS/Atom feeds, convert them into clean text for better change detection.</span>
|
||||
<span class="pure-form-message-inline">{{ _('For watching other RSS feeds - When watching RSS/Atom feeds, convert them into clean text for better change detection.') }}</span>
|
||||
</div>
|
||||
<div class="pure-control-group grey-form-border">
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.application.form.rss_content_format) }}
|
||||
<span class="pure-form-message-inline">Does your reader support HTML? Set it here</span>
|
||||
<span class="pure-form-message-inline">{{ _('Does your reader support HTML? Set it here') }}</span>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.application.form.rss_template_type) }}
|
||||
<span class="pure-form-message-inline">'System default' for the same template for all items, or re-use your "Notification Body" as the template.</span>
|
||||
<span class="pure-form-message-inline">{{ _('\'System default\' for the same template for all items, or re-use your "Notification Body" as the template.') }}</span>
|
||||
</div>
|
||||
<div>
|
||||
{{ render_field(form.application.form.rss_template_override) }}
|
||||
@@ -255,11 +273,11 @@ nav
|
||||
</div>
|
||||
<div class="tab-pane-inner" id="timedate">
|
||||
<div class="pure-control-group">
|
||||
Ensure the settings below are correct, they are used to manage the time schedule for checking your web page watches.
|
||||
{{ _('Ensure the settings below are correct, they are used to manage the time schedule for checking your web page watches.') }}
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
<p><strong>UTC Time & Date from Server:</strong> <span id="utc-time" >{{ utc_time }}</span></p>
|
||||
<p><strong>Local Time & Date in Browser:</strong> <span class="local-time" data-utc="{{ utc_time }}"></span></p>
|
||||
<p><strong>{{ _('UTC Time & Date from Server:') }}</strong> <span id="utc-time" >{{ utc_time }}</span></p>
|
||||
<p><strong>{{ _('Local Time & Date in Browser:') }}</strong> <span class="local-time" data-utc="{{ utc_time }}"></span></p>
|
||||
<div>
|
||||
{{ render_field(form.application.form.scheduler_timezone_default) }}
|
||||
<datalist id="timezones" style="display: none;">
|
||||
@@ -271,22 +289,22 @@ nav
|
||||
<div class="tab-pane-inner" id="ui-options">
|
||||
<div class="pure-control-group">
|
||||
{{ render_checkbox_field(form.application.form.ui.form.open_diff_in_new_tab, class="open_diff_in_new_tab") }}
|
||||
<span class="pure-form-message-inline">Enable this setting to open the diff page in a new tab. If disabled, the diff page will open in the current tab.</span>
|
||||
<span class="pure-form-message-inline">{{ _('Enable this setting to open the diff page in a new tab. If disabled, the diff page will open in the current tab.') }}</span>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_checkbox_field(form.application.form.ui.form.socket_io_enabled, class="socket_io_enabled") }}
|
||||
<span class="pure-form-message-inline">Realtime UI Updates Enabled - (Restart required if this is changed)</span>
|
||||
<span class="pure-form-message-inline">{{ _('Realtime UI Updates Enabled - (Restart required if this is changed)') }}</span>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_checkbox_field(form.application.form.ui.form.favicons_enabled, class="") }}
|
||||
<span class="pure-form-message-inline">Enable or Disable Favicons next to the watch list</span>
|
||||
<span class="pure-form-message-inline">{{ _('Enable or Disable Favicons next to the watch list') }}</span>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_checkbox_field(form.application.form.ui.use_page_title_in_list) }}
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.application.form.pager_size) }}
|
||||
<span class="pure-form-message-inline">Number of items per page in the watch overview list, 0 to disable.</span>
|
||||
<span class="pure-form-message-inline">{{ _('Number of items per page in the watch overview list, 0 to disable.') }}</span>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
@@ -334,21 +352,12 @@ nav
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<p><strong>Tip</strong>: "Residential" and "Mobile" proxy type can be more successfull than "Data Center" for blocked websites.</p>
|
||||
<p><strong>{{ _('Tip') }}</strong>: {{ _('"Residential" and "Mobile" proxy type can be more successfull than "Data Center" for blocked websites.') }}</p>
|
||||
|
||||
<div class="pure-control-group" id="extra-proxies-setting">
|
||||
{{ render_fieldlist_with_inline_errors(form.requests.form.extra_proxies) }}
|
||||
<span class="pure-form-message-inline">"Name" will be used for selecting the proxy in the Watch Edit settings</span><br>
|
||||
<span class="pure-form-message-inline">SOCKS5 proxies with authentication are only supported with 'plain requests' fetcher, for other fetchers you should whitelist the IP access instead</span>
|
||||
{% if form.requests.proxy %}
|
||||
<div>
|
||||
<br>
|
||||
<div class="inline-radio">
|
||||
{{ render_field(form.requests.form.proxy, class="fetch-backend-proxy") }}
|
||||
<span class="pure-form-message-inline">Choose a default proxy for all watches</span>
|
||||
</div>
|
||||
</div>
|
||||
{% endif %}
|
||||
<span class="pure-form-message-inline">{{ _('"Name" will be used for selecting the proxy in the Watch Edit settings') }}</span><br>
|
||||
<span class="pure-form-message-inline">{{ _('SOCKS5 proxies with authentication are only supported with \'plain requests\' fetcher, for other fetchers you should whitelist the IP access instead') }}</span>
|
||||
</div>
|
||||
<div class="pure-control-group" id="extra-browsers-setting">
|
||||
<p>
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
import threading
|
||||
from flask import Blueprint, request, render_template, flash, url_for, redirect
|
||||
from flask_babel import gettext
|
||||
from loguru import logger
|
||||
|
||||
from changedetectionio.store import ChangeDetectionStore
|
||||
from changedetectionio.flask_app import login_optionally_required
|
||||
@@ -57,44 +59,83 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
def mute(uuid):
|
||||
if datastore.data['settings']['application']['tags'].get(uuid):
|
||||
datastore.data['settings']['application']['tags'][uuid]['notification_muted'] = not datastore.data['settings']['application']['tags'][uuid]['notification_muted']
|
||||
datastore.commit()
|
||||
return redirect(url_for('tags.tags_overview_page'))
|
||||
|
||||
@tags_blueprint.route("/delete/<string:uuid>", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def delete(uuid):
|
||||
removed = 0
|
||||
# Delete the tag, and any tag reference
|
||||
# Delete the tag from settings immediately
|
||||
if datastore.data['settings']['application']['tags'].get(uuid):
|
||||
del datastore.data['settings']['application']['tags'][uuid]
|
||||
|
||||
for watch_uuid, watch in datastore.data['watching'].items():
|
||||
if watch.get('tags') and uuid in watch['tags']:
|
||||
removed += 1
|
||||
watch['tags'].remove(uuid)
|
||||
# Remove tag from all watches in background thread to avoid blocking
|
||||
def remove_tag_background(tag_uuid):
|
||||
"""Background thread to remove tag from watches - discarded after completion."""
|
||||
removed_count = 0
|
||||
try:
|
||||
for watch_uuid, watch in datastore.data['watching'].items():
|
||||
if watch.get('tags') and tag_uuid in watch['tags']:
|
||||
watch['tags'].remove(tag_uuid)
|
||||
watch.commit()
|
||||
removed_count += 1
|
||||
logger.info(f"Background: Tag {tag_uuid} removed from {removed_count} watches")
|
||||
except Exception as e:
|
||||
logger.error(f"Error removing tag from watches: {e}")
|
||||
|
||||
flash(gettext("Tag deleted and removed from {} watches").format(removed))
|
||||
# Start daemon thread
|
||||
threading.Thread(target=remove_tag_background, args=(uuid,), daemon=True).start()
|
||||
|
||||
flash(gettext("Tag deleted, removing from watches in background"))
|
||||
return redirect(url_for('tags.tags_overview_page'))
|
||||
|
||||
@tags_blueprint.route("/unlink/<string:uuid>", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def unlink(uuid):
|
||||
unlinked = 0
|
||||
for watch_uuid, watch in datastore.data['watching'].items():
|
||||
if watch.get('tags') and uuid in watch['tags']:
|
||||
unlinked += 1
|
||||
watch['tags'].remove(uuid)
|
||||
# Unlink tag from all watches in background thread to avoid blocking
|
||||
def unlink_tag_background(tag_uuid):
|
||||
"""Background thread to unlink tag from watches - discarded after completion."""
|
||||
unlinked_count = 0
|
||||
try:
|
||||
for watch_uuid, watch in datastore.data['watching'].items():
|
||||
if watch.get('tags') and tag_uuid in watch['tags']:
|
||||
watch['tags'].remove(tag_uuid)
|
||||
watch.commit()
|
||||
unlinked_count += 1
|
||||
logger.info(f"Background: Tag {tag_uuid} unlinked from {unlinked_count} watches")
|
||||
except Exception as e:
|
||||
logger.error(f"Error unlinking tag from watches: {e}")
|
||||
|
||||
flash(gettext("Tag unlinked removed from {} watches").format(unlinked))
|
||||
# Start daemon thread
|
||||
threading.Thread(target=unlink_tag_background, args=(uuid,), daemon=True).start()
|
||||
|
||||
flash(gettext("Unlinking tag from watches in background"))
|
||||
return redirect(url_for('tags.tags_overview_page'))
|
||||
|
||||
@tags_blueprint.route("/delete_all", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def delete_all():
|
||||
for watch_uuid, watch in datastore.data['watching'].items():
|
||||
watch['tags'] = []
|
||||
# Clear all tags from settings immediately
|
||||
datastore.data['settings']['application']['tags'] = {}
|
||||
datastore.commit()
|
||||
|
||||
flash(gettext("All tags deleted"))
|
||||
# Clear tags from all watches in background thread to avoid blocking
|
||||
def clear_all_tags_background():
|
||||
"""Background thread to clear tags from all watches - discarded after completion."""
|
||||
cleared_count = 0
|
||||
try:
|
||||
for watch_uuid, watch in datastore.data['watching'].items():
|
||||
watch['tags'] = []
|
||||
watch.commit()
|
||||
cleared_count += 1
|
||||
logger.info(f"Background: Cleared tags from {cleared_count} watches")
|
||||
except Exception as e:
|
||||
logger.error(f"Error clearing tags from watches: {e}")
|
||||
|
||||
# Start daemon thread
|
||||
threading.Thread(target=clear_all_tags_background, daemon=True).start()
|
||||
|
||||
flash(gettext("All tags deleted, clearing from watches in background"))
|
||||
return redirect(url_for('tags.tags_overview_page'))
|
||||
|
||||
@tags_blueprint.route("/edit/<string:uuid>", methods=['GET'])
|
||||
@@ -180,7 +221,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
|
||||
datastore.data['settings']['application']['tags'][uuid].update(form.data)
|
||||
datastore.data['settings']['application']['tags'][uuid]['processor'] = 'restock_diff'
|
||||
datastore.needs_write_urgent = True
|
||||
datastore.commit()
|
||||
flash(gettext("Updated"))
|
||||
|
||||
return redirect(url_for('tags.tags_overview_page'))
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import time
|
||||
from flask import Blueprint, request, redirect, url_for, flash, render_template, session
|
||||
import threading
|
||||
from flask import Blueprint, request, redirect, url_for, flash, render_template, session, current_app
|
||||
from flask_babel import gettext
|
||||
from loguru import logger
|
||||
|
||||
@@ -9,7 +10,7 @@ from changedetectionio.blueprint.ui.notification import construct_blueprint as c
|
||||
from changedetectionio.blueprint.ui.views import construct_blueprint as construct_views_blueprint
|
||||
from changedetectionio.blueprint.ui import diff, preview
|
||||
|
||||
def _handle_operations(op, uuids, datastore, worker_handler, update_q, queuedWatchMetaData, watch_check_update, extra_data=None, emit_flash=True):
|
||||
def _handle_operations(op, uuids, datastore, worker_pool, update_q, queuedWatchMetaData, watch_check_update, extra_data=None, emit_flash=True):
|
||||
from flask import request, flash
|
||||
|
||||
if op == 'delete':
|
||||
@@ -23,6 +24,7 @@ def _handle_operations(op, uuids, datastore, worker_handler, update_q, queuedWat
|
||||
for uuid in uuids:
|
||||
if datastore.data['watching'].get(uuid):
|
||||
datastore.data['watching'][uuid]['paused'] = True
|
||||
datastore.data['watching'][uuid].commit()
|
||||
if emit_flash:
|
||||
flash(gettext("{} watches paused").format(len(uuids)))
|
||||
|
||||
@@ -30,6 +32,7 @@ def _handle_operations(op, uuids, datastore, worker_handler, update_q, queuedWat
|
||||
for uuid in uuids:
|
||||
if datastore.data['watching'].get(uuid):
|
||||
datastore.data['watching'][uuid.strip()]['paused'] = False
|
||||
datastore.data['watching'][uuid].commit()
|
||||
if emit_flash:
|
||||
flash(gettext("{} watches unpaused").format(len(uuids)))
|
||||
|
||||
@@ -44,6 +47,7 @@ def _handle_operations(op, uuids, datastore, worker_handler, update_q, queuedWat
|
||||
for uuid in uuids:
|
||||
if datastore.data['watching'].get(uuid):
|
||||
datastore.data['watching'][uuid]['notification_muted'] = True
|
||||
datastore.data['watching'][uuid].commit()
|
||||
if emit_flash:
|
||||
flash(gettext("{} watches muted").format(len(uuids)))
|
||||
|
||||
@@ -51,6 +55,7 @@ def _handle_operations(op, uuids, datastore, worker_handler, update_q, queuedWat
|
||||
for uuid in uuids:
|
||||
if datastore.data['watching'].get(uuid):
|
||||
datastore.data['watching'][uuid]['notification_muted'] = False
|
||||
datastore.data['watching'][uuid].commit()
|
||||
if emit_flash:
|
||||
flash(gettext("{} watches un-muted").format(len(uuids)))
|
||||
|
||||
@@ -58,7 +63,7 @@ def _handle_operations(op, uuids, datastore, worker_handler, update_q, queuedWat
|
||||
for uuid in uuids:
|
||||
if datastore.data['watching'].get(uuid):
|
||||
# Recheck and require a full reprocessing
|
||||
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
worker_pool.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
if emit_flash:
|
||||
flash(gettext("{} watches queued for rechecking").format(len(uuids)))
|
||||
|
||||
@@ -66,6 +71,7 @@ def _handle_operations(op, uuids, datastore, worker_handler, update_q, queuedWat
|
||||
for uuid in uuids:
|
||||
if datastore.data['watching'].get(uuid):
|
||||
datastore.data['watching'][uuid]["last_error"] = False
|
||||
datastore.data['watching'][uuid].commit()
|
||||
if emit_flash:
|
||||
flash(gettext("{} watches errors cleared").format(len(uuids)))
|
||||
|
||||
@@ -86,6 +92,7 @@ def _handle_operations(op, uuids, datastore, worker_handler, update_q, queuedWat
|
||||
datastore.data['watching'][uuid]['notification_body'] = None
|
||||
datastore.data['watching'][uuid]['notification_urls'] = []
|
||||
datastore.data['watching'][uuid]['notification_format'] = USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH
|
||||
datastore.data['watching'][uuid].commit()
|
||||
if emit_flash:
|
||||
flash(gettext("{} watches set to use default notification settings").format(len(uuids)))
|
||||
|
||||
@@ -101,6 +108,7 @@ def _handle_operations(op, uuids, datastore, worker_handler, update_q, queuedWat
|
||||
datastore.data['watching'][uuid]['tags'] = []
|
||||
|
||||
datastore.data['watching'][uuid]['tags'].append(tag_uuid)
|
||||
datastore.data['watching'][uuid].commit()
|
||||
if emit_flash:
|
||||
flash(gettext("{} watches were tagged").format(len(uuids)))
|
||||
|
||||
@@ -108,7 +116,7 @@ def _handle_operations(op, uuids, datastore, worker_handler, update_q, queuedWat
|
||||
for uuid in uuids:
|
||||
watch_check_update.send(watch_uuid=uuid)
|
||||
|
||||
def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_handler, queuedWatchMetaData, watch_check_update):
|
||||
def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_pool, queuedWatchMetaData, watch_check_update):
|
||||
ui_blueprint = Blueprint('ui', __name__, template_folder="templates")
|
||||
|
||||
# Register the edit blueprint
|
||||
@@ -151,9 +159,24 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_handle
|
||||
confirmtext = request.form.get('confirmtext')
|
||||
|
||||
if confirmtext == 'clear':
|
||||
for uuid in datastore.data['watching'].keys():
|
||||
datastore.clear_watch_history(uuid)
|
||||
flash(gettext("Cleared snapshot history for all watches"))
|
||||
# Run in background thread to avoid blocking
|
||||
def clear_history_background():
|
||||
# Capture UUIDs first to avoid race conditions
|
||||
watch_uuids = list(datastore.data['watching'].keys())
|
||||
logger.info(f"Background: Clearing history for {len(watch_uuids)} watches")
|
||||
|
||||
for uuid in watch_uuids:
|
||||
try:
|
||||
datastore.clear_watch_history(uuid)
|
||||
except Exception as e:
|
||||
logger.error(f"Error clearing history for watch {uuid}: {e}")
|
||||
|
||||
logger.info("Background: Completed clearing history")
|
||||
|
||||
# Start daemon thread
|
||||
threading.Thread(target=clear_history_background, daemon=True).start()
|
||||
|
||||
flash(gettext("History clearing started in background"))
|
||||
else:
|
||||
flash(gettext('Incorrect confirmation text.'), 'error')
|
||||
|
||||
@@ -169,32 +192,46 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_handle
|
||||
# Save the current newest history as the most recently viewed
|
||||
with_errors = request.args.get('with_errors') == "1"
|
||||
tag_limit = request.args.get('tag')
|
||||
logger.debug(f"Limiting to tag {tag_limit}")
|
||||
now = int(time.time())
|
||||
for watch_uuid, watch in datastore.data['watching'].items():
|
||||
if with_errors and not watch.get('last_error'):
|
||||
continue
|
||||
|
||||
if tag_limit and ( not watch.get('tags') or tag_limit not in watch['tags'] ):
|
||||
logger.debug(f"Skipping watch {watch_uuid}")
|
||||
continue
|
||||
# Mark watches as viewed in background thread to avoid blocking
|
||||
def mark_viewed_background():
|
||||
"""Background thread to mark watches as viewed - discarded after completion."""
|
||||
marked_count = 0
|
||||
try:
|
||||
for watch_uuid, watch in datastore.data['watching'].items():
|
||||
if with_errors and not watch.get('last_error'):
|
||||
continue
|
||||
|
||||
datastore.set_last_viewed(watch_uuid, now)
|
||||
if tag_limit and (not watch.get('tags') or tag_limit not in watch['tags']):
|
||||
continue
|
||||
|
||||
datastore.set_last_viewed(watch_uuid, now)
|
||||
marked_count += 1
|
||||
|
||||
logger.info(f"Background marking complete: {marked_count} watches marked as viewed")
|
||||
except Exception as e:
|
||||
logger.error(f"Error in background mark as viewed: {e}")
|
||||
|
||||
# Start background thread and return immediately
|
||||
thread = threading.Thread(target=mark_viewed_background, daemon=True)
|
||||
thread.start()
|
||||
|
||||
flash(gettext("Marking watches as viewed in background..."))
|
||||
return redirect(url_for('watchlist.index', tag=tag_limit))
|
||||
|
||||
@ui_blueprint.route("/delete", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def form_delete():
|
||||
uuid = request.args.get('uuid')
|
||||
# More for testing, possible to return the first/only
|
||||
if uuid == 'first':
|
||||
uuid = list(datastore.data['watching'].keys()).pop()
|
||||
|
||||
if uuid != 'all' and not uuid in datastore.data['watching'].keys():
|
||||
flash(gettext('The watch by UUID {} does not exist.').format(uuid), 'error')
|
||||
return redirect(url_for('watchlist.index'))
|
||||
|
||||
# More for testing, possible to return the first/only
|
||||
if uuid == 'first':
|
||||
uuid = list(datastore.data['watching'].keys()).pop()
|
||||
datastore.delete(uuid)
|
||||
flash(gettext('Deleted.'))
|
||||
|
||||
@@ -204,14 +241,14 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_handle
|
||||
@login_optionally_required
|
||||
def form_clone():
|
||||
uuid = request.args.get('uuid')
|
||||
# More for testing, possible to return the first/only
|
||||
|
||||
if uuid == 'first':
|
||||
uuid = list(datastore.data['watching'].keys()).pop()
|
||||
|
||||
new_uuid = datastore.clone(uuid)
|
||||
|
||||
if not datastore.data['watching'].get(uuid).get('paused'):
|
||||
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=5, item={'uuid': new_uuid}))
|
||||
worker_pool.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=5, item={'uuid': new_uuid}))
|
||||
|
||||
flash(gettext('Cloned, you are editing the new watch.'))
|
||||
|
||||
@@ -225,38 +262,81 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_handle
|
||||
uuid = request.args.get('uuid')
|
||||
with_errors = request.args.get('with_errors') == "1"
|
||||
|
||||
i = 0
|
||||
|
||||
running_uuids = worker_handler.get_running_uuids()
|
||||
|
||||
if uuid:
|
||||
if uuid not in running_uuids:
|
||||
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
i += 1
|
||||
|
||||
# Single watch - check if already queued or running
|
||||
if worker_pool.is_watch_running(uuid) or uuid in update_q.get_queued_uuids():
|
||||
flash(gettext("Watch is already queued or being checked."))
|
||||
else:
|
||||
worker_pool.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
flash(gettext("Queued 1 watch for rechecking."))
|
||||
else:
|
||||
# Recheck all, including muted
|
||||
# Get most overdue first
|
||||
# Multiple watches - first count how many need to be queued
|
||||
watches_to_queue = []
|
||||
for k in sorted(datastore.data['watching'].items(), key=lambda item: item[1].get('last_checked', 0)):
|
||||
watch_uuid = k[0]
|
||||
watch = k[1]
|
||||
if not watch['paused']:
|
||||
if watch_uuid not in running_uuids:
|
||||
if with_errors and not watch.get('last_error'):
|
||||
continue
|
||||
if not watch['paused'] and watch_uuid:
|
||||
if with_errors and not watch.get('last_error'):
|
||||
continue
|
||||
if tag != None and tag not in watch['tags']:
|
||||
continue
|
||||
watches_to_queue.append(watch_uuid)
|
||||
|
||||
if tag != None and tag not in watch['tags']:
|
||||
continue
|
||||
# If less than 20 watches, queue synchronously for immediate feedback
|
||||
if len(watches_to_queue) < 20:
|
||||
# Get already queued/running UUIDs once (efficient)
|
||||
queued_uuids = set(update_q.get_queued_uuids())
|
||||
running_uuids = set(worker_pool.get_running_uuids())
|
||||
|
||||
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid}))
|
||||
i += 1
|
||||
# Filter out watches that are already queued or running
|
||||
watches_to_queue_filtered = []
|
||||
for watch_uuid in watches_to_queue:
|
||||
if watch_uuid not in queued_uuids and watch_uuid not in running_uuids:
|
||||
watches_to_queue_filtered.append(watch_uuid)
|
||||
|
||||
if i == 1:
|
||||
flash(gettext("Queued 1 watch for rechecking."))
|
||||
if i > 1:
|
||||
flash(gettext("Queued {} watches for rechecking.").format(i))
|
||||
if i == 0:
|
||||
flash(gettext("No watches available to recheck."))
|
||||
# Queue only the filtered watches
|
||||
for watch_uuid in watches_to_queue_filtered:
|
||||
worker_pool.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid}))
|
||||
|
||||
# Provide feedback about skipped watches
|
||||
skipped_count = len(watches_to_queue) - len(watches_to_queue_filtered)
|
||||
if skipped_count > 0:
|
||||
flash(gettext("Queued {} watches for rechecking ({} already queued or running).").format(
|
||||
len(watches_to_queue_filtered), skipped_count))
|
||||
else:
|
||||
if len(watches_to_queue_filtered) == 1:
|
||||
flash(gettext("Queued 1 watch for rechecking."))
|
||||
else:
|
||||
flash(gettext("Queued {} watches for rechecking.").format(len(watches_to_queue_filtered)))
|
||||
else:
|
||||
# 20+ watches - queue in background thread to avoid blocking HTTP response
|
||||
# Capture queued/running state before background thread
|
||||
queued_uuids = set(update_q.get_queued_uuids())
|
||||
running_uuids = set(worker_pool.get_running_uuids())
|
||||
|
||||
def queue_watches_background():
|
||||
"""Background thread to queue watches - discarded after completion."""
|
||||
try:
|
||||
queued_count = 0
|
||||
skipped_count = 0
|
||||
for watch_uuid in watches_to_queue:
|
||||
# Check if already queued or running (state captured at start)
|
||||
if watch_uuid not in queued_uuids and watch_uuid not in running_uuids:
|
||||
worker_pool.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid}))
|
||||
queued_count += 1
|
||||
else:
|
||||
skipped_count += 1
|
||||
|
||||
logger.info(f"Background queueing complete: {queued_count} watches queued, {skipped_count} skipped (already queued/running)")
|
||||
except Exception as e:
|
||||
logger.error(f"Error in background queueing: {e}")
|
||||
|
||||
# Start background thread and return immediately
|
||||
thread = threading.Thread(target=queue_watches_background, daemon=True, name="QueueWatches-Background")
|
||||
thread.start()
|
||||
|
||||
# Return immediately with approximate message
|
||||
flash(gettext("Queueing watches for rechecking in background..."))
|
||||
|
||||
return redirect(url_for('watchlist.index', **({'tag': tag} if tag else {})))
|
||||
|
||||
@@ -271,7 +351,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_handle
|
||||
extra_data=extra_data,
|
||||
queuedWatchMetaData=queuedWatchMetaData,
|
||||
uuids=uuids,
|
||||
worker_handler=worker_handler,
|
||||
worker_pool=worker_pool,
|
||||
update_q=update_q,
|
||||
watch_check_update=watch_check_update,
|
||||
op=op,
|
||||
@@ -289,9 +369,6 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_handle
|
||||
import json
|
||||
from copy import deepcopy
|
||||
|
||||
# more for testing
|
||||
if uuid == 'first':
|
||||
uuid = list(datastore.data['watching'].keys()).pop()
|
||||
|
||||
# copy it to memory as trim off what we dont need (history)
|
||||
watch = deepcopy(datastore.data['watching'].get(uuid))
|
||||
@@ -331,4 +408,25 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_handle
|
||||
|
||||
return redirect(url_for('watchlist.index'))
|
||||
|
||||
@ui_blueprint.route("/language/auto-detect", methods=['GET'])
|
||||
def delete_locale_language_session_var_if_it_exists():
|
||||
"""Clear the session locale preference to auto-detect from browser Accept-Language header"""
|
||||
if 'locale' in session:
|
||||
session.pop('locale', None)
|
||||
# Refresh Flask-Babel to clear cached locale
|
||||
from flask_babel import refresh
|
||||
refresh()
|
||||
flash(gettext("Language set to auto-detect from browser"))
|
||||
|
||||
# Check if there's a redirect parameter to return to the same page
|
||||
redirect_url = request.args.get('redirect')
|
||||
|
||||
# If redirect is provided and safe, use it
|
||||
from changedetectionio.is_safe_url import is_safe_url
|
||||
if redirect_url and is_safe_url(redirect_url, current_app):
|
||||
return redirect(redirect_url)
|
||||
|
||||
# Otherwise redirect to watchlist
|
||||
return redirect(url_for('watchlist.index'))
|
||||
|
||||
return ui_blueprint
|
||||
@@ -83,7 +83,6 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
If a processor doesn't have a difference module, falls back to text_json_diff.
|
||||
"""
|
||||
|
||||
# More for testing, possible to return the first/only
|
||||
if uuid == 'first':
|
||||
uuid = list(datastore.data['watching'].keys()).pop()
|
||||
|
||||
@@ -101,23 +100,21 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
# Get the processor type for this watch
|
||||
processor_name = watch.get('processor', 'text_json_diff')
|
||||
|
||||
try:
|
||||
# Try to import the processor's difference module
|
||||
processor_module = importlib.import_module(f'changedetectionio.processors.{processor_name}.difference')
|
||||
# Try to get the processor's difference module (works for both built-in and plugin processors)
|
||||
from changedetectionio.processors import get_processor_submodule
|
||||
processor_module = get_processor_submodule(processor_name, 'difference')
|
||||
|
||||
# Call the processor's render() function
|
||||
if hasattr(processor_module, 'render'):
|
||||
return processor_module.render(
|
||||
watch=watch,
|
||||
datastore=datastore,
|
||||
request=request,
|
||||
url_for=url_for,
|
||||
render_template=render_template,
|
||||
flash=flash,
|
||||
redirect=redirect
|
||||
)
|
||||
except (ImportError, ModuleNotFoundError) as e:
|
||||
logger.warning(f"Processor {processor_name} does not have a difference module, falling back to text_json_diff: {e}")
|
||||
# Call the processor's render() function
|
||||
if processor_module and hasattr(processor_module, 'render'):
|
||||
return processor_module.render(
|
||||
watch=watch,
|
||||
datastore=datastore,
|
||||
request=request,
|
||||
url_for=url_for,
|
||||
render_template=render_template,
|
||||
flash=flash,
|
||||
redirect=redirect
|
||||
)
|
||||
|
||||
# Fallback: if processor doesn't have difference module, use text_json_diff as default
|
||||
from changedetectionio.processors.text_json_diff.difference import render as default_render
|
||||
@@ -144,10 +141,10 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
Each processor implements processors/{type}/extract.py::render_form()
|
||||
If a processor doesn't have an extract module, falls back to text_json_diff.
|
||||
"""
|
||||
# More for testing, possible to return the first/only
|
||||
|
||||
|
||||
if uuid == 'first':
|
||||
uuid = list(datastore.data['watching'].keys()).pop()
|
||||
|
||||
try:
|
||||
watch = datastore.data['watching'][uuid]
|
||||
except KeyError:
|
||||
@@ -157,23 +154,21 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
# Get the processor type for this watch
|
||||
processor_name = watch.get('processor', 'text_json_diff')
|
||||
|
||||
try:
|
||||
# Try to import the processor's extract module
|
||||
processor_module = importlib.import_module(f'changedetectionio.processors.{processor_name}.extract')
|
||||
# Try to get the processor's extract module (works for both built-in and plugin processors)
|
||||
from changedetectionio.processors import get_processor_submodule
|
||||
processor_module = get_processor_submodule(processor_name, 'extract')
|
||||
|
||||
# Call the processor's render_form() function
|
||||
if hasattr(processor_module, 'render_form'):
|
||||
return processor_module.render_form(
|
||||
watch=watch,
|
||||
datastore=datastore,
|
||||
request=request,
|
||||
url_for=url_for,
|
||||
render_template=render_template,
|
||||
flash=flash,
|
||||
redirect=redirect
|
||||
)
|
||||
except (ImportError, ModuleNotFoundError) as e:
|
||||
logger.warning(f"Processor {processor_name} does not have an extract module, falling back to base extractor: {e}")
|
||||
# Call the processor's render_form() function
|
||||
if processor_module and hasattr(processor_module, 'render_form'):
|
||||
return processor_module.render_form(
|
||||
watch=watch,
|
||||
datastore=datastore,
|
||||
request=request,
|
||||
url_for=url_for,
|
||||
render_template=render_template,
|
||||
flash=flash,
|
||||
redirect=redirect
|
||||
)
|
||||
|
||||
# Fallback: if processor doesn't have extract module, use base processors.extract as default
|
||||
from changedetectionio.processors.extract import render_form as default_render_form
|
||||
@@ -200,7 +195,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
Each processor implements processors/{type}/extract.py::process_extraction()
|
||||
If a processor doesn't have an extract module, falls back to text_json_diff.
|
||||
"""
|
||||
# More for testing, possible to return the first/only
|
||||
|
||||
if uuid == 'first':
|
||||
uuid = list(datastore.data['watching'].keys()).pop()
|
||||
|
||||
@@ -213,24 +208,22 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
# Get the processor type for this watch
|
||||
processor_name = watch.get('processor', 'text_json_diff')
|
||||
|
||||
try:
|
||||
# Try to import the processor's extract module
|
||||
processor_module = importlib.import_module(f'changedetectionio.processors.{processor_name}.extract')
|
||||
# Try to get the processor's extract module (works for both built-in and plugin processors)
|
||||
from changedetectionio.processors import get_processor_submodule
|
||||
processor_module = get_processor_submodule(processor_name, 'extract')
|
||||
|
||||
# Call the processor's process_extraction() function
|
||||
if hasattr(processor_module, 'process_extraction'):
|
||||
return processor_module.process_extraction(
|
||||
watch=watch,
|
||||
datastore=datastore,
|
||||
request=request,
|
||||
url_for=url_for,
|
||||
make_response=make_response,
|
||||
send_from_directory=send_from_directory,
|
||||
flash=flash,
|
||||
redirect=redirect
|
||||
)
|
||||
except (ImportError, ModuleNotFoundError) as e:
|
||||
logger.warning(f"Processor {processor_name} does not have an extract module, falling back to base extractor: {e}")
|
||||
# Call the processor's process_extraction() function
|
||||
if processor_module and hasattr(processor_module, 'process_extraction'):
|
||||
return processor_module.process_extraction(
|
||||
watch=watch,
|
||||
datastore=datastore,
|
||||
request=request,
|
||||
url_for=url_for,
|
||||
make_response=make_response,
|
||||
send_from_directory=send_from_directory,
|
||||
flash=flash,
|
||||
redirect=redirect
|
||||
)
|
||||
|
||||
# Fallback: if processor doesn't have extract module, use base processors.extract as default
|
||||
from changedetectionio.processors.extract import process_extraction as default_process_extraction
|
||||
@@ -267,7 +260,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
- /diff/{uuid}/processor-asset/after
|
||||
- /diff/{uuid}/processor-asset/rendered_diff
|
||||
"""
|
||||
# More for testing, possible to return the first/only
|
||||
|
||||
if uuid == 'first':
|
||||
uuid = list(datastore.data['watching'].keys()).pop()
|
||||
|
||||
@@ -280,38 +273,33 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
# Get the processor type for this watch
|
||||
processor_name = watch.get('processor', 'text_json_diff')
|
||||
|
||||
try:
|
||||
# Try to import the processor's difference module
|
||||
processor_module = importlib.import_module(f'changedetectionio.processors.{processor_name}.difference')
|
||||
# Try to get the processor's difference module (works for both built-in and plugin processors)
|
||||
from changedetectionio.processors import get_processor_submodule
|
||||
processor_module = get_processor_submodule(processor_name, 'difference')
|
||||
|
||||
# Call the processor's get_asset() function
|
||||
if hasattr(processor_module, 'get_asset'):
|
||||
result = processor_module.get_asset(
|
||||
asset_name=asset_name,
|
||||
watch=watch,
|
||||
datastore=datastore,
|
||||
request=request
|
||||
)
|
||||
# Call the processor's get_asset() function
|
||||
if processor_module and hasattr(processor_module, 'get_asset'):
|
||||
result = processor_module.get_asset(
|
||||
asset_name=asset_name,
|
||||
watch=watch,
|
||||
datastore=datastore,
|
||||
request=request
|
||||
)
|
||||
|
||||
if result is None:
|
||||
from flask import abort
|
||||
abort(404, description=f"Asset '{asset_name}' not found")
|
||||
|
||||
binary_data, content_type, cache_control = result
|
||||
|
||||
response = make_response(binary_data)
|
||||
response.headers['Content-Type'] = content_type
|
||||
if cache_control:
|
||||
response.headers['Cache-Control'] = cache_control
|
||||
return response
|
||||
else:
|
||||
logger.warning(f"Processor {processor_name} does not implement get_asset()")
|
||||
if result is None:
|
||||
from flask import abort
|
||||
abort(404, description=f"Processor '{processor_name}' does not support assets")
|
||||
abort(404, description=f"Asset '{asset_name}' not found")
|
||||
|
||||
except (ImportError, ModuleNotFoundError) as e:
|
||||
logger.warning(f"Processor {processor_name} does not have a difference module: {e}")
|
||||
binary_data, content_type, cache_control = result
|
||||
|
||||
response = make_response(binary_data)
|
||||
response.headers['Content-Type'] = content_type
|
||||
if cache_control:
|
||||
response.headers['Cache-Control'] = cache_control
|
||||
return response
|
||||
else:
|
||||
logger.warning(f"Processor {processor_name} does not implement get_asset()")
|
||||
from flask import abort
|
||||
abort(404, description=f"Processor '{processor_name}' not found")
|
||||
abort(404, description=f"Processor '{processor_name}' does not support assets")
|
||||
|
||||
return diff_blueprint
|
||||
|
||||
@@ -9,7 +9,7 @@ from jinja2 import Environment, FileSystemLoader
|
||||
from changedetectionio.store import ChangeDetectionStore
|
||||
from changedetectionio.auth_decorator import login_optionally_required
|
||||
from changedetectionio.time_handler import is_within_schedule
|
||||
from changedetectionio import worker_handler
|
||||
from changedetectionio import worker_pool
|
||||
|
||||
def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMetaData):
|
||||
edit_blueprint = Blueprint('ui_edit', __name__, template_folder="../ui/templates")
|
||||
@@ -30,14 +30,13 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
from changedetectionio import processors
|
||||
import importlib
|
||||
|
||||
if uuid == 'first':
|
||||
uuid = list(datastore.data['watching'].keys()).pop()
|
||||
# More for testing, possible to return the first/only
|
||||
if not datastore.data['watching'].keys():
|
||||
flash(gettext("No watches to edit"), "error")
|
||||
return redirect(url_for('watchlist.index'))
|
||||
|
||||
if uuid == 'first':
|
||||
uuid = list(datastore.data['watching'].keys()).pop()
|
||||
|
||||
if not uuid in datastore.data['watching']:
|
||||
flash(gettext("No watch with the UUID {} found.").format(uuid), "error")
|
||||
return redirect(url_for('watchlist.index'))
|
||||
@@ -52,7 +51,13 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
redirect(url_for('ui_edit.edit_page', uuid=uuid))
|
||||
|
||||
# be sure we update with a copy instead of accidently editing the live object by reference
|
||||
default = deepcopy(datastore.data['watching'][uuid])
|
||||
default = None
|
||||
while not default:
|
||||
try:
|
||||
default = deepcopy(datastore.data['watching'][uuid])
|
||||
except RuntimeError as e:
|
||||
# Dictionary changed
|
||||
continue
|
||||
|
||||
# Defaults for proxy choice
|
||||
if datastore.proxy_list is not None: # When enabled
|
||||
@@ -66,8 +71,13 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
processor_name = datastore.data['watching'][uuid].get('processor', '')
|
||||
processor_classes = next((tpl for tpl in processors.find_processors() if tpl[1] == processor_name), None)
|
||||
if not processor_classes:
|
||||
flash(gettext("Cannot load the edit form for processor/plugin '{}', plugin missing?").format(processor_classes[1]), 'error')
|
||||
return redirect(url_for('watchlist.index'))
|
||||
flash(gettext("Could not load '{}' processor, processor plugin might be missing. Please select a different processor.").format(processor_name), 'error')
|
||||
# Fall back to default processor so user can still edit and change processor
|
||||
processor_classes = next((tpl for tpl in processors.find_processors() if tpl[1] == 'text_json_diff'), None)
|
||||
if not processor_classes:
|
||||
# If even text_json_diff is missing, something is very wrong
|
||||
flash(gettext("Could not load '{}' processor, processor plugin might be missing.").format(processor_name), 'error')
|
||||
return redirect(url_for('watchlist.index'))
|
||||
|
||||
parent_module = processors.get_parent_module(processor_classes[0])
|
||||
|
||||
@@ -144,58 +154,10 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
|
||||
extra_update_obj['time_between_check'] = form.time_between_check.data
|
||||
|
||||
# Handle processor-config-* fields separately (save to JSON, not datastore)
|
||||
processor_config_data = {}
|
||||
fields_to_remove = []
|
||||
for field_name, field_value in form.data.items():
|
||||
if field_name.startswith('processor_config_'):
|
||||
config_key = field_name.replace('processor_config_', '')
|
||||
if field_value: # Only save non-empty values
|
||||
processor_config_data[config_key] = field_value
|
||||
fields_to_remove.append(field_name)
|
||||
|
||||
# Save processor config to JSON file if any config data exists
|
||||
if processor_config_data:
|
||||
try:
|
||||
processor_name = form.data.get('processor')
|
||||
# Create a processor instance to access config methods
|
||||
processor_instance = processors.difference_detection_processor(datastore, uuid)
|
||||
# Use processor name as filename so each processor keeps its own config
|
||||
config_filename = f'{processor_name}.json'
|
||||
processor_instance.update_extra_watch_config(config_filename, processor_config_data)
|
||||
logger.debug(f"Saved processor config to {config_filename}: {processor_config_data}")
|
||||
|
||||
# Call optional edit_hook if processor has one
|
||||
try:
|
||||
# Try to import the edit_hook module from the processor package
|
||||
import importlib
|
||||
edit_hook_module_name = f'changedetectionio.processors.{processor_name}.edit_hook'
|
||||
|
||||
try:
|
||||
edit_hook = importlib.import_module(edit_hook_module_name)
|
||||
logger.debug(f"Found edit_hook module for {processor_name}")
|
||||
|
||||
if hasattr(edit_hook, 'on_config_save'):
|
||||
logger.info(f"Calling edit_hook.on_config_save for {processor_name}")
|
||||
watch_obj = datastore.data['watching'][uuid]
|
||||
# Call hook and get updated config
|
||||
updated_config = edit_hook.on_config_save(watch_obj, processor_config_data, datastore)
|
||||
# Save updated config back to file
|
||||
processor_instance.update_extra_watch_config(config_filename, updated_config)
|
||||
logger.info(f"Edit hook updated config: {updated_config}")
|
||||
else:
|
||||
logger.debug(f"Edit hook module found but no on_config_save function")
|
||||
except ModuleNotFoundError:
|
||||
logger.debug(f"No edit_hook module for processor {processor_name} (this is normal)")
|
||||
except Exception as hook_error:
|
||||
logger.error(f"Edit hook error (non-fatal): {hook_error}", exc_info=True)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to save processor config: {e}")
|
||||
|
||||
# Remove processor-config-* fields from form.data before updating datastore
|
||||
for field_name in fields_to_remove:
|
||||
form.data.pop(field_name, None)
|
||||
# Handle processor-config-* fields separately (save to JSON, not datastore)
|
||||
# IMPORTANT: These must NOT be saved to url-watches.json, only to the processor-specific JSON file
|
||||
processor_config_data = processors.extract_processor_config_from_form_data(form.data)
|
||||
processors.save_processor_config(datastore, uuid, processor_config_data)
|
||||
|
||||
# Ignore text
|
||||
form_ignore_text = form.ignore_text.data
|
||||
@@ -235,7 +197,11 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
|
||||
# Recast it if need be to right data Watch handler
|
||||
watch_class = processors.get_custom_watch_obj_for_processor(form.data.get('processor'))
|
||||
datastore.data['watching'][uuid] = watch_class(datastore_path=datastore.datastore_path, default=datastore.data['watching'][uuid])
|
||||
datastore.data['watching'][uuid] = watch_class(datastore_path=datastore.datastore_path, __datastore=datastore.data, default=datastore.data['watching'][uuid])
|
||||
|
||||
# Save the watch immediately
|
||||
datastore.data['watching'][uuid].commit()
|
||||
|
||||
flash(gettext("Updated watch - unpaused!") if request.args.get('unpause_on_save') else gettext("Updated watch."))
|
||||
|
||||
# Cleanup any browsersteps session for this watch
|
||||
@@ -245,10 +211,6 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
except Exception as e:
|
||||
logger.debug(f"Error cleaning up browsersteps session: {e}")
|
||||
|
||||
# Re #286 - We wait for syncing new data to disk in another thread every 60 seconds
|
||||
# But in the case something is added we should save straight away
|
||||
datastore.needs_write_urgent = True
|
||||
|
||||
# Do not queue on edit if its not within the time range
|
||||
|
||||
# @todo maybe it should never queue anyway on edit...
|
||||
@@ -277,7 +239,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
#############################
|
||||
if not datastore.data['watching'][uuid].get('paused') and is_in_schedule:
|
||||
# Queue the watch for immediate recheck, with a higher priority
|
||||
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
worker_pool.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
|
||||
# Diff page [edit] link should go back to diff page
|
||||
if request.args.get("next") and request.args.get("next") == 'diff':
|
||||
@@ -305,10 +267,17 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
|
||||
# Get fetcher capabilities instead of hardcoded logic
|
||||
capabilities = get_fetcher_capabilities(watch, datastore)
|
||||
|
||||
# Add processor capabilities from module
|
||||
capabilities['supports_visual_selector'] = getattr(parent_module, 'supports_visual_selector', False)
|
||||
capabilities['supports_text_filters_and_triggers'] = getattr(parent_module, 'supports_text_filters_and_triggers', False)
|
||||
capabilities['supports_text_filters_and_triggers_elements'] = getattr(parent_module, 'supports_text_filters_and_triggers_elements', False)
|
||||
capabilities['supports_request_type'] = getattr(parent_module, 'supports_request_type', False)
|
||||
|
||||
app_rss_token = datastore.data['settings']['application'].get('rss_access_token'),
|
||||
|
||||
c = [f"processor-{watch.get('processor')}"]
|
||||
if worker_handler.is_watch_running(uuid):
|
||||
if worker_pool.is_watch_running(uuid):
|
||||
c.append('checking-now')
|
||||
|
||||
template_args = {
|
||||
@@ -365,6 +334,8 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
from flask import send_file
|
||||
import brotli
|
||||
|
||||
if uuid == 'first':
|
||||
uuid = list(datastore.data['watching'].keys()).pop()
|
||||
watch = datastore.data['watching'].get(uuid)
|
||||
if watch and watch.history.keys() and os.path.isdir(watch.watch_data_dir):
|
||||
latest_filename = list(watch.history.keys())[-1]
|
||||
@@ -389,6 +360,9 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
def watch_get_preview_rendered(uuid):
|
||||
'''For when viewing the "preview" of the rendered text from inside of Edit'''
|
||||
from flask import jsonify
|
||||
|
||||
if uuid == 'first':
|
||||
uuid = list(datastore.data['watching'].keys()).pop()
|
||||
from changedetectionio.processors.text_json_diff import prepare_filter_prevew
|
||||
result = prepare_filter_prevew(watch_uuid=uuid, form_data=request.form, datastore=datastore)
|
||||
return jsonify(result)
|
||||
@@ -412,6 +386,9 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
s = re.sub(r'[0-9]+', r'\\d+', s)
|
||||
datastore.data["watching"][uuid]['ignore_text'].append('/' + s + '/')
|
||||
|
||||
# Save the updated ignore_text
|
||||
datastore.data["watching"][uuid].commit()
|
||||
|
||||
return f"<a href={url_for('ui.ui_preview.preview_page', uuid=uuid)}>Click to preview</a>"
|
||||
|
||||
return edit_blueprint
|
||||
@@ -26,10 +26,9 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
Each processor implements processors/{type}/preview.py::render()
|
||||
If a processor doesn't have a preview module, falls back to default text preview.
|
||||
"""
|
||||
# More for testing, possible to return the first/only
|
||||
|
||||
if uuid == 'first':
|
||||
uuid = list(datastore.data['watching'].keys()).pop()
|
||||
|
||||
try:
|
||||
watch = datastore.data['watching'][uuid]
|
||||
except KeyError:
|
||||
@@ -39,24 +38,21 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
# Get the processor type for this watch
|
||||
processor_name = watch.get('processor', 'text_json_diff')
|
||||
|
||||
try:
|
||||
# Try to import the processor's preview module
|
||||
import importlib
|
||||
processor_module = importlib.import_module(f'changedetectionio.processors.{processor_name}.preview')
|
||||
# Try to get the processor's preview module (works for both built-in and plugin processors)
|
||||
from changedetectionio.processors import get_processor_submodule
|
||||
processor_module = get_processor_submodule(processor_name, 'preview')
|
||||
|
||||
# Call the processor's render() function
|
||||
if hasattr(processor_module, 'render'):
|
||||
return processor_module.render(
|
||||
watch=watch,
|
||||
datastore=datastore,
|
||||
request=request,
|
||||
url_for=url_for,
|
||||
render_template=render_template,
|
||||
flash=flash,
|
||||
redirect=redirect
|
||||
)
|
||||
except (ImportError, ModuleNotFoundError) as e:
|
||||
logger.debug(f"Processor {processor_name} does not have a preview module, using default preview: {e}")
|
||||
# Call the processor's render() function
|
||||
if processor_module and hasattr(processor_module, 'render'):
|
||||
return processor_module.render(
|
||||
watch=watch,
|
||||
datastore=datastore,
|
||||
request=request,
|
||||
url_for=url_for,
|
||||
render_template=render_template,
|
||||
flash=flash,
|
||||
redirect=redirect
|
||||
)
|
||||
|
||||
# Fallback: if processor doesn't have preview module, use default text preview
|
||||
content = []
|
||||
@@ -150,10 +146,8 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
"""
|
||||
from flask import make_response
|
||||
|
||||
# More for testing, possible to return the first/only
|
||||
if uuid == 'first':
|
||||
uuid = list(datastore.data['watching'].keys()).pop()
|
||||
|
||||
try:
|
||||
watch = datastore.data['watching'][uuid]
|
||||
except KeyError:
|
||||
@@ -163,39 +157,33 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
# Get the processor type for this watch
|
||||
processor_name = watch.get('processor', 'text_json_diff')
|
||||
|
||||
try:
|
||||
# Try to import the processor's preview module
|
||||
import importlib
|
||||
processor_module = importlib.import_module(f'changedetectionio.processors.{processor_name}.preview')
|
||||
# Try to get the processor's preview module (works for both built-in and plugin processors)
|
||||
from changedetectionio.processors import get_processor_submodule
|
||||
processor_module = get_processor_submodule(processor_name, 'preview')
|
||||
|
||||
# Call the processor's get_asset() function
|
||||
if hasattr(processor_module, 'get_asset'):
|
||||
result = processor_module.get_asset(
|
||||
asset_name=asset_name,
|
||||
watch=watch,
|
||||
datastore=datastore,
|
||||
request=request
|
||||
)
|
||||
# Call the processor's get_asset() function
|
||||
if processor_module and hasattr(processor_module, 'get_asset'):
|
||||
result = processor_module.get_asset(
|
||||
asset_name=asset_name,
|
||||
watch=watch,
|
||||
datastore=datastore,
|
||||
request=request
|
||||
)
|
||||
|
||||
if result is None:
|
||||
from flask import abort
|
||||
abort(404, description=f"Asset '{asset_name}' not found")
|
||||
|
||||
binary_data, content_type, cache_control = result
|
||||
|
||||
response = make_response(binary_data)
|
||||
response.headers['Content-Type'] = content_type
|
||||
if cache_control:
|
||||
response.headers['Cache-Control'] = cache_control
|
||||
return response
|
||||
else:
|
||||
logger.warning(f"Processor {processor_name} does not implement get_asset()")
|
||||
if result is None:
|
||||
from flask import abort
|
||||
abort(404, description=f"Processor '{processor_name}' does not support assets")
|
||||
abort(404, description=f"Asset '{asset_name}' not found")
|
||||
|
||||
except (ImportError, ModuleNotFoundError) as e:
|
||||
logger.warning(f"Processor {processor_name} does not have a preview module: {e}")
|
||||
binary_data, content_type, cache_control = result
|
||||
|
||||
response = make_response(binary_data)
|
||||
response.headers['Content-Type'] = content_type
|
||||
if cache_control:
|
||||
response.headers['Cache-Control'] = cache_control
|
||||
return response
|
||||
else:
|
||||
logger.warning(f"Processor {processor_name} does not implement get_asset()")
|
||||
from flask import abort
|
||||
abort(404, description=f"Processor '{processor_name}' not found")
|
||||
abort(404, description=f"Processor '{processor_name}' does not support assets")
|
||||
|
||||
return preview_blueprint
|
||||
|
||||
@@ -45,14 +45,19 @@
|
||||
<div class="tabs collapsable">
|
||||
<ul>
|
||||
<li class="tab"><a href="#general">{{ _('General') }}</a></li>
|
||||
{% if capabilities.supports_request_type %}
|
||||
<li class="tab"><a href="#request">{{ _('Request') }}</a></li>
|
||||
{% endif %}
|
||||
{% if extra_tab_content %}
|
||||
<li class="tab"><a href="#extras_tab">{{ extra_tab_content }}</a></li>
|
||||
{% endif %}
|
||||
{% if capabilities.supports_browser_steps %}
|
||||
<li class="tab"><a id="browsersteps-tab" href="#browser-steps">{{ _('Browser Steps') }}</a></li>
|
||||
<!-- should goto extra forms? -->
|
||||
{% if watch['processor'] == 'text_json_diff' or watch['processor'] == 'image_ssim_diff' %}
|
||||
{% endif %}
|
||||
{% if capabilities.supports_visual_selector %}
|
||||
<li class="tab"><a id="visualselector-tab" href="#visualselector">{{ _('Visual Filter Selector') }}</a></li>
|
||||
{% endif %}
|
||||
{% if capabilities.supports_text_filters_and_triggers %}
|
||||
<li class="tab" id="filters-and-triggers-tab"><a href="#filters-and-triggers">{{ _('Filters & Triggers') }}</a></li>
|
||||
<li class="tab" id="conditions-tab"><a href="#conditions">{{ _('Conditions') }}</a></li>
|
||||
{% endif %}
|
||||
@@ -110,12 +115,20 @@
|
||||
{{ _('Sends a notification when the filter can no longer be seen on the page, good for knowing when the page changed and your filter will not work anymore.') }}
|
||||
</span>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.history_snapshot_max_length, class="history_snapshot_max_length") }}
|
||||
<span class="pure-form-message-inline">{{ _('Limit collection of history snapshots for each watch to this number of history items.') }}
|
||||
<br>
|
||||
{{ _('Set to empty to use system settings default') }}
|
||||
</span>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_ternary_field(form.use_page_title_in_list) }}
|
||||
</div>
|
||||
</fieldset>
|
||||
</div>
|
||||
|
||||
{% if capabilities.supports_request_type %}
|
||||
<div class="tab-pane-inner" id="request">
|
||||
<div class="pure-control-group inline-radio">
|
||||
{{ render_field(form.fetch_backend, class="fetch-backend") }}
|
||||
@@ -203,6 +216,7 @@ Math: {{ 1 + 1 }}") }}
|
||||
</div>
|
||||
</fieldset>
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
<div class="tab-pane-inner" id="browser-steps">
|
||||
{% if capabilities.supports_browser_steps %}
|
||||
@@ -283,8 +297,7 @@ Math: {{ 1 + 1 }}") }}
|
||||
</fieldset>
|
||||
</div>
|
||||
|
||||
{% if watch['processor'] == 'text_json_diff' or watch['processor'] == 'image_ssim_diff' %}
|
||||
|
||||
{% if capabilities.supports_text_filters_and_triggers %}
|
||||
<div class="tab-pane-inner" id="conditions">
|
||||
<script>
|
||||
const verify_condition_rule_url="{{url_for('conditions.verify_condition_single_rule', watch_uuid=uuid)}}";
|
||||
@@ -303,7 +316,9 @@ Math: {{ 1 + 1 }}") }}
|
||||
<span id="activate-text-preview" class="pure-button pure-button-primary button-xsmall">{{ _('Activate preview') }}</span>
|
||||
<div>
|
||||
<div id="edit-text-filter">
|
||||
<div class="pure-control-group" id="pro-tips">
|
||||
|
||||
{% if capabilities.supports_text_filters_and_triggers_elements %}
|
||||
<div class="pure-control-group" id="pro-tips">
|
||||
<strong>{{ _('Pro-tips:') }}</strong><br>
|
||||
<ul>
|
||||
<li>
|
||||
@@ -314,8 +329,8 @@ Math: {{ 1 + 1 }}") }}
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
{% include "edit/include_subtract.html" %}
|
||||
{% endif %}
|
||||
<div class="text-filtering border-fieldset">
|
||||
<fieldset class="pure-group" id="text-filtering-type-options">
|
||||
<h3>{{ _('Text filtering') }}</h3>
|
||||
@@ -374,7 +389,7 @@ Math: {{ 1 + 1 }}") }}
|
||||
{{ extra_form_content|safe }}
|
||||
</div>
|
||||
{% endif %}
|
||||
{% if watch['processor'] == 'text_json_diff' or watch['processor'] == 'image_ssim_diff' %}
|
||||
{% if capabilities.supports_visual_selector %}
|
||||
<div class="tab-pane-inner visual-selector-ui" id="visualselector">
|
||||
<img class="beta-logo" src="{{url_for('static_content', group='images', filename='beta-logo.png')}}" alt="New beta functionality">
|
||||
|
||||
@@ -386,7 +401,7 @@ Math: {{ 1 + 1 }}") }}
|
||||
{{ _('The Visual Selector tool lets you select the') }} <i>{{ _('text') }}</i> {{ _('elements that will be used for the change detection. It automatically fills-in the filters in the "CSS/JSONPath/JQ/XPath Filters" box of the') }} <a href="#filters-and-triggers">{{ _('Filters & Triggers') }}</a> {{ _('tab. Use') }} <strong>{{ _('Shift+Click') }}</strong> {{ _('to select multiple items.') }}
|
||||
</span>
|
||||
|
||||
{% if watch['processor'] == 'image_ssim_diff' %}
|
||||
{% if watch['processor'] == 'image_ssim_diff' %} {# @todo, integrate with image_ssim_diff selector better, use some extra form ? #}
|
||||
<div id="selection-mode-controls" style="margin: 10px 0; padding: 10px; background: var(--color-background-tab); border-radius: 5px;">
|
||||
<label style="font-weight: 600; margin-right: 15px;">{{ _('Selection Mode:') }}</label>
|
||||
<label style="margin-right: 15px;">
|
||||
|
||||
@@ -2,7 +2,7 @@ from flask import Blueprint, request, redirect, url_for, flash
|
||||
from flask_babel import gettext
|
||||
from changedetectionio.store import ChangeDetectionStore
|
||||
from changedetectionio.auth_decorator import login_optionally_required
|
||||
from changedetectionio import worker_handler
|
||||
from changedetectionio import worker_pool
|
||||
|
||||
|
||||
def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMetaData, watch_check_update):
|
||||
@@ -24,8 +24,9 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
flash(gettext('Warning, URL {} already exists').format(url), "notice")
|
||||
|
||||
add_paused = request.form.get('edit_and_watch_submit_button') != None
|
||||
processor = request.form.get('processor', 'text_json_diff')
|
||||
new_uuid = datastore.add_watch(url=url, tag=request.form.get('tags').strip(), extras={'paused': add_paused, 'processor': processor})
|
||||
from changedetectionio import processors
|
||||
processor = request.form.get('processor', processors.get_default_processor())
|
||||
new_uuid = datastore.add_watch(url=url, tag=request.form.get('tags','').strip(), extras={'paused': add_paused, 'processor': processor})
|
||||
|
||||
if new_uuid:
|
||||
if add_paused:
|
||||
@@ -33,9 +34,9 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
return redirect(url_for('ui.ui_edit.edit_page', uuid=new_uuid, unpause_on_save=1, tag=request.args.get('tag')))
|
||||
else:
|
||||
# Straight into the queue.
|
||||
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid}))
|
||||
worker_pool.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid}))
|
||||
flash(gettext("Watch added."))
|
||||
|
||||
return redirect(url_for('watchlist.index', tag=request.args.get('tag','')))
|
||||
|
||||
return views_blueprint
|
||||
return views_blueprint
|
||||
|
||||
@@ -2,8 +2,8 @@ import os
|
||||
import time
|
||||
|
||||
from flask import Blueprint, request, make_response, render_template, redirect, url_for, flash, session
|
||||
from flask_login import current_user
|
||||
from flask_paginate import Pagination, get_page_parameter
|
||||
from flask_babel import gettext as _
|
||||
|
||||
from changedetectionio import forms
|
||||
from changedetectionio import processors
|
||||
@@ -39,7 +39,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
elif op == 'mute':
|
||||
datastore.data['watching'][uuid].toggle_mute()
|
||||
|
||||
datastore.needs_write = True
|
||||
datastore.data['watching'][uuid].commit()
|
||||
return redirect(url_for('watchlist.index', tag = active_tag_uuid))
|
||||
|
||||
# Sort by last_changed and add the uuid which is usually the key..
|
||||
@@ -74,7 +74,10 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
|
||||
pagination = Pagination(page=page,
|
||||
total=total_count,
|
||||
per_page=datastore.data['settings']['application'].get('pager_size', 50), css_framework="semantic")
|
||||
per_page=datastore.data['settings']['application'].get('pager_size', 50),
|
||||
css_framework="semantic",
|
||||
display_msg=_('displaying <b>{start} - {end}</b> {record_name} in total <b>{total}</b>'),
|
||||
record_name=_('records'))
|
||||
|
||||
sorted_tags = sorted(datastore.data['settings']['application'].get('tags').items(), key=lambda x: x[1]['title'])
|
||||
|
||||
@@ -85,6 +88,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
app_rss_token=datastore.data['settings']['application'].get('rss_access_token'),
|
||||
datastore=datastore,
|
||||
errored_count=errored_count,
|
||||
extra_classes='has-queue' if not update_q.empty() else '',
|
||||
form=form,
|
||||
generate_tag_colors=processors.generate_processor_badge_colors,
|
||||
guid=datastore.data['app_guid'],
|
||||
@@ -92,10 +96,11 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
hosted_sticky=os.getenv("SALTED_PASS", False) == False,
|
||||
now_time_server=round(time.time()),
|
||||
pagination=pagination,
|
||||
processor_badge_css=processors.get_processor_badge_css(),
|
||||
processor_badge_texts=processors.get_processor_badge_texts(),
|
||||
processor_descriptions=processors.get_processor_descriptions(),
|
||||
processor_badge_css=processors.get_processor_badge_css(),
|
||||
queued_uuids=[q_uuid.item['uuid'] for q_uuid in update_q.queue],
|
||||
queue_size=update_q.qsize(),
|
||||
queued_uuids=update_q.get_queued_uuids(),
|
||||
search_q=request.args.get('q', '').strip(),
|
||||
sort_attribute=request.args.get('sort') if request.args.get('sort') else request.cookies.get('sort'),
|
||||
sort_order=request.args.get('order') if request.args.get('order') else request.cookies.get('order'),
|
||||
|
||||
@@ -1,5 +1,9 @@
|
||||
{%- extends 'base.html' -%}
|
||||
{%- block content -%}
|
||||
{%- set tips = [
|
||||
_("Changedetection.io can monitor more than just web-pages! See our plugins!") ~ ' <a href="https://changedetection.io/plugins">' ~ _('More info') ~ '</a>',
|
||||
_("You can also add 'shared' watches.") ~ ' <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Sharing-a-Watch">' ~ _('More info') ~ '</a>'
|
||||
] -%}
|
||||
{%- from '_helpers.html' import render_simple_field, render_field, render_nolabel_field, sort_by_title -%}
|
||||
<script src="{{url_for('static_content', group='js', filename='jquery-3.6.0.min.js')}}"></script>
|
||||
<script src="{{url_for('static_content', group='js', filename='watch-overview.js')}}" defer></script>
|
||||
@@ -10,6 +14,46 @@
|
||||
// Initialize Feather icons after the page loads
|
||||
document.addEventListener('DOMContentLoaded', function() {
|
||||
feather.replace();
|
||||
|
||||
// Intersection Observer for lazy loading favicons
|
||||
// Only load favicon images when they enter the viewport
|
||||
if ('IntersectionObserver' in window) {
|
||||
const faviconObserver = new IntersectionObserver((entries, observer) => {
|
||||
entries.forEach(entry => {
|
||||
if (entry.isIntersecting) {
|
||||
const img = entry.target;
|
||||
const src = img.getAttribute('data-src');
|
||||
|
||||
if (src) {
|
||||
// Load the actual favicon
|
||||
img.src = src;
|
||||
img.removeAttribute('data-src');
|
||||
}
|
||||
|
||||
// Stop observing this image
|
||||
observer.unobserve(img);
|
||||
}
|
||||
});
|
||||
}, {
|
||||
// Start loading slightly before the image enters viewport
|
||||
rootMargin: '50px',
|
||||
threshold: 0.01
|
||||
});
|
||||
|
||||
// Observe all lazy favicon images
|
||||
document.querySelectorAll('.lazy-favicon').forEach(img => {
|
||||
faviconObserver.observe(img);
|
||||
});
|
||||
} else {
|
||||
// Fallback for older browsers: load all favicons immediately
|
||||
document.querySelectorAll('.lazy-favicon').forEach(img => {
|
||||
const src = img.getAttribute('data-src');
|
||||
if (src) {
|
||||
img.src = src;
|
||||
img.removeAttribute('data-src');
|
||||
}
|
||||
});
|
||||
}
|
||||
});
|
||||
</script>
|
||||
<style>
|
||||
@@ -62,14 +106,16 @@ html[data-darkmode="true"] .watch-tag-list.tag-{{ class_name }} {
|
||||
{{ render_nolabel_field(form.edit_and_watch_submit_button, title=_("Edit first then Watch") ) }}
|
||||
</div>
|
||||
<div id="watch-group-tag">
|
||||
{{ render_field(form.tags, value=active_tag.title if active_tag_uuid else '', placeholder="Watch group / tag", class="transparent-field") }}
|
||||
{{ render_field(form.tags, value=active_tag.title if active_tag_uuid else '', placeholder=_("Watch group / tag"), class="transparent-field") }}
|
||||
</div>
|
||||
<div id="quick-watch-processor-type">
|
||||
{{ render_simple_field(form.processor) }}
|
||||
</div>
|
||||
|
||||
</fieldset>
|
||||
<span style="color:#eee; font-size: 80%;"><img alt="{{ _('Create a shareable link') }}" style="height: 1em;display:inline-block;" src="{{url_for('static_content', group='images', filename='spread-white.svg')}}" > {{ _("Tip: You can also add 'shared' watches.") }} <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Sharing-a-Watch">{{ _('More info') }}</a></span>
|
||||
<span style="color:#eee; font-size: 80%;">
|
||||
<strong>Tip: </strong> {{ tips | random | safe }}
|
||||
</span>
|
||||
</form>
|
||||
</div>
|
||||
<div class="box">
|
||||
@@ -99,9 +145,14 @@ html[data-darkmode="true"] .watch-tag-list.tag-{{ class_name }} {
|
||||
data-confirm-message="{{ _('<p>Are you sure you want to delete the selected watches?</strong></p><p>This action cannot be undone.</p>') }}"
|
||||
data-confirm-button="{{ _('Delete') }}"><i data-feather="trash" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>{{ _('Delete') }}</button>
|
||||
</div>
|
||||
{%- if watches|length >= pagination.per_page -%}
|
||||
{{ pagination.info }}
|
||||
{%- endif -%}
|
||||
|
||||
<div id="stats_row">
|
||||
<div class="left">{%- if watches|length >= pagination.per_page -%}{{ pagination.info }}{%- endif -%}</div>
|
||||
<div class="right" >{{ _('Queued size') }}: <span id="queue-size-int">{{ queue_size }}</span></div>
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
{%- if search_q -%}<div id="search-result-info">{{ _('Searching') }} "<strong><i>{{search_q}}</i></strong>"</div>{%- endif -%}
|
||||
<div>
|
||||
<a href="{{url_for('watchlist.index')}}" class="pure-button button-tag {{'active' if not active_tag_uuid }}">{{ _('All') }}</a>
|
||||
@@ -154,7 +205,7 @@ html[data-darkmode="true"] .watch-tag-list.tag-{{ class_name }} {
|
||||
<tbody>
|
||||
{%- if not watches|length -%}
|
||||
<tr>
|
||||
<td colspan="{{ cols_required }}" style="text-wrap: wrap;">{{ _('No website watches configured, please add a URL in the box above, or') }} <a href="{{ url_for('imports.import_page')}}" >{{ _('import a list') }}</a>.</td>
|
||||
<td colspan="{{ cols_required }}" style="text-wrap: wrap;">{{ _('No web page change detection watches configured, please add a URL in the box above, or') }} <a href="{{ url_for('imports.import_page')}}" >{{ _('import a list') }}</a>.</td>
|
||||
</tr>
|
||||
{%- endif -%}
|
||||
|
||||
@@ -195,30 +246,40 @@ html[data-darkmode="true"] .watch-tag-list.tag-{{ class_name }} {
|
||||
<td class="title-col inline">
|
||||
<div class="flex-wrapper">
|
||||
{% if 'favicons_enabled' not in ui_settings or ui_settings['favicons_enabled'] %}
|
||||
<div>{# A page might have hundreds of these images, set IMG options for lazy loading, don't set SRC if we dont have it so it doesnt fetch the placeholder' #}
|
||||
<img alt="Favicon thumbnail" class="favicon" loading="lazy" decoding="async" fetchpriority="low" {% if favicon %} src="{{url_for('static_content', group='favicon', filename=watch.uuid)}}" {% else %} src='data:image/svg+xml;utf8,%3Csvg xmlns="http://www.w3.org/2000/svg" width="7.087" height="7.087" viewBox="0 0 7.087 7.087"%3E%3Ccircle cx="3.543" cy="3.543" r="3.279" stroke="%23e1e1e1" stroke-width="0.45" fill="none" opacity="0.74"/%3E%3C/svg%3E' {% endif %} >
|
||||
<div>
|
||||
{# Intersection Observer lazy loading: store real URL in data-src, load only when visible in viewport #}
|
||||
<img alt="Favicon thumbnail"
|
||||
class="favicon lazy-favicon"
|
||||
loading="lazy"
|
||||
decoding="async"
|
||||
fetchpriority="low"
|
||||
{% if favicon %}
|
||||
data-src="{{url_for('static_content', group='favicon', filename=watch.uuid)}}"
|
||||
{% endif %}
|
||||
src='data:image/svg+xml;utf8,%3Csvg xmlns="http://www.w3.org/2000/svg" width="7.087" height="7.087" viewBox="0 0 7.087 7.087"%3E%3Ccircle cx="3.543" cy="3.543" r="3.279" stroke="%23e1e1e1" stroke-width="0.45" fill="none" opacity="0.74"/%3E%3C/svg%3E'>
|
||||
</div>
|
||||
{% endif %}
|
||||
<div>
|
||||
<span class="watch-title">
|
||||
{% if system_use_url_watchlist or watch.get('use_page_title_in_list') %}
|
||||
{{ watch.label }}
|
||||
{% else %}
|
||||
{{ watch.get('title') or watch.link }}
|
||||
{% endif %}
|
||||
<a class="external" target="_blank" rel="noopener" href="{{ watch.link.replace('source:','') }}"> </a>
|
||||
</span>
|
||||
{%- if watch['processor'] and watch['processor'] in processor_badge_texts -%}
|
||||
<span class="processor-badge processor-badge-{{ watch['processor'] }}" title="{{ processor_descriptions.get(watch['processor'], watch['processor']) }}">{{ processor_badge_texts[watch['processor']] }}</span>
|
||||
{%- endif -%}
|
||||
<span class="watch-title">
|
||||
{% if system_use_url_watchlist or watch.get('use_page_title_in_list') %}
|
||||
{{ watch.label }}
|
||||
{% else %}
|
||||
{{ watch.get('title') or watch.link }}
|
||||
{% endif %}
|
||||
<a class="external" target="_blank" rel="noopener" href="{{ watch.link.replace('source:','') }}"> </a>
|
||||
</span>
|
||||
<div class="error-text" style="display:none;">{{ watch.compile_error_texts(has_proxies=datastore.proxy_list)|safe }}</div>
|
||||
{%- if watch['processor'] == 'text_json_diff' -%}
|
||||
{%- if watch['has_ldjson_price_data'] and not watch['track_ldjson_price_data'] -%}
|
||||
<div class="ldjson-price-track-offer">Switch to Restock & Price watch mode? <a href="{{url_for('price_data_follower.accept', uuid=watch.uuid)}}" class="pure-button button-xsmall">Yes</a> <a href="{{url_for('price_data_follower.reject', uuid=watch.uuid)}}" class="">No</a></div>
|
||||
{%- endif -%}
|
||||
{%- endif -%}
|
||||
{%- if watch['processor'] and watch['processor'] in processor_badge_texts -%}
|
||||
<span class="processor-badge processor-badge-{{ watch['processor'] }}" title="{{ processor_descriptions.get(watch['processor'], watch['processor']) }}">{{ processor_badge_texts[watch['processor']] }}</span>
|
||||
{%- endif -%}
|
||||
|
||||
{%- for watch_tag_uuid, watch_tag in datastore.get_all_tags_for_watch(watch['uuid']).items() -%}
|
||||
<span class="watch-tag-list tag-{{ watch_tag.title|sanitize_tag_class }}">{{ watch_tag.title }}</span>
|
||||
<a href="{{url_for('watchlist.index', tag=watch_tag_uuid) }}" class="watch-tag-list tag-{{ watch_tag.title|sanitize_tag_class }}">{{ watch_tag.title }}</a>
|
||||
{%- endfor -%}
|
||||
</div>
|
||||
<div class="status-icons">
|
||||
|
||||
@@ -71,10 +71,19 @@ class Fetcher():
|
||||
supports_screenshots = False # Can capture page screenshots
|
||||
supports_xpath_element_data = False # Can extract xpath element positions/data for visual selector
|
||||
|
||||
# Screenshot element locking - prevents layout shifts during screenshot capture
|
||||
# Only needed for visual comparison (image_ssim_diff processor)
|
||||
# Locks element dimensions in the first viewport to prevent headers/ads from resizing
|
||||
lock_viewport_elements = False # Default: disabled for performance
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
if kwargs and 'screenshot_format' in kwargs:
|
||||
self.screenshot_format = kwargs.get('screenshot_format')
|
||||
|
||||
# Allow lock_viewport_elements to be set via kwargs
|
||||
if kwargs and 'lock_viewport_elements' in kwargs:
|
||||
self.lock_viewport_elements = kwargs.get('lock_viewport_elements')
|
||||
|
||||
|
||||
@classmethod
|
||||
def get_status_icon_data(cls):
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import asyncio
|
||||
import gc
|
||||
import json
|
||||
import os
|
||||
@@ -8,20 +9,24 @@ from loguru import logger
|
||||
from changedetectionio.content_fetchers import SCREENSHOT_MAX_HEIGHT_DEFAULT, visualselector_xpath_selectors, \
|
||||
SCREENSHOT_SIZE_STITCH_THRESHOLD, SCREENSHOT_MAX_TOTAL_HEIGHT, XPATH_ELEMENT_JS, INSTOCK_DATA_JS, FAVICON_FETCHER_JS
|
||||
from changedetectionio.content_fetchers.base import Fetcher, manage_user_agent
|
||||
from changedetectionio.content_fetchers.exceptions import PageUnloadable, Non200ErrorCodeReceived, EmptyReply, ScreenshotUnavailable
|
||||
from changedetectionio.content_fetchers.exceptions import PageUnloadable, Non200ErrorCodeReceived, EmptyReply, ScreenshotUnavailable, \
|
||||
BrowserStepsStepException
|
||||
|
||||
async def capture_full_page_async(page, screenshot_format='JPEG'):
|
||||
|
||||
async def capture_full_page_async(page, screenshot_format='JPEG', watch_uuid=None, lock_viewport_elements=False):
|
||||
import os
|
||||
import time
|
||||
import multiprocessing
|
||||
|
||||
start = time.time()
|
||||
watch_info = f"[{watch_uuid}] " if watch_uuid else ""
|
||||
|
||||
setup_start = time.time()
|
||||
page_height = await page.evaluate("document.documentElement.scrollHeight")
|
||||
page_width = await page.evaluate("document.documentElement.scrollWidth")
|
||||
original_viewport = page.viewport_size
|
||||
dimensions_time = time.time() - setup_start
|
||||
|
||||
logger.debug(f"Playwright viewport size {page.viewport_size} page height {page_height} page width {page_width}")
|
||||
logger.debug(f"{watch_info}Playwright viewport size {page.viewport_size} page height {page_height} page width {page_width} (got dimensions in {dimensions_time:.2f}s)")
|
||||
|
||||
# Use an approach similar to puppeteer: set a larger viewport and take screenshots in chunks
|
||||
step_size = SCREENSHOT_SIZE_STITCH_THRESHOLD # Size that won't cause GPU to overflow
|
||||
@@ -29,25 +34,31 @@ async def capture_full_page_async(page, screenshot_format='JPEG'):
|
||||
y = 0
|
||||
elements_locked = False
|
||||
|
||||
if page_height > page.viewport_size['height']:
|
||||
|
||||
# Lock all element dimensions BEFORE screenshot to prevent CSS media queries from resizing
|
||||
# capture_full_page_async() changes viewport height which triggers @media (min-height) rules
|
||||
# Only lock viewport elements if explicitly enabled (for image_ssim_diff processor)
|
||||
# This prevents headers/ads from resizing when viewport changes
|
||||
if lock_viewport_elements and page_height > page.viewport_size['height']:
|
||||
lock_start = time.time()
|
||||
lock_elements_js_path = os.path.join(os.path.dirname(__file__), 'res', 'lock-elements-sizing.js')
|
||||
with open(lock_elements_js_path, 'r') as f:
|
||||
lock_elements_js = f.read()
|
||||
await page.evaluate(lock_elements_js)
|
||||
elements_locked = True
|
||||
lock_time = time.time() - lock_start
|
||||
logger.debug(f"{watch_info}Viewport element locking enabled (took {lock_time:.2f}s)")
|
||||
|
||||
logger.debug("Element dimensions locked before screenshot capture")
|
||||
|
||||
if page_height > page.viewport_size['height']:
|
||||
if page_height < step_size:
|
||||
step_size = page_height # Incase page is bigger than default viewport but smaller than proposed step size
|
||||
logger.debug(f"Setting bigger viewport to step through large page width W{page.viewport_size['width']}xH{step_size} because page_height > viewport_size")
|
||||
viewport_start = time.time()
|
||||
logger.debug(f"{watch_info}Setting bigger viewport to step through large page width W{page.viewport_size['width']}xH{step_size} because page_height > viewport_size")
|
||||
# Set viewport to a larger size to capture more content at once
|
||||
await page.set_viewport_size({'width': page.viewport_size['width'], 'height': step_size})
|
||||
viewport_time = time.time() - viewport_start
|
||||
logger.debug(f"{watch_info}Viewport changed to {page.viewport_size['width']}x{step_size} (took {viewport_time:.2f}s)")
|
||||
|
||||
# Capture screenshots in chunks up to the max total height
|
||||
capture_start = time.time()
|
||||
chunk_times = []
|
||||
# Use PNG for better quality (no compression artifacts), JPEG for smaller size
|
||||
screenshot_type = screenshot_format.lower() if screenshot_format else 'jpeg'
|
||||
# PNG should use quality 100, JPEG uses configurable quality
|
||||
@@ -69,7 +80,11 @@ async def capture_full_page_async(page, screenshot_format='JPEG'):
|
||||
if screenshot_type == 'jpeg':
|
||||
screenshot_kwargs['quality'] = screenshot_quality
|
||||
|
||||
chunk_start = time.time()
|
||||
screenshot_chunks.append(await page.screenshot(**screenshot_kwargs))
|
||||
chunk_time = time.time() - chunk_start
|
||||
chunk_times.append(chunk_time)
|
||||
logger.debug(f"{watch_info}Chunk {len(screenshot_chunks)} captured in {chunk_time:.2f}s")
|
||||
y += step_size
|
||||
|
||||
# Restore original viewport size
|
||||
@@ -81,40 +96,54 @@ async def capture_full_page_async(page, screenshot_format='JPEG'):
|
||||
with open(unlock_elements_js_path, 'r') as f:
|
||||
unlock_elements_js = f.read()
|
||||
await page.evaluate(unlock_elements_js)
|
||||
logger.debug("Element dimensions unlocked after screenshot capture")
|
||||
logger.debug(f"{watch_info}Element dimensions unlocked after screenshot capture")
|
||||
|
||||
capture_time = time.time() - capture_start
|
||||
total_capture_time = sum(chunk_times)
|
||||
logger.debug(f"{watch_info}All {len(screenshot_chunks)} chunks captured in {capture_time:.2f}s (total chunk time: {total_capture_time:.2f}s)")
|
||||
|
||||
# If we have multiple chunks, stitch them together
|
||||
if len(screenshot_chunks) > 1:
|
||||
logger.debug(f"Screenshot stitching {len(screenshot_chunks)} chunks together")
|
||||
stitch_start = time.time()
|
||||
logger.debug(f"{watch_info}Starting stitching of {len(screenshot_chunks)} chunks")
|
||||
|
||||
# For small number of chunks (2-3), stitch inline to avoid multiprocessing overhead
|
||||
# Only use separate process for many chunks (4+) to avoid blocking the event loop
|
||||
if len(screenshot_chunks) <= 3:
|
||||
from changedetectionio.content_fetchers.screenshot_handler import stitch_images_inline
|
||||
screenshot = stitch_images_inline(screenshot_chunks, page_height, SCREENSHOT_MAX_TOTAL_HEIGHT)
|
||||
else:
|
||||
# Use separate process for many chunks to avoid blocking
|
||||
# Always use spawn for thread safety - consistent behavior in tests and production
|
||||
from changedetectionio.content_fetchers.screenshot_handler import stitch_images_worker
|
||||
ctx = multiprocessing.get_context('spawn')
|
||||
parent_conn, child_conn = ctx.Pipe()
|
||||
p = ctx.Process(target=stitch_images_worker, args=(child_conn, screenshot_chunks, page_height, SCREENSHOT_MAX_TOTAL_HEIGHT))
|
||||
p.start()
|
||||
screenshot = parent_conn.recv_bytes()
|
||||
p.join()
|
||||
# Explicit cleanup
|
||||
del p
|
||||
del parent_conn, child_conn
|
||||
# Always use spawn subprocess for ANY stitching (2+ chunks)
|
||||
# PIL allocates at C level and Python GC never releases it - subprocess exit forces OS to reclaim
|
||||
# Trade-off: 35MB resource_tracker vs 500MB+ PIL leak in main process
|
||||
from changedetectionio.content_fetchers.screenshot_handler import stitch_images_worker_raw_bytes
|
||||
import multiprocessing
|
||||
import struct
|
||||
|
||||
ctx = multiprocessing.get_context('spawn')
|
||||
parent_conn, child_conn = ctx.Pipe()
|
||||
p = ctx.Process(target=stitch_images_worker_raw_bytes, args=(child_conn, page_height, SCREENSHOT_MAX_TOTAL_HEIGHT))
|
||||
p.start()
|
||||
|
||||
# Send via raw bytes (no pickle)
|
||||
parent_conn.send_bytes(struct.pack('I', len(screenshot_chunks)))
|
||||
for chunk in screenshot_chunks:
|
||||
parent_conn.send_bytes(chunk)
|
||||
|
||||
screenshot = parent_conn.recv_bytes()
|
||||
p.join()
|
||||
|
||||
parent_conn.close()
|
||||
child_conn.close()
|
||||
del p, parent_conn, child_conn
|
||||
|
||||
stitch_time = time.time() - stitch_start
|
||||
total_time = time.time() - start
|
||||
setup_time = total_time - capture_time - stitch_time
|
||||
logger.debug(
|
||||
f"Screenshot (chunked/stitched) - Page height: {page_height} Capture height: {SCREENSHOT_MAX_TOTAL_HEIGHT} - Stitched together in {time.time() - start:.2f}s")
|
||||
# Explicit cleanup
|
||||
del screenshot_chunks
|
||||
screenshot_chunks = None
|
||||
f"{watch_info}Screenshot complete - Page height: {page_height}px, Capture height: {SCREENSHOT_MAX_TOTAL_HEIGHT}px | "
|
||||
f"Setup: {setup_time:.2f}s, Capture: {capture_time:.2f}s, Stitching: {stitch_time:.2f}s, Total: {total_time:.2f}s")
|
||||
return screenshot
|
||||
|
||||
total_time = time.time() - start
|
||||
setup_time = total_time - capture_time
|
||||
logger.debug(
|
||||
f"Screenshot Page height: {page_height} Capture height: {SCREENSHOT_MAX_TOTAL_HEIGHT} - Stitched together in {time.time() - start:.2f}s")
|
||||
f"{watch_info}Screenshot complete - Page height: {page_height}px, Capture height: {SCREENSHOT_MAX_TOTAL_HEIGHT}px | "
|
||||
f"Setup: {setup_time:.2f}s, Single chunk: {capture_time:.2f}s, Total: {total_time:.2f}s")
|
||||
|
||||
return screenshot_chunks[0]
|
||||
|
||||
@@ -184,7 +213,8 @@ class fetcher(Fetcher):
|
||||
|
||||
async def screenshot_step(self, step_n=''):
|
||||
super().screenshot_step(step_n=step_n)
|
||||
screenshot = await capture_full_page_async(page=self.page, screenshot_format=self.screenshot_format)
|
||||
watch_uuid = getattr(self, 'watch_uuid', None)
|
||||
screenshot = await capture_full_page_async(page=self.page, screenshot_format=self.screenshot_format, watch_uuid=watch_uuid, lock_viewport_elements=self.lock_viewport_elements)
|
||||
|
||||
# Request GC immediately after screenshot to free memory
|
||||
# Screenshots can be large and browser steps take many of them
|
||||
@@ -233,6 +263,7 @@ class fetcher(Fetcher):
|
||||
import playwright._impl._errors
|
||||
import time
|
||||
self.delete_browser_steps_screenshots()
|
||||
self.watch_uuid = watch_uuid # Store for use in screenshot_step
|
||||
response = None
|
||||
|
||||
async with async_playwright() as p:
|
||||
@@ -318,13 +349,8 @@ class fetcher(Fetcher):
|
||||
logger.error(f"Error fetching FavIcon info {str(e)}, continuing.")
|
||||
|
||||
if self.status_code != 200 and not ignore_status_codes:
|
||||
screenshot = await capture_full_page_async(self.page, screenshot_format=self.screenshot_format)
|
||||
# Cleanup before raising to prevent memory leak
|
||||
await self.page.close()
|
||||
await context.close()
|
||||
await browser.close()
|
||||
# Force garbage collection to release Playwright resources immediately
|
||||
gc.collect()
|
||||
screenshot = await capture_full_page_async(self.page, screenshot_format=self.screenshot_format, watch_uuid=watch_uuid, lock_viewport_elements=self.lock_viewport_elements)
|
||||
# Finally block will handle cleanup
|
||||
raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, screenshot=screenshot)
|
||||
|
||||
if not empty_pages_are_a_change and len((await self.page.content()).strip()) == 0:
|
||||
@@ -337,7 +363,11 @@ class fetcher(Fetcher):
|
||||
try:
|
||||
# Run Browser Steps here
|
||||
if self.browser_steps_get_valid_steps():
|
||||
await self.iterate_browser_steps(start_url=url)
|
||||
try:
|
||||
await self.iterate_browser_steps(start_url=url)
|
||||
except BrowserStepsStepException:
|
||||
# Finally block will handle cleanup
|
||||
raise
|
||||
|
||||
await self.page.wait_for_timeout(extra_wait * 1000)
|
||||
|
||||
@@ -374,44 +404,51 @@ class fetcher(Fetcher):
|
||||
# which will significantly increase the IO size between the server and client, it's recommended to use the lowest
|
||||
# acceptable screenshot quality here
|
||||
# The actual screenshot - this always base64 and needs decoding! horrible! huge CPU usage
|
||||
self.screenshot = await capture_full_page_async(page=self.page, screenshot_format=self.screenshot_format)
|
||||
self.screenshot = await capture_full_page_async(page=self.page, screenshot_format=self.screenshot_format, watch_uuid=watch_uuid, lock_viewport_elements=self.lock_viewport_elements)
|
||||
|
||||
# Force aggressive memory cleanup - screenshots are large and base64 decode creates temporary buffers
|
||||
await self.page.request_gc()
|
||||
gc.collect()
|
||||
|
||||
except ScreenshotUnavailable:
|
||||
# Re-raise screenshot unavailable exceptions
|
||||
raise
|
||||
except Exception as e:
|
||||
# It's likely the screenshot was too long/big and something crashed
|
||||
raise ScreenshotUnavailable(url=url, status_code=self.status_code)
|
||||
|
||||
finally:
|
||||
# Request garbage collection one more time before closing
|
||||
# Clean up resources properly with timeouts to prevent hanging
|
||||
try:
|
||||
await self.page.request_gc()
|
||||
except:
|
||||
pass
|
||||
|
||||
# Clean up resources properly
|
||||
try:
|
||||
await self.page.request_gc()
|
||||
except:
|
||||
pass
|
||||
if hasattr(self, 'page') and self.page:
|
||||
await self.page.request_gc()
|
||||
await asyncio.wait_for(self.page.close(), timeout=5.0)
|
||||
logger.debug(f"Successfully closed page for {url}")
|
||||
except asyncio.TimeoutError:
|
||||
logger.warning(f"Timed out closing page for {url} (5s)")
|
||||
except Exception as e:
|
||||
logger.warning(f"Error closing page for {url}: {e}")
|
||||
finally:
|
||||
self.page = None
|
||||
|
||||
try:
|
||||
await self.page.close()
|
||||
except:
|
||||
pass
|
||||
self.page = None
|
||||
if context:
|
||||
await asyncio.wait_for(context.close(), timeout=5.0)
|
||||
logger.debug(f"Successfully closed context for {url}")
|
||||
except asyncio.TimeoutError:
|
||||
logger.warning(f"Timed out closing context for {url} (5s)")
|
||||
except Exception as e:
|
||||
logger.warning(f"Error closing context for {url}: {e}")
|
||||
finally:
|
||||
context = None
|
||||
|
||||
try:
|
||||
await context.close()
|
||||
except:
|
||||
pass
|
||||
context = None
|
||||
|
||||
try:
|
||||
await browser.close()
|
||||
except:
|
||||
pass
|
||||
browser = None
|
||||
if browser:
|
||||
await asyncio.wait_for(browser.close(), timeout=5.0)
|
||||
logger.debug(f"Successfully closed browser connection for {url}")
|
||||
except asyncio.TimeoutError:
|
||||
logger.warning(f"Timed out closing browser connection for {url} (5s)")
|
||||
except Exception as e:
|
||||
logger.warning(f"Error closing browser for {url}: {e}")
|
||||
finally:
|
||||
browser = None
|
||||
|
||||
# Force Python GC to release Playwright resources immediately
|
||||
# Playwright objects can have circular references that delay cleanup
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import asyncio
|
||||
import gc
|
||||
import json
|
||||
import os
|
||||
import websockets.exceptions
|
||||
@@ -20,18 +21,20 @@ from changedetectionio.content_fetchers.exceptions import PageUnloadable, Non200
|
||||
# Screenshots also travel via the ws:// (websocket) meaning that the binary data is base64 encoded
|
||||
# which will significantly increase the IO size between the server and client, it's recommended to use the lowest
|
||||
# acceptable screenshot quality here
|
||||
async def capture_full_page(page, screenshot_format='JPEG'):
|
||||
async def capture_full_page(page, screenshot_format='JPEG', watch_uuid=None, lock_viewport_elements=False):
|
||||
import os
|
||||
import time
|
||||
import multiprocessing
|
||||
|
||||
start = time.time()
|
||||
watch_info = f"[{watch_uuid}] " if watch_uuid else ""
|
||||
|
||||
setup_start = time.time()
|
||||
page_height = await page.evaluate("document.documentElement.scrollHeight")
|
||||
page_width = await page.evaluate("document.documentElement.scrollWidth")
|
||||
original_viewport = page.viewport
|
||||
dimensions_time = time.time() - setup_start
|
||||
|
||||
logger.debug(f"Puppeteer viewport size {page.viewport} page height {page_height} page width {page_width}")
|
||||
logger.debug(f"{watch_info}Puppeteer viewport size {page.viewport} page height {page_height} page width {page_width} (got dimensions in {dimensions_time:.2f}s)")
|
||||
|
||||
# Bug 3 in Playwright screenshot handling
|
||||
# Some bug where it gives the wrong screenshot size, but making a request with the clip set first seems to solve it
|
||||
@@ -50,20 +53,35 @@ async def capture_full_page(page, screenshot_format='JPEG'):
|
||||
screenshot_chunks = []
|
||||
y = 0
|
||||
elements_locked = False
|
||||
if page_height > page.viewport['height']:
|
||||
# Lock all element dimensions BEFORE screenshot to prevent CSS media queries from resizing
|
||||
# capture_full_page() changes viewport height which triggers @media (min-height) rules
|
||||
|
||||
# Only lock viewport elements if explicitly enabled (for image_ssim_diff processor)
|
||||
# This prevents headers/ads from resizing when viewport changes
|
||||
if lock_viewport_elements and page_height > page.viewport['height']:
|
||||
lock_start = time.time()
|
||||
lock_elements_js_path = os.path.join(os.path.dirname(__file__), 'res', 'lock-elements-sizing.js')
|
||||
file_read_start = time.time()
|
||||
with open(lock_elements_js_path, 'r') as f:
|
||||
lock_elements_js = f.read()
|
||||
await page.evaluate(lock_elements_js)
|
||||
elements_locked = True
|
||||
logger.debug("Element dimensions locked before screenshot capture")
|
||||
file_read_time = time.time() - file_read_start
|
||||
|
||||
evaluate_start = time.time()
|
||||
await page.evaluate(lock_elements_js)
|
||||
evaluate_time = time.time() - evaluate_start
|
||||
|
||||
elements_locked = True
|
||||
lock_time = time.time() - lock_start
|
||||
logger.debug(f"{watch_info}Viewport element locking enabled - File read: {file_read_time:.3f}s, Browser evaluate: {evaluate_time:.2f}s, Total: {lock_time:.2f}s")
|
||||
|
||||
if page_height > page.viewport['height']:
|
||||
if page_height < step_size:
|
||||
step_size = page_height # Incase page is bigger than default viewport but smaller than proposed step size
|
||||
viewport_start = time.time()
|
||||
await page.setViewport({'width': page.viewport['width'], 'height': step_size})
|
||||
viewport_time = time.time() - viewport_start
|
||||
logger.debug(f"{watch_info}Viewport changed to {page.viewport['width']}x{step_size} (took {viewport_time:.2f}s)")
|
||||
|
||||
capture_start = time.time()
|
||||
chunk_times = []
|
||||
while y < min(page_height, SCREENSHOT_MAX_TOTAL_HEIGHT):
|
||||
# better than scrollTo incase they override it in the page
|
||||
await page.evaluate(
|
||||
@@ -82,7 +100,11 @@ async def capture_full_page(page, screenshot_format='JPEG'):
|
||||
if screenshot_type == 'jpeg':
|
||||
screenshot_kwargs['quality'] = screenshot_quality
|
||||
|
||||
chunk_start = time.time()
|
||||
screenshot_chunks.append(await page.screenshot(**screenshot_kwargs))
|
||||
chunk_time = time.time() - chunk_start
|
||||
chunk_times.append(chunk_time)
|
||||
logger.debug(f"{watch_info}Chunk {len(screenshot_chunks)} captured in {chunk_time:.2f}s")
|
||||
y += step_size
|
||||
|
||||
await page.setViewport({'width': original_viewport['width'], 'height': original_viewport['height']})
|
||||
@@ -93,26 +115,53 @@ async def capture_full_page(page, screenshot_format='JPEG'):
|
||||
with open(unlock_elements_js_path, 'r') as f:
|
||||
unlock_elements_js = f.read()
|
||||
await page.evaluate(unlock_elements_js)
|
||||
logger.debug("Element dimensions unlocked after screenshot capture")
|
||||
logger.debug(f"{watch_info}Element dimensions unlocked after screenshot capture")
|
||||
|
||||
capture_time = time.time() - capture_start
|
||||
total_capture_time = sum(chunk_times)
|
||||
logger.debug(f"{watch_info}All {len(screenshot_chunks)} chunks captured in {capture_time:.2f}s (total chunk time: {total_capture_time:.2f}s)")
|
||||
|
||||
if len(screenshot_chunks) > 1:
|
||||
# Always use spawn for thread safety - consistent behavior in tests and production
|
||||
from changedetectionio.content_fetchers.screenshot_handler import stitch_images_worker
|
||||
logger.debug(f"Screenshot stitching {len(screenshot_chunks)} chunks together")
|
||||
stitch_start = time.time()
|
||||
logger.debug(f"{watch_info}Starting stitching of {len(screenshot_chunks)} chunks")
|
||||
|
||||
# Always use spawn subprocess for ANY stitching (2+ chunks)
|
||||
# PIL allocates at C level and Python GC never releases it - subprocess exit forces OS to reclaim
|
||||
# Trade-off: 35MB resource_tracker vs 500MB+ PIL leak in main process
|
||||
from changedetectionio.content_fetchers.screenshot_handler import stitch_images_worker_raw_bytes
|
||||
import multiprocessing
|
||||
import struct
|
||||
|
||||
ctx = multiprocessing.get_context('spawn')
|
||||
parent_conn, child_conn = ctx.Pipe()
|
||||
p = ctx.Process(target=stitch_images_worker, args=(child_conn, screenshot_chunks, page_height, SCREENSHOT_MAX_TOTAL_HEIGHT))
|
||||
p = ctx.Process(target=stitch_images_worker_raw_bytes, args=(child_conn, page_height, SCREENSHOT_MAX_TOTAL_HEIGHT))
|
||||
p.start()
|
||||
|
||||
# Send via raw bytes (no pickle)
|
||||
parent_conn.send_bytes(struct.pack('I', len(screenshot_chunks)))
|
||||
for chunk in screenshot_chunks:
|
||||
parent_conn.send_bytes(chunk)
|
||||
|
||||
screenshot = parent_conn.recv_bytes()
|
||||
p.join()
|
||||
logger.debug(
|
||||
f"Screenshot (chunked/stitched) - Page height: {page_height} Capture height: {SCREENSHOT_MAX_TOTAL_HEIGHT} - Stitched together in {time.time() - start:.2f}s")
|
||||
|
||||
screenshot_chunks = None
|
||||
parent_conn.close()
|
||||
child_conn.close()
|
||||
del p, parent_conn, child_conn
|
||||
|
||||
stitch_time = time.time() - stitch_start
|
||||
total_time = time.time() - start
|
||||
setup_time = total_time - capture_time - stitch_time
|
||||
logger.debug(
|
||||
f"{watch_info}Screenshot complete - Page height: {page_height}px, Capture height: {SCREENSHOT_MAX_TOTAL_HEIGHT}px | "
|
||||
f"Setup: {setup_time:.2f}s, Capture: {capture_time:.2f}s, Stitching: {stitch_time:.2f}s, Total: {total_time:.2f}s")
|
||||
return screenshot
|
||||
|
||||
total_time = time.time() - start
|
||||
setup_time = total_time - capture_time
|
||||
logger.debug(
|
||||
f"Screenshot Page height: {page_height} Capture height: {SCREENSHOT_MAX_TOTAL_HEIGHT} - Stitched together in {time.time() - start:.2f}s")
|
||||
f"{watch_info}Screenshot complete - Page height: {page_height}px, Capture height: {SCREENSHOT_MAX_TOTAL_HEIGHT}px | "
|
||||
f"Setup: {setup_time:.2f}s, Single chunk: {capture_time:.2f}s, Total: {total_time:.2f}s")
|
||||
return screenshot_chunks[0]
|
||||
|
||||
|
||||
@@ -173,19 +222,36 @@ class fetcher(Fetcher):
|
||||
self.browser_connection_url += f"{r}--proxy-server={proxy_url}"
|
||||
|
||||
async def quit(self, watch=None):
|
||||
try:
|
||||
await self.page.close()
|
||||
del self.page
|
||||
except Exception as e:
|
||||
pass
|
||||
watch_uuid = watch.get('uuid') if watch else 'unknown'
|
||||
|
||||
# Close page
|
||||
try:
|
||||
await self.browser.close()
|
||||
del self.browser
|
||||
if hasattr(self, 'page') and self.page:
|
||||
await asyncio.wait_for(self.page.close(), timeout=5.0)
|
||||
logger.debug(f"[{watch_uuid}] Page closed successfully")
|
||||
except asyncio.TimeoutError:
|
||||
logger.warning(f"[{watch_uuid}] Timed out closing page (5s)")
|
||||
except Exception as e:
|
||||
pass
|
||||
logger.warning(f"[{watch_uuid}] Error closing page: {e}")
|
||||
finally:
|
||||
self.page = None
|
||||
|
||||
logger.info("Cleanup puppeteer complete.")
|
||||
# Close browser connection
|
||||
try:
|
||||
if hasattr(self, 'browser') and self.browser:
|
||||
await asyncio.wait_for(self.browser.close(), timeout=5.0)
|
||||
logger.debug(f"[{watch_uuid}] Browser closed successfully")
|
||||
except asyncio.TimeoutError:
|
||||
logger.warning(f"[{watch_uuid}] Timed out closing browser (5s)")
|
||||
except Exception as e:
|
||||
logger.warning(f"[{watch_uuid}] Error closing browser: {e}")
|
||||
finally:
|
||||
self.browser = None
|
||||
|
||||
logger.info(f"[{watch_uuid}] Cleanup puppeteer complete")
|
||||
|
||||
# Force garbage collection to release resources
|
||||
gc.collect()
|
||||
|
||||
async def fetch_page(self,
|
||||
current_include_filters,
|
||||
@@ -215,9 +281,11 @@ class fetcher(Fetcher):
|
||||
# Connect directly using the specified browser_ws_endpoint
|
||||
# @todo timeout
|
||||
try:
|
||||
logger.debug(f"[{watch_uuid}] Connecting to browser at {self.browser_connection_url}")
|
||||
self.browser = await pyppeteer_instance.connect(browserWSEndpoint=self.browser_connection_url,
|
||||
ignoreHTTPSErrors=True
|
||||
)
|
||||
logger.debug(f"[{watch_uuid}] Browser connected successfully")
|
||||
except websockets.exceptions.InvalidStatusCode as e:
|
||||
raise BrowserConnectError(msg=f"Error while trying to connect the browser, Code {e.status_code} (check your access, whitelist IP, password etc)")
|
||||
except websockets.exceptions.InvalidURI:
|
||||
@@ -226,7 +294,18 @@ class fetcher(Fetcher):
|
||||
raise BrowserConnectError(msg=f"Error connecting to the browser - Exception '{str(e)}'")
|
||||
|
||||
# more reliable is to just request a new page
|
||||
self.page = await self.browser.newPage()
|
||||
try:
|
||||
logger.debug(f"[{watch_uuid}] Creating new page")
|
||||
self.page = await self.browser.newPage()
|
||||
logger.debug(f"[{watch_uuid}] Page created successfully")
|
||||
except Exception as e:
|
||||
logger.error(f"[{watch_uuid}] Failed to create new page: {e}")
|
||||
# Browser is connected but page creation failed - must cleanup browser
|
||||
try:
|
||||
await asyncio.wait_for(self.browser.close(), timeout=3.0)
|
||||
except Exception as cleanup_error:
|
||||
logger.error(f"[{watch_uuid}] Failed to cleanup browser after page creation failure: {cleanup_error}")
|
||||
raise
|
||||
|
||||
# Add console handler to capture console.log from favicon fetcher
|
||||
#self.page.on('console', lambda msg: logger.debug(f"Browser console [{msg.type}]: {msg.text}"))
|
||||
@@ -295,6 +374,12 @@ class fetcher(Fetcher):
|
||||
w = extra_wait - 2 if extra_wait > 4 else 2
|
||||
logger.debug(f"Waiting {w} seconds before calling Page.stopLoading...")
|
||||
await asyncio.sleep(w)
|
||||
|
||||
# Check if page still exists (might have been closed due to error during sleep)
|
||||
if not self.page or not hasattr(self.page, '_client'):
|
||||
logger.debug("Page already closed, skipping stopLoading")
|
||||
return
|
||||
|
||||
logger.debug("Issuing stopLoading command...")
|
||||
await self.page._client.send('Page.stopLoading')
|
||||
logger.debug("stopLoading command sent!")
|
||||
@@ -320,7 +405,9 @@ class fetcher(Fetcher):
|
||||
asyncio.create_task(handle_frame_navigation())
|
||||
response = await self.page.goto(url, timeout=0)
|
||||
await asyncio.sleep(1 + extra_wait)
|
||||
await self.page._client.send('Page.stopLoading')
|
||||
# Check if page still exists before sending command
|
||||
if self.page and hasattr(self.page, '_client'):
|
||||
await self.page._client.send('Page.stopLoading')
|
||||
|
||||
if response:
|
||||
break
|
||||
@@ -357,7 +444,7 @@ class fetcher(Fetcher):
|
||||
logger.error(f"Error fetching FavIcon info {str(e)}, continuing.")
|
||||
|
||||
if self.status_code != 200 and not ignore_status_codes:
|
||||
screenshot = await capture_full_page(page=self.page, screenshot_format=self.screenshot_format)
|
||||
screenshot = await capture_full_page(page=self.page, screenshot_format=self.screenshot_format, watch_uuid=watch_uuid, lock_viewport_elements=self.lock_viewport_elements)
|
||||
|
||||
raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, screenshot=screenshot)
|
||||
|
||||
@@ -387,7 +474,11 @@ class fetcher(Fetcher):
|
||||
|
||||
# Now take screenshot (scrolling may trigger layout changes, but measurements are already captured)
|
||||
logger.debug(f"Screenshot format {self.screenshot_format}")
|
||||
self.screenshot = await capture_full_page(page=self.page, screenshot_format=self.screenshot_format)
|
||||
self.screenshot = await capture_full_page(page=self.page, screenshot_format=self.screenshot_format, watch_uuid=watch_uuid, lock_viewport_elements=self.lock_viewport_elements)
|
||||
|
||||
# Force garbage collection - pyppeteer base64 decode creates temporary buffers
|
||||
import gc
|
||||
gc.collect()
|
||||
self.xpath_data = await self.page.evaluate(XPATH_ELEMENT_JS, {
|
||||
"visualselector_xpath_selectors": visualselector_xpath_selectors,
|
||||
"max_height": MAX_TOTAL_HEIGHT
|
||||
|
||||
@@ -55,6 +55,26 @@ class fetcher(Fetcher):
|
||||
|
||||
session = requests.Session()
|
||||
|
||||
# Configure retry adapter for low-level network errors only
|
||||
# Retries connection timeouts, read timeouts, connection resets - not HTTP status codes
|
||||
# Especially helpful in parallel test execution when servers are slow/overloaded
|
||||
# Configurable via REQUESTS_RETRY_MAX_COUNT (default: 3 attempts)
|
||||
from requests.adapters import HTTPAdapter
|
||||
from urllib3.util.retry import Retry
|
||||
|
||||
max_retries = int(os.getenv("REQUESTS_RETRY_MAX_COUNT", "6"))
|
||||
retry_strategy = Retry(
|
||||
total=max_retries,
|
||||
connect=max_retries, # Retry connection timeouts
|
||||
read=max_retries, # Retry read timeouts
|
||||
status=0, # Don't retry on HTTP status codes
|
||||
backoff_factor=0.5, # Wait 0.3s, 0.6s, 1.2s between retries
|
||||
allowed_methods=["HEAD", "GET", "OPTIONS", "POST"],
|
||||
raise_on_status=False
|
||||
)
|
||||
adapter = HTTPAdapter(max_retries=retry_strategy)
|
||||
session.mount("http://", adapter)
|
||||
session.mount("https://", adapter)
|
||||
|
||||
if strtobool(os.getenv('ALLOW_FILE_URI', 'false')) and url.startswith('file://'):
|
||||
from requests_file import FileAdapter
|
||||
@@ -142,10 +162,11 @@ class fetcher(Fetcher):
|
||||
watch_uuid=None,
|
||||
):
|
||||
"""Async wrapper that runs the synchronous requests code in a thread pool"""
|
||||
|
||||
|
||||
loop = asyncio.get_event_loop()
|
||||
|
||||
|
||||
# Run the synchronous _run_sync in a thread pool to avoid blocking the event loop
|
||||
# Retry logic is handled by requests' HTTPAdapter (see _run_sync for configuration)
|
||||
await loop.run_in_executor(
|
||||
None, # Use default ThreadPoolExecutor
|
||||
lambda: self._run_sync(
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/**
|
||||
* Lock Element Dimensions for Screenshot Capture
|
||||
* Lock Element Dimensions for Screenshot Capture (First Viewport Only)
|
||||
*
|
||||
* THE PROBLEM:
|
||||
* When taking full-page screenshots of tall pages, Chrome/Puppeteer/Playwright need to:
|
||||
@@ -10,40 +10,31 @@
|
||||
* However, changing the viewport height triggers CSS media queries like:
|
||||
* @media (min-height: 860px) { .ad { height: 250px; } }
|
||||
*
|
||||
* This causes elements (especially ads) to resize during screenshot capture, creating a mismatch:
|
||||
* - Screenshot shows element at NEW size (after media query triggered)
|
||||
* - xpath element coordinates measured at OLD size (before viewport change)
|
||||
* - Visual selector overlays don't align with screenshot
|
||||
*
|
||||
* EXAMPLE BUG:
|
||||
* - Initial viewport: 1280x800, ad height: 138px, article position: 279px ✓
|
||||
* - Viewport changes to 1280x3809 for screenshot
|
||||
* - Media query triggers: ad expands to 250px
|
||||
* - All content below shifts down by 112px (250-138)
|
||||
* - Article now at position: 391px (279+112)
|
||||
* - But xpath data says 279px → 112px mismatch! ✗
|
||||
* This causes elements (especially ads/headers) to resize during screenshot capture.
|
||||
*
|
||||
* THE SOLUTION:
|
||||
* Before changing viewport, lock ALL element dimensions with !important inline styles.
|
||||
* Inline styles with !important override media query CSS, preventing layout changes.
|
||||
* Lock element dimensions in the FIRST VIEWPORT ONLY with !important inline styles.
|
||||
* This prevents headers, navigation, and top ads from resizing when viewport changes.
|
||||
* We only lock the visible portion because:
|
||||
* - Most layout shifts happen in headers/navbars/top ads
|
||||
* - Locking only visible elements is 100x+ faster (100-200 elements vs 10,000+)
|
||||
* - Below-fold content shifts don't affect visual comparison accuracy
|
||||
*
|
||||
* WHAT THIS SCRIPT DOES:
|
||||
* 1. Iterates through every element on the page
|
||||
* 2. Captures current computed dimensions (width, height)
|
||||
* 3. Sets inline styles with !important to freeze those dimensions
|
||||
* 1. Gets current viewport height
|
||||
* 2. Finds elements within first viewport (top of page to bottom of screen)
|
||||
* 3. Locks their dimensions with !important inline styles
|
||||
* 4. Disables ResizeObserver API (for JS-based resizing)
|
||||
* 5. When viewport changes for screenshot, media queries can't resize anything
|
||||
* 6. Layout remains consistent → xpath coordinates match screenshot ✓
|
||||
*
|
||||
* USAGE:
|
||||
* Execute this script BEFORE calling capture_full_page() / screenshot functions.
|
||||
* The page must be fully loaded and settled at its initial viewport size.
|
||||
* No need to restore state afterward - page is closed after screenshot.
|
||||
* Only enabled for image_ssim_diff processor (visual comparison).
|
||||
* Default: OFF for performance.
|
||||
*
|
||||
* PERFORMANCE:
|
||||
* - Iterates all DOM elements (can be 1000s on complex pages)
|
||||
* - Typically completes in 50-200ms
|
||||
* - One-time cost before screenshot, well worth it for coordinate accuracy
|
||||
* - Only processes 100-300 elements (first viewport) vs 10,000+ (entire page)
|
||||
* - Typically completes in 10-50ms
|
||||
* - 100x+ faster than locking entire page
|
||||
*
|
||||
* @see https://github.com/dgtlmoon/changedetection.io/issues/XXXX
|
||||
*/
|
||||
@@ -52,11 +43,34 @@
|
||||
// Store original styles in a global WeakMap for later restoration
|
||||
window.__elementSizingRestore = new WeakMap();
|
||||
|
||||
// Lock ALL element dimensions to prevent media query layout changes
|
||||
document.querySelectorAll('*').forEach(el => {
|
||||
const computed = window.getComputedStyle(el);
|
||||
const rect = el.getBoundingClientRect();
|
||||
const start = performance.now();
|
||||
|
||||
// Get current viewport height (visible portion of page)
|
||||
const viewportHeight = window.innerHeight;
|
||||
|
||||
// Get all elements and filter to FIRST VIEWPORT ONLY
|
||||
// This dramatically reduces elements to process (100-300 vs 10,000+)
|
||||
const allElements = Array.from(document.querySelectorAll('*'));
|
||||
|
||||
// BATCH READ PHASE: Get bounding rects and filter to viewport
|
||||
const measurements = allElements.map(el => {
|
||||
const rect = el.getBoundingClientRect();
|
||||
const computed = window.getComputedStyle(el);
|
||||
|
||||
// Only lock elements in the first viewport (visible on initial page load)
|
||||
// rect.top < viewportHeight means element starts within visible area
|
||||
const inViewport = rect.top < viewportHeight && rect.top >= 0;
|
||||
const hasSize = rect.height > 0 && rect.width > 0;
|
||||
|
||||
return inViewport && hasSize ? { el, computed, rect } : null;
|
||||
}).filter(Boolean); // Remove null entries
|
||||
|
||||
const elapsed = performance.now() - start;
|
||||
console.log(`Locked first viewport elements: ${measurements.length} of ${allElements.length} total elements (viewport height: ${viewportHeight}px, took ${elapsed.toFixed(0)}ms)`);
|
||||
|
||||
// BATCH WRITE PHASE: Apply all inline styles without triggering layout
|
||||
// No interleaved reads means browser can optimize style application
|
||||
measurements.forEach(({el, computed, rect}) => {
|
||||
// Save original inline style values BEFORE locking
|
||||
const properties = ['height', 'min-height', 'max-height', 'width', 'min-width', 'max-width'];
|
||||
const originalStyles = {};
|
||||
@@ -89,5 +103,5 @@
|
||||
disconnect() {}
|
||||
};
|
||||
|
||||
console.log('✓ Element dimensions locked to prevent media query changes during screenshot');
|
||||
console.log(`✓ Element dimensions locked (${measurements.length} elements) to prevent media query changes during screenshot`);
|
||||
})();
|
||||
|
||||
@@ -8,92 +8,42 @@ from loguru import logger
|
||||
|
||||
from changedetectionio.content_fetchers import SCREENSHOT_MAX_HEIGHT_DEFAULT, SCREENSHOT_DEFAULT_QUALITY
|
||||
|
||||
# Cache font to avoid loading on every stitch
|
||||
_cached_font = None
|
||||
|
||||
def _get_caption_font():
|
||||
"""Get or create cached font for caption text."""
|
||||
global _cached_font
|
||||
if _cached_font is None:
|
||||
from PIL import ImageFont
|
||||
try:
|
||||
_cached_font = ImageFont.truetype("arial.ttf", 35)
|
||||
except IOError:
|
||||
_cached_font = ImageFont.load_default()
|
||||
return _cached_font
|
||||
|
||||
|
||||
def stitch_images_inline(chunks_bytes, original_page_height, capture_height):
|
||||
"""
|
||||
Stitch image chunks together inline (no multiprocessing).
|
||||
Optimized for small number of chunks (2-3) to avoid process creation overhead.
|
||||
|
||||
Args:
|
||||
chunks_bytes: List of JPEG image bytes
|
||||
original_page_height: Original page height in pixels
|
||||
capture_height: Maximum capture height
|
||||
|
||||
Returns:
|
||||
bytes: Stitched JPEG image
|
||||
"""
|
||||
import os
|
||||
import io
|
||||
from PIL import Image, ImageDraw
|
||||
|
||||
# Load images from byte chunks
|
||||
images = [Image.open(io.BytesIO(b)) for b in chunks_bytes]
|
||||
total_height = sum(im.height for im in images)
|
||||
max_width = max(im.width for im in images)
|
||||
|
||||
# Create stitched image
|
||||
stitched = Image.new('RGB', (max_width, total_height))
|
||||
y_offset = 0
|
||||
for im in images:
|
||||
stitched.paste(im, (0, y_offset))
|
||||
y_offset += im.height
|
||||
im.close() # Close immediately after pasting
|
||||
|
||||
# Draw caption only if page was trimmed
|
||||
if original_page_height > capture_height:
|
||||
draw = ImageDraw.Draw(stitched)
|
||||
caption_text = f"WARNING: Screenshot was {original_page_height}px but trimmed to {capture_height}px because it was too long"
|
||||
padding = 10
|
||||
font = _get_caption_font()
|
||||
|
||||
bbox = draw.textbbox((0, 0), caption_text, font=font)
|
||||
text_width = bbox[2] - bbox[0]
|
||||
text_height = bbox[3] - bbox[1]
|
||||
|
||||
# Draw white background rectangle
|
||||
draw.rectangle([(0, 0), (max_width, text_height + 2 * padding)], fill=(255, 255, 255))
|
||||
|
||||
# Draw text centered
|
||||
text_x = (max_width - text_width) // 2
|
||||
draw.text((text_x, padding), caption_text, font=font, fill=(255, 0, 0))
|
||||
|
||||
# Encode to JPEG
|
||||
output = io.BytesIO()
|
||||
stitched.save(output, format="JPEG", quality=int(os.getenv("SCREENSHOT_QUALITY", SCREENSHOT_DEFAULT_QUALITY)), optimize=True)
|
||||
result = output.getvalue()
|
||||
|
||||
# Cleanup
|
||||
stitched.close()
|
||||
|
||||
return result
|
||||
|
||||
|
||||
def stitch_images_worker(pipe_conn, chunks_bytes, original_page_height, capture_height):
|
||||
def stitch_images_worker_raw_bytes(pipe_conn, original_page_height, capture_height):
|
||||
"""
|
||||
Stitch image chunks together in a separate process.
|
||||
Used for large number of chunks (4+) to avoid blocking the main event loop.
|
||||
|
||||
Uses spawn multiprocessing to isolate PIL's C-level memory allocation.
|
||||
When the subprocess exits, the OS reclaims ALL memory including C-level allocations
|
||||
that Python's GC cannot release. This prevents the ~50MB per stitch from accumulating
|
||||
in the main process.
|
||||
|
||||
Trade-off: Adds 35MB resource_tracker subprocess, but prevents 500MB+ memory leak
|
||||
in main process (much better at scale: 35GB vs 500GB for 1000 instances).
|
||||
|
||||
Args:
|
||||
pipe_conn: Pipe connection to receive data and send result
|
||||
original_page_height: Original page height in pixels
|
||||
capture_height: Maximum capture height
|
||||
"""
|
||||
import os
|
||||
import io
|
||||
import struct
|
||||
from PIL import Image, ImageDraw, ImageFont
|
||||
|
||||
try:
|
||||
# Receive chunk count as 4-byte integer (no pickle!)
|
||||
count_bytes = pipe_conn.recv_bytes()
|
||||
chunk_count = struct.unpack('I', count_bytes)[0]
|
||||
|
||||
# Receive each chunk as raw bytes (no pickle!)
|
||||
chunks_bytes = []
|
||||
for _ in range(chunk_count):
|
||||
chunks_bytes.append(pipe_conn.recv_bytes())
|
||||
|
||||
# Load images from byte chunks
|
||||
images = [Image.open(io.BytesIO(b)) for b in chunks_bytes]
|
||||
del chunks_bytes
|
||||
|
||||
total_height = sum(im.height for im in images)
|
||||
max_width = max(im.width for im in images)
|
||||
|
||||
@@ -103,15 +53,14 @@ def stitch_images_worker(pipe_conn, chunks_bytes, original_page_height, capture_
|
||||
for im in images:
|
||||
stitched.paste(im, (0, y_offset))
|
||||
y_offset += im.height
|
||||
im.close() # Close immediately after pasting
|
||||
im.close()
|
||||
del images
|
||||
|
||||
# Draw caption only if page was trimmed
|
||||
if original_page_height > capture_height:
|
||||
draw = ImageDraw.Draw(stitched)
|
||||
caption_text = f"WARNING: Screenshot was {original_page_height}px but trimmed to {capture_height}px because it was too long"
|
||||
padding = 10
|
||||
|
||||
# Try to load font
|
||||
try:
|
||||
font = ImageFont.truetype("arial.ttf", 35)
|
||||
except IOError:
|
||||
@@ -120,23 +69,26 @@ def stitch_images_worker(pipe_conn, chunks_bytes, original_page_height, capture_
|
||||
bbox = draw.textbbox((0, 0), caption_text, font=font)
|
||||
text_width = bbox[2] - bbox[0]
|
||||
text_height = bbox[3] - bbox[1]
|
||||
|
||||
# Draw white background rectangle
|
||||
draw.rectangle([(0, 0), (max_width, text_height + 2 * padding)], fill=(255, 255, 255))
|
||||
|
||||
# Draw text centered
|
||||
text_x = (max_width - text_width) // 2
|
||||
draw.text((text_x, padding), caption_text, font=font, fill=(255, 0, 0))
|
||||
|
||||
# Encode and send image with optimization
|
||||
# Encode and send
|
||||
output = io.BytesIO()
|
||||
stitched.save(output, format="JPEG", quality=int(os.getenv("SCREENSHOT_QUALITY", SCREENSHOT_DEFAULT_QUALITY)), optimize=True)
|
||||
pipe_conn.send_bytes(output.getvalue())
|
||||
result_bytes = output.getvalue()
|
||||
|
||||
stitched.close()
|
||||
del stitched
|
||||
output.close()
|
||||
del output
|
||||
|
||||
pipe_conn.send_bytes(result_bytes)
|
||||
del result_bytes
|
||||
|
||||
except Exception as e:
|
||||
pipe_conn.send(f"error:{e}")
|
||||
logger.error(f"Error in stitch_images_worker_raw_bytes: {e}")
|
||||
error_msg = f"error:{e}".encode('utf-8')
|
||||
pipe_conn.send_bytes(error_msg)
|
||||
finally:
|
||||
pipe_conn.close()
|
||||
|
||||
|
||||
|
||||
@@ -156,6 +156,19 @@ class fetcher(Fetcher):
|
||||
from PIL import Image
|
||||
import io
|
||||
img = Image.open(io.BytesIO(screenshot_png))
|
||||
# Convert to RGB if needed (JPEG doesn't support transparency)
|
||||
# Always convert non-RGB modes to RGB to ensure JPEG compatibility
|
||||
if img.mode in ('RGBA', 'LA', 'P', 'PA'):
|
||||
# Handle transparency by compositing onto white background
|
||||
if img.mode == 'P':
|
||||
img = img.convert('RGBA')
|
||||
background = Image.new('RGB', img.size, (255, 255, 255))
|
||||
if img.mode in ('RGBA', 'LA', 'PA'):
|
||||
background.paste(img, mask=img.split()[-1]) # Use alpha channel as mask
|
||||
img = background
|
||||
elif img.mode != 'RGB':
|
||||
# For other modes, direct conversion
|
||||
img = img.convert('RGB')
|
||||
jpeg_buffer = io.BytesIO()
|
||||
img.save(jpeg_buffer, format='JPEG', quality=int(os.getenv("SCREENSHOT_QUALITY", 72)))
|
||||
self.screenshot = jpeg_buffer.getvalue()
|
||||
|
||||
@@ -0,0 +1,43 @@
|
||||
"""
|
||||
Favicon utilities for changedetection.io
|
||||
Handles favicon MIME type detection with caching
|
||||
"""
|
||||
|
||||
from functools import lru_cache
|
||||
|
||||
|
||||
@lru_cache(maxsize=1000)
|
||||
def get_favicon_mime_type(filepath):
|
||||
"""
|
||||
Detect MIME type of favicon by reading file content using puremagic.
|
||||
Results are cached to avoid repeatedly reading the same files.
|
||||
|
||||
Args:
|
||||
filepath: Full path to the favicon file
|
||||
|
||||
Returns:
|
||||
MIME type string (e.g., 'image/png')
|
||||
"""
|
||||
mime = None
|
||||
|
||||
try:
|
||||
import puremagic
|
||||
with open(filepath, 'rb') as f:
|
||||
content_bytes = f.read(200) # Read first 200 bytes
|
||||
|
||||
detections = puremagic.magic_string(content_bytes)
|
||||
if detections:
|
||||
mime = detections[0].mime_type
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Fallback to mimetypes if puremagic fails
|
||||
if not mime:
|
||||
import mimetypes
|
||||
mime, _ = mimetypes.guess_type(filepath)
|
||||
|
||||
# Final fallback based on extension
|
||||
if not mime:
|
||||
mime = 'image/x-icon' if filepath.endswith('.ico') else 'image/png'
|
||||
|
||||
return mime
|
||||
+127
-54
@@ -9,11 +9,12 @@ import threading
|
||||
import time
|
||||
import timeago
|
||||
from blinker import signal
|
||||
from pathlib import Path
|
||||
|
||||
from changedetectionio.strtobool import strtobool
|
||||
from threading import Event
|
||||
from changedetectionio.queue_handlers import RecheckPriorityQueue, NotificationQueue
|
||||
from changedetectionio import worker_handler
|
||||
from changedetectionio import worker_pool
|
||||
|
||||
from flask import (
|
||||
Flask,
|
||||
@@ -26,9 +27,7 @@ from flask import (
|
||||
session,
|
||||
url_for,
|
||||
)
|
||||
from urllib.parse import urlparse
|
||||
from flask_compress import Compress as FlaskCompress
|
||||
from flask_login import current_user
|
||||
from flask_restful import abort, Api
|
||||
from flask_cors import CORS
|
||||
|
||||
@@ -45,6 +44,9 @@ from changedetectionio.api import Watch, WatchHistory, WatchSingleHistory, Watch
|
||||
from changedetectionio.api.Search import Search
|
||||
from .time_handler import is_within_schedule
|
||||
from changedetectionio.languages import get_available_languages, get_language_codes, get_flag_for_locale, get_timeago_locale
|
||||
from changedetectionio.favicon_utils import get_favicon_mime_type
|
||||
|
||||
IN_PYTEST = "pytest" in sys.modules or "PYTEST_CURRENT_TEST" in os.environ
|
||||
|
||||
datastore = None
|
||||
|
||||
@@ -55,7 +57,7 @@ extra_stylesheets = []
|
||||
# Use bulletproof janus-based queues for sync/async reliability
|
||||
update_q = RecheckPriorityQueue()
|
||||
notification_q = NotificationQueue()
|
||||
MAX_QUEUE_SIZE = 2000
|
||||
MAX_QUEUE_SIZE = 5000
|
||||
|
||||
app = Flask(__name__,
|
||||
static_url_path="",
|
||||
@@ -69,9 +71,13 @@ socketio_server = None
|
||||
CORS(app)
|
||||
|
||||
# Super handy for compressing large BrowserSteps responses and others
|
||||
FlaskCompress(app)
|
||||
app.config['COMPRESS_MIN_SIZE'] = 4096
|
||||
# Flask-Compress handles HTTP compression, Socket.IO compression disabled to prevent memory leak
|
||||
compress = FlaskCompress()
|
||||
app.config['COMPRESS_MIN_SIZE'] = 2096
|
||||
app.config['COMPRESS_MIMETYPES'] = ['text/html', 'text/css', 'text/javascript', 'application/json', 'application/javascript', 'image/svg+xml']
|
||||
# Use gzip only - smaller memory footprint than zstd/brotli (4-8KB vs 200-500KB contexts)
|
||||
app.config['COMPRESS_ALGORITHM'] = ['gzip']
|
||||
compress.init_app(app)
|
||||
app.config['TEMPLATES_AUTO_RELOAD'] = False
|
||||
|
||||
|
||||
@@ -84,6 +90,18 @@ app.config['NEW_VERSION_AVAILABLE'] = False
|
||||
if os.getenv('FLASK_SERVER_NAME'):
|
||||
app.config['SERVER_NAME'] = os.getenv('FLASK_SERVER_NAME')
|
||||
|
||||
# Babel/i18n configuration
|
||||
app.config['BABEL_TRANSLATION_DIRECTORIES'] = str(Path(__file__).parent / 'translations')
|
||||
app.config['BABEL_DEFAULT_LOCALE'] = 'en_GB'
|
||||
|
||||
# Session configuration
|
||||
# NOTE: Flask session (for locale, etc.) is separate from Flask-Login's remember-me cookie
|
||||
# - Flask session stores data like session['locale'] in a signed cookie
|
||||
# - Flask-Login's remember=True creates a separate authentication cookie
|
||||
# - Setting PERMANENT_SESSION_LIFETIME controls how long the Flask session cookie lasts
|
||||
from datetime import timedelta
|
||||
app.config['PERMANENT_SESSION_LIFETIME'] = timedelta(days=3650) # ~10 years (effectively unlimited)
|
||||
|
||||
#app.config["EXPLAIN_TEMPLATE_LOADING"] = True
|
||||
|
||||
|
||||
@@ -177,7 +195,7 @@ def _jinja2_filter_format_number_locale(value: float) -> str:
|
||||
|
||||
@app.template_global('is_checking_now')
|
||||
def _watch_is_checking_now(watch_obj, format="%Y-%m-%d %H:%M:%S"):
|
||||
return worker_handler.is_watch_running(watch_obj['uuid'])
|
||||
return worker_pool.is_watch_running(watch_obj['uuid'])
|
||||
|
||||
@app.template_global('get_watch_queue_position')
|
||||
def _get_watch_queue_position(watch_obj):
|
||||
@@ -188,13 +206,13 @@ def _get_watch_queue_position(watch_obj):
|
||||
@app.template_global('get_current_worker_count')
|
||||
def _get_current_worker_count():
|
||||
"""Get the current number of operational workers"""
|
||||
return worker_handler.get_worker_count()
|
||||
return worker_pool.get_worker_count()
|
||||
|
||||
@app.template_global('get_worker_status_info')
|
||||
def _get_worker_status_info():
|
||||
"""Get detailed worker status information for display"""
|
||||
status = worker_handler.get_worker_status()
|
||||
running_uuids = worker_handler.get_running_uuids()
|
||||
status = worker_pool.get_worker_status()
|
||||
running_uuids = worker_pool.get_running_uuids()
|
||||
|
||||
return {
|
||||
'count': status['worker_count'],
|
||||
@@ -370,6 +388,9 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
global datastore, socketio_server
|
||||
datastore = datastore_o
|
||||
|
||||
# Set datastore reference in notification queue for all_muted checking
|
||||
notification_q.set_datastore(datastore)
|
||||
|
||||
# Import and create a wrapper for is_safe_url that has access to app
|
||||
from changedetectionio.is_safe_url import is_safe_url as _is_safe_url
|
||||
|
||||
@@ -380,7 +401,10 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
# so far just for read-only via tests, but this will be moved eventually to be the main source
|
||||
# (instead of the global var)
|
||||
app.config['DATASTORE'] = datastore_o
|
||||
|
||||
|
||||
# Store batch mode flag to skip background threads when running in batch mode
|
||||
app.config['batch_mode'] = config.get('batch_mode', False) if config else False
|
||||
|
||||
# Store the signal in the app config to ensure it's accessible everywhere
|
||||
app.config['watch_check_update_SIGNAL'] = watch_check_update
|
||||
|
||||
@@ -393,15 +417,27 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
language_codes = get_language_codes()
|
||||
|
||||
def get_locale():
|
||||
# Locale aliases: map browser language codes to translation directory names
|
||||
# This handles cases where browsers send standard codes (e.g., zh-TW)
|
||||
# but our translations use more specific codes (e.g., zh_Hant_TW)
|
||||
locale_aliases = {
|
||||
'zh-TW': 'zh_Hant_TW', # Traditional Chinese: browser sends zh-TW, we use zh_Hant_TW
|
||||
'zh_TW': 'zh_Hant_TW', # Also handle underscore variant
|
||||
}
|
||||
|
||||
# 1. Try to get locale from session (user explicitly selected)
|
||||
if 'locale' in session:
|
||||
locale = session['locale']
|
||||
logger.trace(f"DEBUG: get_locale() returning from session: {locale}")
|
||||
return locale
|
||||
return session['locale']
|
||||
|
||||
# 2. Fall back to Accept-Language header
|
||||
locale = request.accept_languages.best_match(language_codes)
|
||||
logger.trace(f"DEBUG: get_locale() returning from Accept-Language: {locale}")
|
||||
return locale
|
||||
# Get the best match from browser's Accept-Language header
|
||||
browser_locale = request.accept_languages.best_match(language_codes + list(locale_aliases.keys()))
|
||||
|
||||
# 3. Check if we need to map the browser locale to our internal locale
|
||||
if browser_locale in locale_aliases:
|
||||
return locale_aliases[browser_locale]
|
||||
|
||||
return browser_locale
|
||||
|
||||
# Initialize Babel with locale selector
|
||||
babel = Babel(app, locale_selector=get_locale)
|
||||
@@ -518,9 +554,23 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
@app.route('/set-language/<locale>')
|
||||
def set_language(locale):
|
||||
"""Set the user's preferred language in the session"""
|
||||
if not request.cookies:
|
||||
logger.error("Cannot set language without session cookie")
|
||||
flash("Cannot set language without session cookie", 'error')
|
||||
return redirect(url_for('watchlist.index'))
|
||||
|
||||
# Validate the locale against available languages
|
||||
if locale in language_codes:
|
||||
# Make session permanent so language preference persists across browser sessions
|
||||
# NOTE: This is the Flask session cookie (separate from Flask-Login's remember-me auth cookie)
|
||||
session.permanent = True
|
||||
session['locale'] = locale
|
||||
|
||||
# CRITICAL: Flask-Babel caches the locale in the request context (ctx.babel_locale)
|
||||
# We must refresh to clear this cache so the new locale takes effect immediately
|
||||
# This is especially important for tests where multiple requests happen rapidly
|
||||
from flask_babel import refresh
|
||||
refresh()
|
||||
else:
|
||||
logger.error(f"Invalid locale {locale}, available: {language_codes}")
|
||||
|
||||
@@ -652,16 +702,9 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
|
||||
favicon_filename = watch.get_favicon_filename()
|
||||
if favicon_filename:
|
||||
try:
|
||||
import magic
|
||||
mime = magic.from_file(
|
||||
os.path.join(watch.watch_data_dir, favicon_filename),
|
||||
mime=True
|
||||
)
|
||||
except ImportError:
|
||||
# Fallback, no python-magic
|
||||
import mimetypes
|
||||
mime, encoding = mimetypes.guess_type(favicon_filename)
|
||||
# Use cached MIME type detection
|
||||
filepath = os.path.join(watch.watch_data_dir, favicon_filename)
|
||||
mime = get_favicon_mime_type(filepath)
|
||||
|
||||
response = make_response(send_from_directory(watch.watch_data_dir, favicon_filename))
|
||||
response.headers['Content-type'] = mime
|
||||
@@ -758,13 +801,15 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
|
||||
# watchlist UI buttons etc
|
||||
import changedetectionio.blueprint.ui as ui
|
||||
app.register_blueprint(ui.construct_blueprint(datastore, update_q, worker_handler, queuedWatchMetaData, watch_check_update))
|
||||
app.register_blueprint(ui.construct_blueprint(datastore, update_q, worker_pool, queuedWatchMetaData, watch_check_update))
|
||||
|
||||
import changedetectionio.blueprint.watchlist as watchlist
|
||||
app.register_blueprint(watchlist.construct_blueprint(datastore=datastore, update_q=update_q, queuedWatchMetaData=queuedWatchMetaData), url_prefix='')
|
||||
|
||||
# Initialize Socket.IO server conditionally based on settings
|
||||
socket_io_enabled = datastore.data['settings']['application']['ui'].get('socket_io_enabled', True)
|
||||
if socket_io_enabled and app.config.get('batch_mode'):
|
||||
socket_io_enabled = False
|
||||
if socket_io_enabled:
|
||||
from changedetectionio.realtime.socket_server import init_socketio
|
||||
global socketio_server
|
||||
@@ -793,10 +838,10 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
expected_workers = int(os.getenv("FETCH_WORKERS", datastore.data['settings']['requests']['workers']))
|
||||
|
||||
# Get basic status
|
||||
status = worker_handler.get_worker_status()
|
||||
status = worker_pool.get_worker_status()
|
||||
|
||||
# Perform health check
|
||||
health_result = worker_handler.check_worker_health(
|
||||
health_result = worker_pool.check_worker_health(
|
||||
expected_count=expected_workers,
|
||||
update_q=update_q,
|
||||
notification_q=notification_q,
|
||||
@@ -860,16 +905,31 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
# Can be overridden by ENV or use the default settings
|
||||
n_workers = int(os.getenv("FETCH_WORKERS", datastore.data['settings']['requests']['workers']))
|
||||
logger.info(f"Starting {n_workers} workers during app initialization")
|
||||
worker_handler.start_workers(n_workers, update_q, notification_q, app, datastore)
|
||||
worker_pool.start_workers(n_workers, update_q, notification_q, app, datastore)
|
||||
|
||||
# @todo handle ctrl break
|
||||
ticker_thread = threading.Thread(target=ticker_thread_check_time_launch_checks, daemon=True, name="TickerThread-ScheduleChecker").start()
|
||||
threading.Thread(target=notification_runner, daemon=True, name="NotificationRunner").start()
|
||||
# Skip background threads in batch mode (just process queue and exit)
|
||||
batch_mode = app.config.get('batch_mode', False)
|
||||
if not batch_mode:
|
||||
# @todo handle ctrl break
|
||||
ticker_thread = threading.Thread(target=ticker_thread_check_time_launch_checks, daemon=True, name="TickerThread-ScheduleChecker").start()
|
||||
|
||||
in_pytest = "pytest" in sys.modules or "PYTEST_CURRENT_TEST" in os.environ
|
||||
# Check for new release version, but not when running in test/build or pytest
|
||||
if not os.getenv("GITHUB_REF", False) and not strtobool(os.getenv('DISABLE_VERSION_CHECK', 'no')) and not in_pytest:
|
||||
threading.Thread(target=check_for_new_version, daemon=True, name="VersionChecker").start()
|
||||
# Start configurable number of notification workers (default 1)
|
||||
notification_workers = int(os.getenv("NOTIFICATION_WORKERS", "1"))
|
||||
for i in range(notification_workers):
|
||||
threading.Thread(
|
||||
target=notification_runner,
|
||||
args=(i,),
|
||||
daemon=True,
|
||||
name=f"NotificationRunner-{i}"
|
||||
).start()
|
||||
logger.info(f"Started {notification_workers} notification worker(s)")
|
||||
|
||||
in_pytest = "pytest" in sys.modules or "PYTEST_CURRENT_TEST" in os.environ
|
||||
# Check for new release version, but not when running in test/build or pytest
|
||||
if not os.getenv("GITHUB_REF", False) and not strtobool(os.getenv('DISABLE_VERSION_CHECK', 'no')) and not in_pytest:
|
||||
threading.Thread(target=check_for_new_version, daemon=True, name="VersionChecker").start()
|
||||
else:
|
||||
logger.info("Batch mode: Skipping ticker thread, notification runner, and version checker")
|
||||
|
||||
# Return the Flask app - the Socket.IO will be attached to it but initialized separately
|
||||
# This avoids circular dependencies
|
||||
@@ -904,14 +964,14 @@ def check_for_new_version():
|
||||
app.config.exit.wait(86400)
|
||||
|
||||
|
||||
def notification_runner():
|
||||
def notification_runner(worker_id=0):
|
||||
global notification_debug_log
|
||||
from datetime import datetime
|
||||
import json
|
||||
with app.app_context():
|
||||
while not app.config.exit.is_set():
|
||||
try:
|
||||
# At the moment only one thread runs (single runner)
|
||||
# Multiple workers can run concurrently (configurable via NOTIFICATION_WORKERS)
|
||||
n_object = notification_q.get(block=False)
|
||||
except queue.Empty:
|
||||
app.config.exit.wait(1)
|
||||
@@ -937,7 +997,7 @@ def notification_runner():
|
||||
sent_obj = process_notification(n_object, datastore)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Watch URL: {n_object['watch_url']} Error {str(e)}")
|
||||
logger.error(f"Notification worker {worker_id} - Watch URL: {n_object['watch_url']} Error {str(e)}")
|
||||
|
||||
# UUID wont be present when we submit a 'test' from the global settings
|
||||
if 'uuid' in n_object:
|
||||
@@ -967,6 +1027,10 @@ def ticker_thread_check_time_launch_checks():
|
||||
logger.debug(f"System env MINIMUM_SECONDS_RECHECK_TIME {recheck_time_minimum_seconds}")
|
||||
|
||||
# Workers are now started during app initialization, not here
|
||||
WAIT_TIME_BETWEEN_LOOP = 1.0 if not IN_PYTEST else 0.01
|
||||
if IN_PYTEST:
|
||||
# The time between loops should be less than the first .sleep/wait in def wait_for_all_checks() of tests/util.py
|
||||
logger.warning(f"Looks like we're in PYTEST! Setting time between searching for items to add to the queue to {WAIT_TIME_BETWEEN_LOOP}s")
|
||||
|
||||
while not app.config.exit.is_set():
|
||||
|
||||
@@ -974,7 +1038,7 @@ def ticker_thread_check_time_launch_checks():
|
||||
now = time.time()
|
||||
if now - last_health_check > 60:
|
||||
expected_workers = int(os.getenv("FETCH_WORKERS", datastore.data['settings']['requests']['workers']))
|
||||
health_result = worker_handler.check_worker_health(
|
||||
health_result = worker_pool.check_worker_health(
|
||||
expected_count=expected_workers,
|
||||
update_q=update_q,
|
||||
notification_q=notification_q,
|
||||
@@ -984,11 +1048,19 @@ def ticker_thread_check_time_launch_checks():
|
||||
|
||||
if health_result['status'] != 'healthy':
|
||||
logger.warning(f"Worker health check: {health_result['message']}")
|
||||
|
||||
|
||||
last_health_check = now
|
||||
|
||||
# Check if all checks are paused
|
||||
if datastore.data['settings']['application'].get('all_paused', False):
|
||||
app.config.exit.wait(1)
|
||||
continue
|
||||
|
||||
# Get a list of watches by UUID that are currently fetching data
|
||||
running_uuids = worker_handler.get_running_uuids()
|
||||
running_uuids = worker_pool.get_running_uuids()
|
||||
|
||||
# Build set of queued UUIDs once for O(1) lookup instead of O(n) per watch
|
||||
queued_uuids = {q_item.item['uuid'] for q_item in update_q.queue}
|
||||
|
||||
# Re #232 - Deepcopy the data incase it changes while we're iterating through it all
|
||||
watch_uuid_list = []
|
||||
@@ -1006,16 +1078,17 @@ def ticker_thread_check_time_launch_checks():
|
||||
else:
|
||||
break
|
||||
|
||||
# Re #438 - Don't place more watches in the queue to be checked if the queue is already large
|
||||
while update_q.qsize() >= 2000:
|
||||
logger.warning(f"Recheck watches queue size limit reached ({MAX_QUEUE_SIZE}), skipping adding more items")
|
||||
app.config.exit.wait(10.0)
|
||||
|
||||
|
||||
recheck_time_system_seconds = int(datastore.threshold_seconds)
|
||||
|
||||
# Check for watches outside of the time threshold to put in the thread queue.
|
||||
for uuid in watch_uuid_list:
|
||||
for watch_index, uuid in enumerate(watch_uuid_list):
|
||||
# Re #438 - Check queue size every 100 watches for CPU efficiency (not every watch)
|
||||
if watch_index % 100 == 0:
|
||||
current_queue_size = update_q.qsize()
|
||||
if current_queue_size >= MAX_QUEUE_SIZE:
|
||||
logger.debug(f"Queue size limit reached ({current_queue_size}/{MAX_QUEUE_SIZE}), stopping scheduler this iteration.")
|
||||
break
|
||||
|
||||
now = time.time()
|
||||
watch = datastore.data['watching'].get(uuid)
|
||||
if not watch:
|
||||
@@ -1065,7 +1138,7 @@ def ticker_thread_check_time_launch_checks():
|
||||
seconds_since_last_recheck = now - watch['last_checked']
|
||||
|
||||
if seconds_since_last_recheck >= (threshold + watch.jitter_seconds) and seconds_since_last_recheck >= recheck_time_minimum_seconds:
|
||||
if not uuid in running_uuids and uuid not in [q_uuid.item['uuid'] for q_uuid in update_q.queue]:
|
||||
if not uuid in running_uuids and uuid not in queued_uuids:
|
||||
|
||||
# Proxies can be set to have a limit on seconds between which they can be called
|
||||
watch_proxy = datastore.get_preferred_proxy_for_watch(uuid=uuid)
|
||||
@@ -1090,7 +1163,7 @@ def ticker_thread_check_time_launch_checks():
|
||||
priority = int(time.time())
|
||||
|
||||
# Into the queue with you
|
||||
queued_successfully = worker_handler.queue_item_async_safe(update_q,
|
||||
queued_successfully = worker_pool.queue_item_async_safe(update_q,
|
||||
queuedWatchMetaData.PrioritizedItem(priority=priority,
|
||||
item={'uuid': uuid})
|
||||
)
|
||||
@@ -1108,4 +1181,4 @@ def ticker_thread_check_time_launch_checks():
|
||||
watch.jitter_seconds = 0
|
||||
|
||||
# Should be low so we can break this out in testing
|
||||
app.config.exit.wait(1)
|
||||
app.config.exit.wait(WAIT_TIME_BETWEEN_LOOP)
|
||||
|
||||
@@ -727,10 +727,10 @@ class ValidateStartsWithRegex(object):
|
||||
raise ValidationError(self.message or _l("Invalid value."))
|
||||
|
||||
class quickWatchForm(Form):
|
||||
url = fields.URLField('URL', validators=[validateURL()])
|
||||
tags = StringTagUUID('Group tag', [validators.Optional()])
|
||||
url = fields.URLField(_l('URL'), validators=[validateURL()])
|
||||
tags = StringTagUUID(_l('Group tag'), validators=[validators.Optional()])
|
||||
watch_submit_button = SubmitField(_l('Watch'), render_kw={"class": "pure-button pure-button-primary"})
|
||||
processor = RadioField(_l('Processor'), choices=lambda: processors.available_processors(), default="text_json_diff")
|
||||
processor = RadioField(_l('Processor'), choices=lambda: processors.available_processors(), default=processors.get_default_processor)
|
||||
edit_and_watch_submit_button = SubmitField(_l('Edit > Watch'), render_kw={"class": "pure-button pure-button-primary"})
|
||||
|
||||
|
||||
@@ -749,7 +749,7 @@ class commonSettingsForm(Form):
|
||||
notification_format = SelectField(_l('Notification format'), choices=list(valid_notification_formats.items()))
|
||||
notification_title = StringField(_l('Notification Title'), default='ChangeDetection.io Notification - {{ watch_url }}', validators=[validators.Optional(), ValidateJinja2Template()])
|
||||
notification_urls = StringListField(_l('Notification URL List'), validators=[validators.Optional(), ValidateAppRiseServers(), ValidateJinja2Template()])
|
||||
processor = RadioField( label=_l("Processor - What do you want to achieve?"), choices=lambda: processors.available_processors(), default="text_json_diff")
|
||||
processor = RadioField( label=_l("Processor - What do you want to achieve?"), choices=lambda: processors.available_processors(), default=processors.get_default_processor)
|
||||
scheduler_timezone_default = StringField(_l("Default timezone for watch check scheduler"), render_kw={"list": "timezones"}, validators=[validateTimeZoneName()])
|
||||
webdriver_delay = IntegerField(_l('Wait seconds before extracting text'), validators=[validators.Optional(), validators.NumberRange(min=1, message=_l("Should contain one or more seconds"))])
|
||||
|
||||
@@ -763,7 +763,7 @@ class commonSettingsForm(Form):
|
||||
|
||||
|
||||
class importForm(Form):
|
||||
processor = RadioField(_l('Processor'), choices=lambda: processors.available_processors(), default="text_json_diff")
|
||||
processor = RadioField(_l('Processor'), choices=lambda: processors.available_processors(), default=processors.get_default_processor)
|
||||
urls = TextAreaField(_l('URLs'))
|
||||
xlsx_file = FileField(_l('Upload .xlsx file'), validators=[FileAllowed(['xlsx'], _l('Must be .xlsx file!'))])
|
||||
file_mapping = SelectField(_l('File mapping'), [validators.DataRequired()], choices={('wachete', 'Wachete mapping'), ('custom','Custom mapping')})
|
||||
@@ -786,6 +786,7 @@ class processor_text_json_diff_form(commonSettingsForm):
|
||||
|
||||
time_between_check = EnhancedFormField(
|
||||
TimeBetweenCheckForm,
|
||||
label=_l('Time Between Check'),
|
||||
conditional_field='time_between_check_use_default',
|
||||
conditional_message=REQUIRE_ATLEAST_ONE_TIME_PART_WHEN_NOT_GLOBAL_DEFAULT,
|
||||
conditional_test_function=validate_time_between_check_has_values
|
||||
@@ -836,6 +837,8 @@ class processor_text_json_diff_form(commonSettingsForm):
|
||||
conditions = FieldList(FormField(ConditionFormRow), min_entries=1) # Add rule logic here
|
||||
use_page_title_in_list = TernaryNoneBooleanField(_l('Use page <title> in list'), default=None)
|
||||
|
||||
history_snapshot_max_length = IntegerField(_l('Number of history items per watch to keep'), render_kw={"style": "width: 5em;"}, validators=[validators.Optional(), validators.NumberRange(min=2)])
|
||||
|
||||
def extra_tab_content(self):
|
||||
return None
|
||||
|
||||
@@ -947,7 +950,7 @@ class DefaultUAInputForm(Form):
|
||||
|
||||
# datastore.data['settings']['requests']..
|
||||
class globalSettingsRequestForm(Form):
|
||||
time_between_check = RequiredFormField(TimeBetweenCheckForm)
|
||||
time_between_check = RequiredFormField(TimeBetweenCheckForm, label=_l('Time Between Check'))
|
||||
time_schedule_limit = FormField(ScheduleLimitForm)
|
||||
proxy = RadioField(_l('Default proxy'))
|
||||
jitter_seconds = IntegerField(_l('Random jitter seconds ± check'),
|
||||
@@ -1007,7 +1010,7 @@ class globalSettingsApplicationForm(commonSettingsForm):
|
||||
render_kw={"placeholder": "0.1", "style": "width: 8em;"}
|
||||
)
|
||||
|
||||
password = SaltyPasswordField()
|
||||
password = SaltyPasswordField(_l('Password'))
|
||||
pager_size = IntegerField(_l('Pager size'),
|
||||
render_kw={"style": "width: 5em;"},
|
||||
validators=[validators.NumberRange(min=0,
|
||||
@@ -1033,6 +1036,8 @@ class globalSettingsApplicationForm(commonSettingsForm):
|
||||
render_kw={"style": "width: 5em;"},
|
||||
validators=[validators.NumberRange(min=0,
|
||||
message=_l("Should contain zero or more attempts"))])
|
||||
|
||||
history_snapshot_max_length = IntegerField(_l('Number of history items per watch to keep'), render_kw={"style": "width: 5em;"}, validators=[validators.Optional(), validators.NumberRange(min=2)])
|
||||
ui = FormField(globalSettingsApplicationUIForm)
|
||||
|
||||
|
||||
|
||||
@@ -539,6 +539,18 @@ def cdata_in_document_to_text(html_content: str, render_anchor_tag_content=False
|
||||
|
||||
|
||||
def html_to_text(html_content: str, render_anchor_tag_content=False, is_rss=False, timeout=10) -> str:
|
||||
"""
|
||||
Convert HTML content to plain text using inscriptis.
|
||||
|
||||
Thread-Safety: This function uses inscriptis.get_text() which internally calls
|
||||
lxml.html.fromstring() with the default parser. Testing with 50 concurrent threads
|
||||
confirms this approach is thread-safe and produces deterministic output.
|
||||
|
||||
Alternative Approach Rejected: An explicit HTMLParser instance (thread-local or fresh)
|
||||
would also be thread-safe, but was found to break change detection logic in subtle ways
|
||||
(test_check_basic_change_detection_functionality). The default parser provides correct
|
||||
and reliable behavior.
|
||||
"""
|
||||
from inscriptis import get_text
|
||||
from inscriptis.model.config import ParserConfig
|
||||
|
||||
|
||||
@@ -52,7 +52,13 @@ def render(template_str, **args: t.Any) -> str:
|
||||
return output[:JINJA2_MAX_RETURN_PAYLOAD_SIZE]
|
||||
|
||||
def render_fully_escaped(content):
|
||||
env = jinja2.sandbox.ImmutableSandboxedEnvironment(autoescape=True)
|
||||
template = env.from_string("{{ some_html|e }}")
|
||||
return template.render(some_html=content)
|
||||
"""
|
||||
Escape HTML content safely.
|
||||
|
||||
MEMORY LEAK FIX: Use markupsafe.escape() directly instead of creating
|
||||
Jinja2 environments (was causing 1M+ compilations per page load).
|
||||
Simpler, faster, and no concerns about environment state.
|
||||
"""
|
||||
from markupsafe import escape
|
||||
return str(escape(content))
|
||||
|
||||
|
||||
@@ -29,7 +29,9 @@ def get_timeago_locale(flask_locale):
|
||||
"""
|
||||
locale_map = {
|
||||
'zh': 'zh_CN', # Chinese Simplified
|
||||
'zh_Hant_TW': 'zh_TW', # Flask-Babel normalizes zh_TW to zh_Hant_TW
|
||||
# timeago library just hasn't been updated to use the more modern locale naming convention, before BCP 47 / RFC 5646.
|
||||
'zh_TW': 'zh_TW', # Chinese Traditional (timeago uses zh_TW)
|
||||
'zh_Hant_TW': 'zh_TW', # Flask-Babel normalizes zh_TW to zh_Hant_TW, map back to timeago's zh_TW
|
||||
'pt': 'pt_PT', # Portuguese (Portugal)
|
||||
'sv': 'sv_SE', # Swedish
|
||||
'no': 'nb_NO', # Norwegian Bokmål
|
||||
@@ -54,7 +56,7 @@ LANGUAGE_DATA = {
|
||||
'it': {'flag': 'fi fi-it fis', 'name': 'Italiano'},
|
||||
'ja': {'flag': 'fi fi-jp fis', 'name': '日本語'},
|
||||
'zh': {'flag': 'fi fi-cn fis', 'name': '中文 (简体)'},
|
||||
'zh_TW': {'flag': 'fi fi-tw fis', 'name': '繁體中文'},
|
||||
'zh_Hant_TW': {'flag': 'fi fi-tw fis', 'name': '繁體中文'},
|
||||
'ru': {'flag': 'fi fi-ru fis', 'name': 'Русский'},
|
||||
'pl': {'flag': 'fi fi-pl fis', 'name': 'Polski'},
|
||||
'nl': {'flag': 'fi fi-nl fis', 'name': 'Nederlands'},
|
||||
|
||||
@@ -29,7 +29,7 @@ class model(dict):
|
||||
'proxy': None, # Preferred proxy connection
|
||||
'time_between_check': {'weeks': None, 'days': None, 'hours': 3, 'minutes': None, 'seconds': None},
|
||||
'timeout': int(getenv("DEFAULT_SETTINGS_REQUESTS_TIMEOUT", "45")), # Default 45 seconds
|
||||
'workers': int(getenv("DEFAULT_SETTINGS_REQUESTS_WORKERS", "10")), # Number of threads, lower is better for slow connections
|
||||
'workers': int(getenv("DEFAULT_SETTINGS_REQUESTS_WORKERS", "5")), # Number of threads, lower is better for slow connections
|
||||
'default_ua': {
|
||||
'html_requests': getenv("DEFAULT_SETTINGS_HEADERS_USERAGENT", DEFAULT_SETTINGS_HEADERS_USERAGENT),
|
||||
'html_webdriver': None,
|
||||
@@ -37,6 +37,8 @@ class model(dict):
|
||||
},
|
||||
'application': {
|
||||
# Custom notification content
|
||||
'all_paused': False,
|
||||
'all_muted': False,
|
||||
'api_access_token_enabled': True,
|
||||
'base_url' : None,
|
||||
'empty_pages_are_a_change': False,
|
||||
@@ -44,6 +46,7 @@ class model(dict):
|
||||
'filter_failure_notification_threshold_attempts': _FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT,
|
||||
'global_ignore_text': [], # List of text to ignore when calculating the comparison checksum
|
||||
'global_subtractive_selectors': [],
|
||||
'history_snapshot_max_length': None,
|
||||
'ignore_whitespace': True,
|
||||
'ignore_status_codes': False, #@todo implement, as ternary.
|
||||
'ssim_threshold': '0.96', # Default SSIM threshold for screenshot comparison
|
||||
|
||||
@@ -1,10 +1,52 @@
|
||||
"""
|
||||
Tag/Group domain model for organizing and overriding watch settings.
|
||||
|
||||
ARCHITECTURE NOTE: Configuration Override Hierarchy
|
||||
===================================================
|
||||
|
||||
Tags can override Watch settings when overrides_watch=True.
|
||||
Current implementation requires manual checking in processors:
|
||||
|
||||
for tag_uuid in watch.get('tags'):
|
||||
tag = datastore['settings']['application']['tags'][tag_uuid]
|
||||
if tag.get('overrides_watch'):
|
||||
restock_settings = tag.get('restock_settings', {})
|
||||
break
|
||||
|
||||
With Pydantic, this would be automatic via chain resolution:
|
||||
Watch → Tag (first with overrides_watch) → Global
|
||||
|
||||
See: Watch.py model docstring for full Pydantic architecture explanation
|
||||
See: processors/restock_diff/processor.py:184-192 for current manual implementation
|
||||
"""
|
||||
|
||||
from changedetectionio.model import watch_base
|
||||
|
||||
|
||||
class model(watch_base):
|
||||
"""
|
||||
Tag domain model - groups watches and can override their settings.
|
||||
|
||||
Tags inherit from watch_base to reuse all the same fields as Watch.
|
||||
When overrides_watch=True, tag settings take precedence over watch settings
|
||||
for all watches in this tag/group.
|
||||
|
||||
Fields:
|
||||
overrides_watch (bool): If True, this tag's settings override watch settings
|
||||
title (str): Display name for this tag/group
|
||||
uuid (str): Unique identifier
|
||||
... (all fields from watch_base can be set as tag-level overrides)
|
||||
|
||||
Resolution order when overrides_watch=True:
|
||||
Watch.field → Tag.field (if overrides_watch) → Global.field
|
||||
"""
|
||||
|
||||
def __init__(self, *arg, **kw):
|
||||
# Store datastore reference (optional for Tags, but good for consistency)
|
||||
self.__datastore = kw.get('__datastore')
|
||||
if kw.get('__datastore'):
|
||||
del kw['__datastore']
|
||||
|
||||
super(model, self).__init__(*arg, **kw)
|
||||
|
||||
self['overrides_watch'] = kw.get('default', {}).get('overrides_watch')
|
||||
|
||||
+342
-124
@@ -1,3 +1,32 @@
|
||||
"""
|
||||
Watch domain model for change detection monitoring.
|
||||
|
||||
ARCHITECTURE NOTE: Configuration Override Hierarchy
|
||||
===================================================
|
||||
|
||||
This module implements Watch objects that inherit from dict (technical debt).
|
||||
The dream architecture would use Pydantic for:
|
||||
|
||||
1. CHAIN RESOLUTION (Watch → Tag → Global Settings)
|
||||
- Current: Manual resolution scattered across codebase
|
||||
- Future: @computed_field properties with automatic resolution
|
||||
- Examples: resolved_fetch_backend, resolved_restock_settings, etc.
|
||||
|
||||
2. DATABASE BACKEND ABSTRACTION
|
||||
- Current: Domain model tightly coupled to file-based JSON storage
|
||||
- Future: Domain model (Pydantic) separate from persistence layer
|
||||
- Enables: Easy migration to PostgreSQL, MongoDB, etc.
|
||||
|
||||
3. TYPE SAFETY & VALIDATION
|
||||
- Current: Dict access with no compile-time checks
|
||||
- Future: Type hints, IDE autocomplete, validation at boundaries
|
||||
|
||||
See class model docstring for detailed explanation and examples.
|
||||
See: processors/restock_diff/processor.py:184-192 for manual resolution example
|
||||
"""
|
||||
import gc
|
||||
from copy import copy
|
||||
|
||||
from blinker import signal
|
||||
from changedetectionio.validate_url import is_safe_valid_url
|
||||
|
||||
@@ -13,57 +42,21 @@ from .. import jinja2_custom as safe_jinja
|
||||
from ..html_tools import TRANSLATE_WHITESPACE_TABLE
|
||||
|
||||
FAVICON_RESAVE_THRESHOLD_SECONDS=86400
|
||||
BROTLI_COMPRESS_SIZE_THRESHOLD = int(os.getenv('SNAPSHOT_BROTLI_COMPRESSION_THRESHOLD', 1024))
|
||||
BROTLI_COMPRESS_SIZE_THRESHOLD = int(os.getenv('SNAPSHOT_BROTLI_COMPRESSION_THRESHOLD', 1024*20))
|
||||
|
||||
minimum_seconds_recheck_time = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 3))
|
||||
mtable = {'seconds': 1, 'minutes': 60, 'hours': 3600, 'days': 86400, 'weeks': 86400 * 7}
|
||||
|
||||
def _brotli_compress_worker(conn, filepath, mode=None):
|
||||
def _brotli_save(contents, filepath, mode=None, fallback_uncompressed=False):
|
||||
"""
|
||||
Worker function to compress data with brotli in a separate process.
|
||||
This isolates memory - when process exits, OS reclaims all memory.
|
||||
|
||||
Args:
|
||||
conn: multiprocessing.Pipe connection to receive data
|
||||
filepath: destination file path
|
||||
mode: brotli compression mode (e.g., brotli.MODE_TEXT)
|
||||
"""
|
||||
import brotli
|
||||
|
||||
try:
|
||||
# Receive data from parent process via pipe (avoids pickle overhead)
|
||||
contents = conn.recv()
|
||||
logger.debug(f"Starting brotli compression of {len(contents)} bytes.")
|
||||
|
||||
if mode is not None:
|
||||
compressed_data = brotli.compress(contents, mode=mode)
|
||||
else:
|
||||
compressed_data = brotli.compress(contents)
|
||||
|
||||
with open(filepath, 'wb') as f:
|
||||
f.write(compressed_data)
|
||||
|
||||
# Send success status back
|
||||
conn.send(True)
|
||||
logger.debug(f"Finished brotli compression - From {len(contents)} to {len(compressed_data)} bytes.")
|
||||
# No need for explicit cleanup - process exit frees all memory
|
||||
except Exception as e:
|
||||
logger.critical(f"Brotli compression worker failed: {e}")
|
||||
conn.send(False)
|
||||
finally:
|
||||
conn.close()
|
||||
|
||||
|
||||
def _brotli_subprocess_save(contents, filepath, mode=None, timeout=30, fallback_uncompressed=False):
|
||||
"""
|
||||
Save compressed data using subprocess to isolate memory.
|
||||
Uses Pipe to avoid pickle overhead for large data.
|
||||
Save compressed data using native brotli with streaming compression.
|
||||
Uses chunked compression to minimize peak memory usage and malloc_trim()
|
||||
to force release of C-level memory back to the OS.
|
||||
|
||||
Args:
|
||||
contents: data to compress (str or bytes)
|
||||
filepath: destination file path
|
||||
mode: brotli compression mode (e.g., brotli.MODE_TEXT)
|
||||
timeout: subprocess timeout in seconds
|
||||
fallback_uncompressed: if True, save uncompressed on failure; if False, raise exception
|
||||
|
||||
Returns:
|
||||
@@ -72,85 +65,164 @@ def _brotli_subprocess_save(contents, filepath, mode=None, timeout=30, fallback_
|
||||
Raises:
|
||||
Exception: if compression fails and fallback_uncompressed is False
|
||||
"""
|
||||
import multiprocessing
|
||||
import sys
|
||||
import brotli
|
||||
import gc
|
||||
import ctypes
|
||||
|
||||
# Ensure contents are bytes
|
||||
if isinstance(contents, str):
|
||||
contents = contents.encode('utf-8')
|
||||
|
||||
# Use explicit spawn context for thread safety (avoids fork() with multi-threaded parent)
|
||||
# Always use spawn - consistent behavior in tests and production
|
||||
ctx = multiprocessing.get_context('spawn')
|
||||
parent_conn, child_conn = ctx.Pipe()
|
||||
|
||||
# Run compression in subprocess using spawn (not fork)
|
||||
proc = ctx.Process(target=_brotli_compress_worker, args=(child_conn, filepath, mode))
|
||||
|
||||
# Windows-safe: Set daemon=False explicitly to avoid issues with process cleanup
|
||||
proc.daemon = False
|
||||
proc.start()
|
||||
|
||||
try:
|
||||
# Send data to subprocess via pipe (avoids pickle)
|
||||
parent_conn.send(contents)
|
||||
original_size = len(contents)
|
||||
logger.debug(f"Starting brotli streaming compression of {original_size} bytes.")
|
||||
|
||||
# Wait for result with timeout
|
||||
if parent_conn.poll(timeout):
|
||||
success = parent_conn.recv()
|
||||
else:
|
||||
success = False
|
||||
logger.warning(f"Brotli compression subprocess timed out after {timeout}s")
|
||||
# Graceful termination with platform-aware cleanup
|
||||
try:
|
||||
proc.terminate()
|
||||
except Exception as term_error:
|
||||
logger.debug(f"Process termination issue (may be normal on Windows): {term_error}")
|
||||
# Create streaming compressor
|
||||
compressor = brotli.Compressor(quality=6, mode=mode if mode is not None else brotli.MODE_GENERIC)
|
||||
|
||||
parent_conn.close()
|
||||
proc.join(timeout=5)
|
||||
# Stream compress in chunks to minimize memory usage
|
||||
chunk_size = 65536 # 64KB chunks
|
||||
total_compressed_size = 0
|
||||
|
||||
# Force kill if still alive after graceful termination
|
||||
if proc.is_alive():
|
||||
try:
|
||||
if sys.platform == 'win32':
|
||||
# Windows: use kill() which is more forceful
|
||||
proc.kill()
|
||||
else:
|
||||
# Unix: terminate() already sent SIGTERM, now try SIGKILL
|
||||
proc.kill()
|
||||
proc.join(timeout=2)
|
||||
except Exception as kill_error:
|
||||
logger.warning(f"Failed to kill brotli compression process: {kill_error}")
|
||||
with open(filepath, 'wb') as f:
|
||||
# Process data in chunks
|
||||
offset = 0
|
||||
while offset < len(contents):
|
||||
chunk = contents[offset:offset + chunk_size]
|
||||
compressed_chunk = compressor.process(chunk)
|
||||
if compressed_chunk:
|
||||
f.write(compressed_chunk)
|
||||
total_compressed_size += len(compressed_chunk)
|
||||
offset += chunk_size
|
||||
|
||||
# Check if file was created successfully
|
||||
if success and os.path.exists(filepath):
|
||||
return filepath
|
||||
# Finalize compression - critical for proper cleanup
|
||||
final_chunk = compressor.finish()
|
||||
if final_chunk:
|
||||
f.write(final_chunk)
|
||||
total_compressed_size += len(final_chunk)
|
||||
|
||||
logger.debug(f"Finished brotli compression - From {original_size} to {total_compressed_size} bytes.")
|
||||
|
||||
# Cleanup: Delete compressor, force Python GC, then force C-level memory release
|
||||
del compressor
|
||||
gc.collect()
|
||||
|
||||
# Force release of C-level memory back to OS (since brotli is a C library)
|
||||
try:
|
||||
ctypes.CDLL('libc.so.6').malloc_trim(0)
|
||||
except Exception:
|
||||
pass # malloc_trim not available on all systems (e.g., macOS)
|
||||
|
||||
return filepath
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Brotli compression error: {e}")
|
||||
try:
|
||||
parent_conn.close()
|
||||
except:
|
||||
pass
|
||||
try:
|
||||
proc.terminate()
|
||||
proc.join(timeout=2)
|
||||
except:
|
||||
pass
|
||||
|
||||
# Compression failed
|
||||
if fallback_uncompressed:
|
||||
logger.warning(f"Brotli compression failed for {filepath}, saving uncompressed")
|
||||
fallback_path = filepath.replace('.br', '')
|
||||
with open(fallback_path, 'wb') as f:
|
||||
f.write(contents)
|
||||
return fallback_path
|
||||
else:
|
||||
raise Exception(f"Brotli compression subprocess failed for {filepath}")
|
||||
# Compression failed
|
||||
if fallback_uncompressed:
|
||||
logger.warning(f"Brotli compression failed for {filepath}, saving uncompressed")
|
||||
fallback_path = filepath.replace('.br', '')
|
||||
with open(fallback_path, 'wb') as f:
|
||||
f.write(contents)
|
||||
return fallback_path
|
||||
else:
|
||||
raise Exception(f"Brotli compression failed for {filepath}: {e}")
|
||||
|
||||
|
||||
class model(watch_base):
|
||||
"""
|
||||
Watch domain model for monitoring URL changes.
|
||||
|
||||
Inherits from watch_base (which inherits dict) - see watch_base docstring for field documentation.
|
||||
|
||||
## Configuration Override Hierarchy (Chain Resolution)
|
||||
|
||||
The dream architecture uses a 3-level resolution chain:
|
||||
Watch settings → Tag/Group settings → Global settings
|
||||
|
||||
Current implementation is MANUAL (see processor.py:184-192 for example):
|
||||
- Processors manually check watch.get('field')
|
||||
- Then loop through watch.tags to find first tag with overrides_watch=True
|
||||
- Finally fall back to datastore['settings']['application']['field']
|
||||
|
||||
FUTURE: Pydantic-based chain resolution would enable:
|
||||
|
||||
```python
|
||||
# Instead of manual resolution in every processor:
|
||||
restock_settings = watch.get('restock_settings', {})
|
||||
for tag_uuid in watch.get('tags'):
|
||||
tag = datastore['settings']['application']['tags'][tag_uuid]
|
||||
if tag.get('overrides_watch'):
|
||||
restock_settings = tag.get('restock_settings', {})
|
||||
break
|
||||
|
||||
# Clean computed properties with automatic resolution:
|
||||
@computed_field
|
||||
def resolved_restock_settings(self) -> dict:
|
||||
if self.restock_settings:
|
||||
return self.restock_settings
|
||||
for tag_uuid in self.tags:
|
||||
tag = self._datastore.get_tag(tag_uuid)
|
||||
if tag.overrides_watch and tag.restock_settings:
|
||||
return tag.restock_settings
|
||||
return self._datastore.settings.restock_settings or {}
|
||||
|
||||
# Usage: watch.resolved_restock_settings (automatic, type-safe, tested once)
|
||||
```
|
||||
|
||||
Benefits of Pydantic migration:
|
||||
1. Single source of truth for resolution logic (not scattered across processors)
|
||||
2. Type safety + IDE autocomplete (watch.resolved_fetch_backend vs dict navigation)
|
||||
3. Database backend abstraction (domain model separate from persistence)
|
||||
4. Automatic validation at boundaries
|
||||
5. Self-documenting via type hints
|
||||
6. Easy to test resolution independently
|
||||
|
||||
Resolution chain examples that would benefit:
|
||||
- fetch_backend: watch → tag → global (see get_fetch_backend property)
|
||||
- notification_urls: watch → tag → global
|
||||
- time_between_check: watch → global (see threshold_seconds)
|
||||
- restock_settings: watch → tag (see processors/restock_diff/processor.py:184-192)
|
||||
- history_snapshot_max_length: watch → global (see save_history_blob:550-556)
|
||||
- All processor_config_* settings could use tag overrides
|
||||
|
||||
## Database Backend Abstraction with Pydantic
|
||||
|
||||
Current: Watch inherits dict, tightly coupled to file-based JSON storage
|
||||
Future: Domain model (Watch) separate from persistence layer
|
||||
|
||||
```python
|
||||
# Domain model (database-agnostic)
|
||||
class Watch(BaseModel):
|
||||
uuid: str
|
||||
url: str
|
||||
# ... validation, business logic
|
||||
|
||||
# Pluggable backends
|
||||
class DataStoreBackend(ABC):
|
||||
def save_watch(self, watch: Watch): ...
|
||||
def load_watch(self, uuid: str) -> Watch: ...
|
||||
|
||||
# Implementations: FileBackend, MongoBackend, PostgresBackend, etc.
|
||||
```
|
||||
|
||||
This would enable:
|
||||
- Easy migration between storage backends (file → postgres → mongodb)
|
||||
- Pydantic handles serialization/deserialization automatically
|
||||
- Domain logic stays clean (no storage concerns in Watch methods)
|
||||
|
||||
## Migration Path
|
||||
|
||||
Given existing codebase, incremental migration recommended:
|
||||
1. Create Pydantic models alongside existing dict-based models
|
||||
2. Add .to_pydantic() / .from_pydantic() bridge methods
|
||||
3. Gradually migrate code to use Pydantic models
|
||||
4. Remove dict inheritance once migration complete
|
||||
|
||||
See: watch_base docstring for technical debt discussion
|
||||
See: processors/restock_diff/processor.py:184-192 for manual resolution example
|
||||
See: Watch.py:550-556 for nested dict navigation that would become watch.resolved_*
|
||||
"""
|
||||
__newest_history_key = None
|
||||
__history_n = 0
|
||||
jitter_seconds = 0
|
||||
@@ -159,8 +231,15 @@ class model(watch_base):
|
||||
self.__datastore_path = kw.get('datastore_path')
|
||||
if kw.get('datastore_path'):
|
||||
del kw['datastore_path']
|
||||
|
||||
|
||||
self.__datastore = kw.get('__datastore')
|
||||
if not self.__datastore:
|
||||
raise ValueError("Watch object requires '__datastore' reference - cannot access global settings without it")
|
||||
if kw.get('__datastore'):
|
||||
del kw['__datastore']
|
||||
|
||||
super(model, self).__init__(*arg, **kw)
|
||||
|
||||
if kw.get('default'):
|
||||
self.update(kw['default'])
|
||||
del kw['default']
|
||||
@@ -171,6 +250,9 @@ class model(watch_base):
|
||||
# Be sure the cached timestamp is ready
|
||||
bump = self.history
|
||||
|
||||
# Note: __deepcopy__, __getstate__, and __setstate__ are inherited from watch_base
|
||||
# This prevents memory leaks by sharing __datastore reference instead of copying it
|
||||
|
||||
@property
|
||||
def viewed(self):
|
||||
# Don't return viewed when last_viewed is 0 and newest_key is 0
|
||||
@@ -280,8 +362,30 @@ class model(watch_base):
|
||||
@property
|
||||
def get_fetch_backend(self):
|
||||
"""
|
||||
Like just using the `fetch_backend` key but there could be some logic
|
||||
:return:
|
||||
Get the fetch backend for this watch with special case handling.
|
||||
|
||||
CHAIN RESOLUTION OPPORTUNITY:
|
||||
Currently returns watch.fetch_backend directly, but doesn't implement
|
||||
Watch → Tag → Global resolution chain. With Pydantic:
|
||||
|
||||
@computed_field
|
||||
def resolved_fetch_backend(self) -> str:
|
||||
# Special case: PDFs always use html_requests
|
||||
if self.is_pdf:
|
||||
return 'html_requests'
|
||||
|
||||
# Watch override
|
||||
if self.fetch_backend and self.fetch_backend != 'system':
|
||||
return self.fetch_backend
|
||||
|
||||
# Tag override (first tag with overrides_watch=True wins)
|
||||
for tag_uuid in self.tags:
|
||||
tag = self._datastore.get_tag(tag_uuid)
|
||||
if tag.overrides_watch and tag.fetch_backend:
|
||||
return tag.fetch_backend
|
||||
|
||||
# Global default
|
||||
return self._datastore.settings.fetch_backend
|
||||
"""
|
||||
# Maybe also if is_image etc?
|
||||
# This is because chrome/playwright wont render the PDF in the browser and we will just fetch it and use pdf2html to see the text.
|
||||
@@ -473,16 +577,49 @@ class model(watch_base):
|
||||
with open(filepath, 'r', encoding='utf-8', errors='ignore') as f:
|
||||
return f.read()
|
||||
|
||||
def _write_atomic(self, dest, data):
|
||||
def _write_atomic(self, dest, data, mode='wb'):
|
||||
"""Write data atomically to dest using a temp file"""
|
||||
if not os.path.exists(dest):
|
||||
import tempfile
|
||||
with tempfile.NamedTemporaryFile('wb', delete=False, dir=self.watch_data_dir) as tmp:
|
||||
tmp.write(data)
|
||||
tmp.flush()
|
||||
os.fsync(tmp.fileno())
|
||||
tmp_path = tmp.name
|
||||
os.replace(tmp_path, dest)
|
||||
import tempfile
|
||||
with tempfile.NamedTemporaryFile(mode, delete=False, dir=self.watch_data_dir) as tmp:
|
||||
tmp.write(data)
|
||||
tmp.flush()
|
||||
os.fsync(tmp.fileno())
|
||||
tmp_path = tmp.name
|
||||
os.replace(tmp_path, dest)
|
||||
|
||||
def history_trim(self, newest_n_items):
|
||||
from pathlib import Path
|
||||
|
||||
# Sort by timestamp (key)
|
||||
sorted_items = sorted(self.history.items(), key=lambda x: int(x[0]))
|
||||
|
||||
keep_part = dict(sorted_items[-newest_n_items:])
|
||||
delete_part = dict(sorted_items[:-newest_n_items])
|
||||
logger.info( f"[{self.get('uuid')}] Trimming history to most recent {newest_n_items} items, keeping {len(keep_part)} items deleting {len(delete_part)} items.")
|
||||
|
||||
if delete_part:
|
||||
for item in delete_part.items():
|
||||
try:
|
||||
Path(item[1]).unlink(missing_ok=True)
|
||||
except Exception as e:
|
||||
logger.critical(f"{str(e)}")
|
||||
finally:
|
||||
logger.debug(f"[{self.get('uuid')}] Deleted {item[1]} history snapshot")
|
||||
try:
|
||||
dest = os.path.join(self.watch_data_dir, self.history_index_filename)
|
||||
output = "\r\n".join(
|
||||
f"{k},{Path(v).name}"
|
||||
for k, v in keep_part.items()
|
||||
)+"\r\n"
|
||||
self._write_atomic(dest=dest, data=output, mode='w')
|
||||
except Exception as e:
|
||||
logger.critical(f"{str(e)}")
|
||||
finally:
|
||||
logger.debug(f"[{self.get('uuid')}] Updated history index {dest}")
|
||||
|
||||
# reimport
|
||||
bump = self.history
|
||||
gc.collect()
|
||||
|
||||
# Save some text file to the appropriate path and bump the history
|
||||
# result_obj from fetch_site_status.run()
|
||||
@@ -491,7 +628,6 @@ class model(watch_base):
|
||||
logger.trace(f"{self.get('uuid')} - Updating {self.history_index_filename} with timestamp {timestamp}")
|
||||
|
||||
self.ensure_data_dir_exists()
|
||||
|
||||
skip_brotli = strtobool(os.getenv('DISABLE_BROTLI_TEXT_SNAPSHOT', 'False'))
|
||||
|
||||
# Binary data - detect file type and save without compression
|
||||
@@ -523,7 +659,7 @@ class model(watch_base):
|
||||
|
||||
if not os.path.exists(dest):
|
||||
try:
|
||||
actual_dest = _brotli_subprocess_save(contents, dest, mode=brotli.MODE_TEXT, fallback_uncompressed=True)
|
||||
actual_dest = _brotli_save(contents, dest, mode=brotli.MODE_TEXT, fallback_uncompressed=True)
|
||||
if actual_dest != dest:
|
||||
snapshot_fname = os.path.basename(actual_dest)
|
||||
except Exception as e:
|
||||
@@ -551,6 +687,20 @@ class model(watch_base):
|
||||
self.__newest_history_key = timestamp
|
||||
self.__history_n += 1
|
||||
|
||||
# MANUAL CHAIN RESOLUTION: Watch → Global
|
||||
# With Pydantic, this would become: maxlen = watch.resolved_history_snapshot_max_length
|
||||
# @computed_field def resolved_history_snapshot_max_length(self) -> Optional[int]:
|
||||
# if self.history_snapshot_max_length: return self.history_snapshot_max_length
|
||||
# if tag := self._get_override_tag(): return tag.history_snapshot_max_length
|
||||
# return self._datastore.settings.history_snapshot_max_length
|
||||
maxlen = (
|
||||
self.get('history_snapshot_max_length')
|
||||
or (self.__datastore and self.__datastore['settings']['application'].get('history_snapshot_max_length'))
|
||||
)
|
||||
|
||||
if maxlen and self.__history_n and self.__history_n > maxlen:
|
||||
self.history_trim(newest_n_items=maxlen)
|
||||
|
||||
# @todo bump static cache of the last timestamp so we dont need to examine the file to set a proper ''viewed'' status
|
||||
return snapshot_fname
|
||||
|
||||
@@ -663,6 +813,11 @@ class model(watch_base):
|
||||
try:
|
||||
with open(fname, 'wb') as f:
|
||||
f.write(decoded)
|
||||
|
||||
# Invalidate favicon filename cache
|
||||
if hasattr(self, '_favicon_filename_cache'):
|
||||
delattr(self, '_favicon_filename_cache')
|
||||
|
||||
# A signal that could trigger the socket server to update the browser also
|
||||
watch_check_update = signal('watch_favicon_bump')
|
||||
if watch_check_update:
|
||||
@@ -679,20 +834,32 @@ class model(watch_base):
|
||||
Find any favicon.* file in the current working directory
|
||||
and return the contents of the newest one.
|
||||
|
||||
MEMORY LEAK FIX: Cache the result to avoid repeated glob.glob() operations.
|
||||
glob.glob() causes millions of fnmatch allocations when called for every watch on page load.
|
||||
|
||||
Returns:
|
||||
bytes: Contents of the newest favicon file, or None if not found.
|
||||
str: Basename of the newest favicon file, or None if not found.
|
||||
"""
|
||||
# Check cache first (prevents 26M+ allocations from repeated glob operations)
|
||||
cache_key = '_favicon_filename_cache'
|
||||
if hasattr(self, cache_key):
|
||||
return getattr(self, cache_key)
|
||||
|
||||
import glob
|
||||
|
||||
# Search for all favicon.* files
|
||||
files = glob.glob(os.path.join(self.watch_data_dir, "favicon.*"))
|
||||
|
||||
if not files:
|
||||
return None
|
||||
result = None
|
||||
else:
|
||||
# Find the newest by modification time
|
||||
newest_file = max(files, key=os.path.getmtime)
|
||||
result = os.path.basename(newest_file)
|
||||
|
||||
# Find the newest by modification time
|
||||
newest_file = max(files, key=os.path.getmtime)
|
||||
return os.path.basename(newest_file)
|
||||
# Cache the result
|
||||
setattr(self, cache_key, result)
|
||||
return result
|
||||
|
||||
def get_screenshot_as_thumbnail(self, max_age=3200):
|
||||
"""Return path to a square thumbnail of the most recent screenshot.
|
||||
@@ -823,6 +990,57 @@ class model(watch_base):
|
||||
def toggle_mute(self):
|
||||
self['notification_muted'] ^= True
|
||||
|
||||
def commit(self):
|
||||
"""
|
||||
Save this watch immediately to disk using atomic write.
|
||||
|
||||
Replaces the old dirty-tracking system with immediate persistence.
|
||||
Uses atomic write pattern (temp file + rename) for crash safety.
|
||||
|
||||
Fire-and-forget: Logs errors but does not raise exceptions.
|
||||
Watch data remains in memory even if save fails, so next commit will retry.
|
||||
"""
|
||||
from loguru import logger
|
||||
|
||||
if not self.__datastore:
|
||||
logger.error(f"Cannot commit watch {self.get('uuid')} without datastore reference")
|
||||
return
|
||||
|
||||
if not self.watch_data_dir:
|
||||
logger.error(f"Cannot commit watch {self.get('uuid')} without datastore_path")
|
||||
return
|
||||
|
||||
# Convert to dict for serialization, excluding processor config keys
|
||||
# Processor configs are stored separately in processor-specific JSON files
|
||||
# Use deepcopy to prevent mutations from affecting the original Watch object
|
||||
import copy
|
||||
|
||||
# Acquire datastore lock to prevent concurrent modifications during copy
|
||||
# Take a quick shallow snapshot under lock, then deep copy outside lock
|
||||
lock = self.__datastore.lock if self.__datastore and hasattr(self.__datastore, 'lock') else None
|
||||
|
||||
if lock:
|
||||
with lock:
|
||||
snapshot = dict(self)
|
||||
else:
|
||||
snapshot = dict(self)
|
||||
|
||||
# Deep copy snapshot (slower, but done outside lock to minimize contention)
|
||||
watch_dict = {k: copy.deepcopy(v) for k, v in snapshot.items() if not k.startswith('processor_config_')}
|
||||
|
||||
# Normalize browser_steps: if no meaningful steps, save as empty list
|
||||
if not self.has_browser_steps:
|
||||
watch_dict['browser_steps'] = []
|
||||
|
||||
# Use existing atomic write helper
|
||||
from changedetectionio.store.file_saving_datastore import save_watch_atomic
|
||||
try:
|
||||
save_watch_atomic(self.watch_data_dir, self.get('uuid'), watch_dict)
|
||||
logger.debug(f"Committed watch {self.get('uuid')}")
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to commit watch {self.get('uuid')}: {e}")
|
||||
|
||||
|
||||
def extra_notification_token_values(self):
|
||||
# Used for providing extra tokens
|
||||
# return {'widget': 555}
|
||||
@@ -949,13 +1167,13 @@ class model(watch_base):
|
||||
def save_last_text_fetched_before_filters(self, contents):
|
||||
import brotli
|
||||
filepath = os.path.join(self.watch_data_dir, 'last-fetched.br')
|
||||
_brotli_subprocess_save(contents, filepath, mode=brotli.MODE_TEXT, fallback_uncompressed=False)
|
||||
_brotli_save(contents, filepath, mode=brotli.MODE_TEXT, fallback_uncompressed=False)
|
||||
|
||||
def save_last_fetched_html(self, timestamp, contents):
|
||||
self.ensure_data_dir_exists()
|
||||
snapshot_fname = f"{timestamp}.html.br"
|
||||
filepath = os.path.join(self.watch_data_dir, snapshot_fname)
|
||||
_brotli_subprocess_save(contents, filepath, mode=None, fallback_uncompressed=True)
|
||||
_brotli_save(contents, filepath, mode=None, fallback_uncompressed=True)
|
||||
self._prune_last_fetched_html_snapshots()
|
||||
|
||||
def get_fetched_html(self, timestamp):
|
||||
|
||||
@@ -6,6 +6,147 @@ USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH = 'System default'
|
||||
CONDITIONS_MATCH_LOGIC_DEFAULT = 'ALL'
|
||||
|
||||
class watch_base(dict):
|
||||
"""
|
||||
Base watch domain model (inherits from dict for backward compatibility).
|
||||
|
||||
WARNING: This class inherits from dict, which violates proper encapsulation.
|
||||
Dict inheritance is legacy technical debt that should be refactored to a proper
|
||||
domain model (e.g., Pydantic BaseModel) for better type safety and validation.
|
||||
|
||||
TODO: Migrate to Pydantic BaseModel for:
|
||||
- Type safety and IDE autocomplete
|
||||
- Automatic validation
|
||||
- Clear separation between domain model and serialization
|
||||
- Database backend abstraction (file → postgres → mongodb)
|
||||
- Configuration override chain resolution (Watch → Tag → Global)
|
||||
- Immutability options
|
||||
- Better testing
|
||||
|
||||
CHAIN RESOLUTION ARCHITECTURE:
|
||||
The dream is a 3-level override hierarchy:
|
||||
Watch settings → Tag/Group settings → Global settings
|
||||
|
||||
Current implementation: MANUAL resolution scattered across codebase
|
||||
- Processors manually check watch.get('field')
|
||||
- Loop through tags to find overrides_watch=True
|
||||
- Fall back to datastore['settings']['application']['field']
|
||||
|
||||
Pydantic implementation: AUTOMATIC resolution via @computed_field
|
||||
- Single source of truth for each setting's resolution logic
|
||||
- Type-safe, testable, self-documenting
|
||||
- Example: watch.resolved_fetch_backend (instead of nested dict navigation)
|
||||
|
||||
See: Watch.py model docstring for detailed Pydantic architecture plan
|
||||
See: Tag.py model docstring for tag override explanation
|
||||
See: processors/restock_diff/processor.py:184-192 for current manual example
|
||||
|
||||
Core Fields:
|
||||
uuid (str): Unique identifier for this watch (auto-generated)
|
||||
url (str): Target URL to monitor for changes
|
||||
title (str|None): Custom display name (overrides page_title if set)
|
||||
page_title (str|None): Title extracted from <title> tag of monitored page
|
||||
tags (List[str]): List of tag UUIDs for categorization
|
||||
tag (str): DEPRECATED - Old single-tag system, use tags instead
|
||||
|
||||
Check Configuration:
|
||||
processor (str): Processor type ('text_json_diff', 'restock_diff', etc.)
|
||||
fetch_backend (str): Fetcher to use ('system', 'html_requests', 'playwright', etc.)
|
||||
method (str): HTTP method ('GET', 'POST', etc.)
|
||||
headers (dict): Custom HTTP headers to send
|
||||
proxy (str|None): Preferred proxy server
|
||||
paused (bool): Whether change detection is paused
|
||||
|
||||
Scheduling:
|
||||
time_between_check (dict): Check interval {'weeks': int, 'days': int, 'hours': int, 'minutes': int, 'seconds': int}
|
||||
time_between_check_use_default (bool): Use global default interval if True
|
||||
time_schedule_limit (dict): Weekly schedule limiting when checks can run
|
||||
Structure: {
|
||||
'enabled': bool,
|
||||
'monday/tuesday/.../sunday': {
|
||||
'enabled': bool,
|
||||
'start_time': str ('HH:MM'),
|
||||
'duration': {'hours': str, 'minutes': str}
|
||||
}
|
||||
}
|
||||
|
||||
Content Filtering:
|
||||
include_filters (List[str]): CSS/XPath selectors to extract content
|
||||
subtractive_selectors (List[str]): Selectors to remove from content
|
||||
ignore_text (List[str]): Text patterns to ignore in change detection
|
||||
trigger_text (List[str]): Text/regex that must be present to trigger change
|
||||
text_should_not_be_present (List[str]): Text that should NOT be present
|
||||
extract_text (List[str]): Regex patterns to extract specific text after filtering
|
||||
|
||||
Text Processing:
|
||||
trim_text_whitespace (bool): Strip leading/trailing whitespace
|
||||
sort_text_alphabetically (bool): Sort lines alphabetically before comparison
|
||||
remove_duplicate_lines (bool): Remove duplicate lines
|
||||
check_unique_lines (bool): Compare against all history for unique lines
|
||||
strip_ignored_lines (bool|None): Remove lines matching ignore patterns
|
||||
|
||||
Change Detection Filters:
|
||||
filter_text_added (bool): Include added text in change detection
|
||||
filter_text_removed (bool): Include removed text in change detection
|
||||
filter_text_replaced (bool): Include replaced text in change detection
|
||||
|
||||
Browser Automation:
|
||||
browser_steps (List[dict]): Browser automation steps for JS-heavy sites
|
||||
browser_steps_last_error_step (int|None): Last step that caused error
|
||||
webdriver_delay (int|None): Seconds to wait after page load
|
||||
webdriver_js_execute_code (str|None): JavaScript to execute before extraction
|
||||
|
||||
Restock Detection:
|
||||
in_stock_only (bool): Only trigger on in-stock transitions
|
||||
follow_price_changes (bool): Monitor price changes
|
||||
has_ldjson_price_data (bool|None): Whether page has LD-JSON price data
|
||||
track_ldjson_price_data (str|None): Track LD-JSON price data ('ACCEPT', 'REJECT', None)
|
||||
price_change_threshold_percent (float|None): Minimum price change % to trigger
|
||||
|
||||
Notifications:
|
||||
notification_urls (List[str]): Apprise URLs for notifications
|
||||
notification_title (str|None): Custom notification title template
|
||||
notification_body (str|None): Custom notification body template
|
||||
notification_format (str): Notification format (e.g., 'System default', 'Text', 'HTML')
|
||||
notification_muted (bool): Disable notifications for this watch
|
||||
notification_screenshot (bool): Include screenshot in notifications
|
||||
notification_alert_count (int): Number of notifications sent
|
||||
last_notification_error (str|None): Last notification error message
|
||||
body (str|None): DEPRECATED? Legacy notification body field
|
||||
filter_failure_notification_send (bool): Send notification on filter failures
|
||||
|
||||
History & State:
|
||||
date_created (int|None): Unix timestamp of watch creation
|
||||
last_checked (int): Unix timestamp of last check
|
||||
last_viewed (int): History snapshot key of last user view
|
||||
last_error (str|bool): Last error message or False if no error
|
||||
check_count (int): Total number of checks performed
|
||||
fetch_time (float): Duration of last fetch in seconds
|
||||
consecutive_filter_failures (int): Counter for consecutive filter match failures
|
||||
previous_md5 (str|bool): MD5 hash of previous content
|
||||
previous_md5_before_filters (str|bool): MD5 hash before filters applied
|
||||
history_snapshot_max_length (int|None): Max history snapshots to keep (None = use global)
|
||||
|
||||
Conditions:
|
||||
conditions (dict): Custom conditions for change detection logic
|
||||
conditions_match_logic (str): Logic operator ('ALL', 'ANY') for conditions
|
||||
|
||||
Metadata:
|
||||
content-type (str|None): Content-Type from last fetch
|
||||
remote_server_reply (str|None): Server header from last response
|
||||
ignore_status_codes (List[int]|None): HTTP status codes to ignore
|
||||
use_page_title_in_list (bool|None): Display page title in watch list (None = use system default)
|
||||
|
||||
Instance Attributes (not serialized):
|
||||
__datastore: Reference to parent DataStore (set externally after creation)
|
||||
watch_data_dir: Filesystem path for this watch's data directory
|
||||
|
||||
Notes:
|
||||
- Many fields default to None to distinguish "not set" from "set to default"
|
||||
- When field is None, system-level defaults are used
|
||||
- Processor-specific configs (e.g., processor_config_*) are NOT stored in watch.json
|
||||
They are stored in separate {processor_name}.json files
|
||||
- This class is used for both Watch and Tag objects (tags reuse the structure)
|
||||
"""
|
||||
|
||||
def __init__(self, *arg, **kw):
|
||||
self.update({
|
||||
@@ -32,6 +173,7 @@ class watch_base(dict):
|
||||
'filter_text_replaced': True,
|
||||
'follow_price_changes': True,
|
||||
'has_ldjson_price_data': None,
|
||||
'history_snapshot_max_length': None,
|
||||
'headers': {}, # Extra headers to send
|
||||
'ignore_text': [], # List of text to ignore when calculating the comparison checksum
|
||||
'ignore_status_codes': None,
|
||||
@@ -139,4 +281,100 @@ class watch_base(dict):
|
||||
super(watch_base, self).__init__(*arg, **kw)
|
||||
|
||||
if self.get('default'):
|
||||
del self['default']
|
||||
del self['default']
|
||||
|
||||
def __deepcopy__(self, memo):
|
||||
"""
|
||||
Custom deepcopy for all watch_base subclasses (Watch, Tag, etc.).
|
||||
|
||||
CRITICAL FIX: Prevents copying large reference objects like __datastore
|
||||
which would cause exponential memory growth when Watch objects are deepcopied.
|
||||
|
||||
This is called by:
|
||||
- api/Watch.py:76 (API endpoint)
|
||||
- api/Tags.py:28 (Tags API)
|
||||
- processors/base.py:26 (EVERY processor run)
|
||||
- store/__init__.py:544 (clone watch)
|
||||
- And other locations
|
||||
"""
|
||||
from copy import deepcopy
|
||||
|
||||
# Create new instance without calling __init__
|
||||
cls = self.__class__
|
||||
new_obj = cls.__new__(cls)
|
||||
memo[id(self)] = new_obj
|
||||
|
||||
# Copy the dict data (all the settings)
|
||||
for key, value in self.items():
|
||||
new_obj[key] = deepcopy(value, memo)
|
||||
|
||||
# Copy instance attributes dynamically
|
||||
# This handles Watch-specific attrs (like __datastore) and any future subclass attrs
|
||||
for attr_name in dir(self):
|
||||
# Skip methods, special attrs, and dict keys
|
||||
if attr_name.startswith('_') and not attr_name.startswith('__'):
|
||||
# This catches _model__datastore, _model__history_n, etc.
|
||||
try:
|
||||
attr_value = getattr(self, attr_name)
|
||||
|
||||
# Special handling: Share references to large objects instead of copying
|
||||
# Examples: __datastore, __app_reference, __global_settings, etc.
|
||||
if attr_name.endswith('__datastore') or attr_name.endswith('__app'):
|
||||
# Share the reference (don't copy!) to prevent memory leaks
|
||||
setattr(new_obj, attr_name, attr_value)
|
||||
# Skip cache attributes - let them regenerate on demand
|
||||
elif 'cache' in attr_name.lower():
|
||||
pass # Don't copy caches
|
||||
# Copy regular instance attributes
|
||||
elif not callable(attr_value):
|
||||
setattr(new_obj, attr_name, attr_value)
|
||||
except AttributeError:
|
||||
pass # Attribute doesn't exist in this instance
|
||||
|
||||
return new_obj
|
||||
|
||||
def __getstate__(self):
|
||||
"""
|
||||
Custom pickle serialization for all watch_base subclasses.
|
||||
|
||||
Excludes large reference objects (like __datastore) from serialization.
|
||||
"""
|
||||
# Get the dict data
|
||||
state = dict(self)
|
||||
|
||||
# Collect instance attributes (excluding methods and large references)
|
||||
instance_attrs = {}
|
||||
for attr_name in dir(self):
|
||||
if attr_name.startswith('_') and not attr_name.startswith('__'):
|
||||
try:
|
||||
attr_value = getattr(self, attr_name)
|
||||
# Exclude large reference objects and caches from serialization
|
||||
if not (attr_name.endswith('__datastore') or
|
||||
attr_name.endswith('__app') or
|
||||
'cache' in attr_name.lower() or
|
||||
callable(attr_value)):
|
||||
instance_attrs[attr_name] = attr_value
|
||||
except AttributeError:
|
||||
pass
|
||||
|
||||
if instance_attrs:
|
||||
state['__instance_metadata__'] = instance_attrs
|
||||
|
||||
return state
|
||||
|
||||
def __setstate__(self, state):
|
||||
"""
|
||||
Custom pickle deserialization for all watch_base subclasses.
|
||||
|
||||
WARNING: Large reference objects (like __datastore) are NOT restored!
|
||||
Caller must restore these references after unpickling if needed.
|
||||
"""
|
||||
# Extract metadata
|
||||
metadata = state.pop('__instance_metadata__', {})
|
||||
|
||||
# Restore dict data
|
||||
self.update(state)
|
||||
|
||||
# Restore instance attributes
|
||||
for attr_name, attr_value in metadata.items():
|
||||
setattr(self, attr_name, attr_value)
|
||||
@@ -105,6 +105,30 @@ class ChangeDetectionSpec:
|
||||
"""
|
||||
pass
|
||||
|
||||
@hookspec
|
||||
def register_processor(self):
|
||||
"""Register an external processor plugin.
|
||||
|
||||
External packages can implement this hook to register custom processors
|
||||
that will be discovered alongside built-in processors.
|
||||
|
||||
Returns:
|
||||
dict or None: Dictionary with processor information:
|
||||
{
|
||||
'processor_name': str, # Machine name (e.g., 'osint_recon')
|
||||
'processor_module': module, # Module containing processor.py
|
||||
'processor_class': class, # The perform_site_check class
|
||||
'metadata': { # Optional metadata
|
||||
'name': str, # Display name
|
||||
'description': str, # Description
|
||||
'processor_weight': int,# Sort weight (lower = higher priority)
|
||||
'list_badge_text': str, # Badge text for UI
|
||||
}
|
||||
}
|
||||
Return None if this plugin doesn't provide a processor
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
# Set up Plugin Manager
|
||||
plugin_manager = pluggy.PluginManager(PLUGIN_NAMESPACE)
|
||||
|
||||
@@ -17,9 +17,11 @@ def find_sub_packages(package_name):
|
||||
return [name for _, name, is_pkg in pkgutil.iter_modules(package.__path__) if is_pkg]
|
||||
|
||||
|
||||
@lru_cache(maxsize=1)
|
||||
def find_processors():
|
||||
"""
|
||||
Find all subclasses of DifferenceDetectionProcessor in the specified package.
|
||||
Results are cached to avoid repeated discovery.
|
||||
|
||||
:param package_name: The name of the package to scan for processor modules.
|
||||
:return: A list of (module, class) tuples.
|
||||
@@ -46,6 +48,23 @@ def find_processors():
|
||||
except (ModuleNotFoundError, ImportError) as e:
|
||||
logger.warning(f"Failed to import module {module_name}: {e} (find_processors())")
|
||||
|
||||
# Discover plugin processors via pluggy
|
||||
try:
|
||||
from changedetectionio.pluggy_interface import plugin_manager
|
||||
plugin_results = plugin_manager.hook.register_processor()
|
||||
|
||||
for result in plugin_results:
|
||||
if result and isinstance(result, dict):
|
||||
processor_module = result.get('processor_module')
|
||||
processor_name = result.get('processor_name')
|
||||
|
||||
if processor_module and processor_name:
|
||||
processors.append((processor_module, processor_name))
|
||||
plugin_path = getattr(processor_module, '__file__', 'unknown location')
|
||||
logger.info(f"Registered plugin processor: {processor_name} from {plugin_path}")
|
||||
except Exception as e:
|
||||
logger.warning(f"Error loading plugin processors: {e}")
|
||||
|
||||
return processors
|
||||
|
||||
|
||||
@@ -97,54 +116,137 @@ def find_processor_module(processor_name):
|
||||
return None
|
||||
|
||||
|
||||
def get_processor_module(processor_name):
|
||||
"""
|
||||
Get the actual processor module (with perform_site_check class) by name.
|
||||
Works for both built-in and plugin processors.
|
||||
|
||||
Args:
|
||||
processor_name: Processor machine name (e.g., 'text_json_diff', 'osint_recon')
|
||||
|
||||
Returns:
|
||||
module: The processor module containing perform_site_check, or None if not found
|
||||
"""
|
||||
processor_classes = find_processors()
|
||||
processor_tuple = next((tpl for tpl in processor_classes if tpl[1] == processor_name), None)
|
||||
|
||||
if processor_tuple:
|
||||
# Return the actual processor module (first element of tuple)
|
||||
return processor_tuple[0]
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def get_processor_submodule(processor_name, submodule_name):
|
||||
"""
|
||||
Get an optional submodule from a processor (e.g., 'difference', 'extract', 'preview').
|
||||
Works for both built-in and plugin processors.
|
||||
|
||||
Args:
|
||||
processor_name: Processor machine name (e.g., 'text_json_diff', 'osint_recon')
|
||||
submodule_name: Name of the submodule (e.g., 'difference', 'extract', 'preview')
|
||||
|
||||
Returns:
|
||||
module: The submodule if it exists, or None if not found
|
||||
"""
|
||||
processor_classes = find_processors()
|
||||
processor_tuple = next((tpl for tpl in processor_classes if tpl[1] == processor_name), None)
|
||||
|
||||
if not processor_tuple:
|
||||
return None
|
||||
|
||||
processor_module = processor_tuple[0]
|
||||
parent_module = get_parent_module(processor_module)
|
||||
|
||||
if not parent_module:
|
||||
return None
|
||||
|
||||
# Try to import the submodule
|
||||
try:
|
||||
# For built-in processors: changedetectionio.processors.text_json_diff.difference
|
||||
# For plugin processors: changedetectionio_osint.difference
|
||||
parent_module_name = parent_module.__name__
|
||||
submodule_full_name = f"{parent_module_name}.{submodule_name}"
|
||||
return importlib.import_module(submodule_full_name)
|
||||
except (ModuleNotFoundError, ImportError):
|
||||
return None
|
||||
|
||||
|
||||
@lru_cache(maxsize=1)
|
||||
def get_plugin_processor_metadata():
|
||||
"""Get metadata from plugin processors."""
|
||||
metadata = {}
|
||||
try:
|
||||
from changedetectionio.pluggy_interface import plugin_manager
|
||||
plugin_results = plugin_manager.hook.register_processor()
|
||||
|
||||
for result in plugin_results:
|
||||
if result and isinstance(result, dict):
|
||||
processor_name = result.get('processor_name')
|
||||
meta = result.get('metadata', {})
|
||||
if processor_name:
|
||||
metadata[processor_name] = meta
|
||||
except Exception as e:
|
||||
logger.warning(f"Error getting plugin processor metadata: {e}")
|
||||
return metadata
|
||||
|
||||
|
||||
def available_processors():
|
||||
"""
|
||||
Get a list of processors by name and description for the UI elements.
|
||||
Can be filtered via ALLOWED_PROCESSORS environment variable (comma-separated list).
|
||||
Can be filtered via DISABLED_PROCESSORS environment variable (comma-separated list).
|
||||
:return: A list :)
|
||||
"""
|
||||
|
||||
processor_classes = find_processors()
|
||||
|
||||
# Check if ALLOWED_PROCESSORS env var is set
|
||||
# For now we disable it, need to make a deploy with lots of new code and this will be an overload
|
||||
allowed_processors_env = os.getenv('ALLOWED_PROCESSORS', 'text_json_diff, restock_diff').strip()
|
||||
allowed_processors = None
|
||||
if allowed_processors_env:
|
||||
# Check if DISABLED_PROCESSORS env var is set
|
||||
disabled_processors_env = os.getenv('DISABLED_PROCESSORS', 'image_ssim_diff').strip()
|
||||
disabled_processors = []
|
||||
if disabled_processors_env:
|
||||
# Parse comma-separated list and strip whitespace
|
||||
allowed_processors = [p.strip() for p in allowed_processors_env.split(',') if p.strip()]
|
||||
logger.info(f"ALLOWED_PROCESSORS set, filtering to: {allowed_processors}")
|
||||
disabled_processors = [p.strip() for p in disabled_processors_env.split(',') if p.strip()]
|
||||
logger.info(f"DISABLED_PROCESSORS set, disabling: {disabled_processors}")
|
||||
|
||||
available = []
|
||||
plugin_metadata = get_plugin_processor_metadata()
|
||||
|
||||
for module, sub_package_name in processor_classes:
|
||||
# Filter by allowed processors if set
|
||||
if allowed_processors and sub_package_name not in allowed_processors:
|
||||
logger.debug(f"Skipping processor '{sub_package_name}' (not in ALLOWED_PROCESSORS)")
|
||||
# Skip disabled processors
|
||||
if sub_package_name in disabled_processors:
|
||||
logger.debug(f"Skipping processor '{sub_package_name}' (in DISABLED_PROCESSORS)")
|
||||
continue
|
||||
|
||||
# Try to get the 'name' attribute from the processor module first
|
||||
if hasattr(module, 'name'):
|
||||
description = gettext(module.name)
|
||||
# Check if this is a plugin processor
|
||||
if sub_package_name in plugin_metadata:
|
||||
meta = plugin_metadata[sub_package_name]
|
||||
description = gettext(meta.get('name', sub_package_name))
|
||||
# Plugin processors start from weight 10 to separate them from built-in processors
|
||||
weight = 100 + meta.get('processor_weight', 0)
|
||||
else:
|
||||
# Fall back to processor_description from parent module's __init__.py
|
||||
parent_module = get_parent_module(module)
|
||||
if parent_module and hasattr(parent_module, 'processor_description'):
|
||||
description = gettext(parent_module.processor_description)
|
||||
# Try to get the 'name' attribute from the processor module first
|
||||
if hasattr(module, 'name'):
|
||||
description = gettext(module.name)
|
||||
else:
|
||||
# Final fallback to a readable name
|
||||
description = sub_package_name.replace('_', ' ').title()
|
||||
# Fall back to processor_description from parent module's __init__.py
|
||||
parent_module = get_parent_module(module)
|
||||
if parent_module and hasattr(parent_module, 'processor_description'):
|
||||
description = gettext(parent_module.processor_description)
|
||||
else:
|
||||
# Final fallback to a readable name
|
||||
description = sub_package_name.replace('_', ' ').title()
|
||||
|
||||
# Get weight for sorting (lower weight = higher in list)
|
||||
weight = 0 # Default weight for processors without explicit weight
|
||||
# Get weight for sorting (lower weight = higher in list)
|
||||
weight = 0 # Default weight for processors without explicit weight
|
||||
|
||||
# Check processor module itself first
|
||||
if hasattr(module, 'processor_weight'):
|
||||
weight = module.processor_weight
|
||||
else:
|
||||
# Fall back to parent module (package __init__.py)
|
||||
parent_module = get_parent_module(module)
|
||||
if parent_module and hasattr(parent_module, 'processor_weight'):
|
||||
weight = parent_module.processor_weight
|
||||
# Check processor module itself first
|
||||
if hasattr(module, 'processor_weight'):
|
||||
weight = module.processor_weight
|
||||
else:
|
||||
# Fall back to parent module (package __init__.py)
|
||||
parent_module = get_parent_module(module)
|
||||
if parent_module and hasattr(parent_module, 'processor_weight'):
|
||||
weight = parent_module.processor_weight
|
||||
|
||||
available.append((sub_package_name, description, weight))
|
||||
|
||||
@@ -155,6 +257,20 @@ def available_processors():
|
||||
return [(name, desc) for name, desc, weight in available]
|
||||
|
||||
|
||||
def get_default_processor():
|
||||
"""
|
||||
Get the default processor to use when none is specified.
|
||||
Returns the first available processor based on weight (lowest weight = highest priority).
|
||||
This ensures forms auto-select a valid processor even when DISABLED_PROCESSORS filters the list.
|
||||
|
||||
:return: The processor name string (e.g., 'text_json_diff')
|
||||
"""
|
||||
available = available_processors()
|
||||
if available:
|
||||
return available[0][0] # Return the processor name from first tuple
|
||||
return 'text_json_diff' # Fallback if somehow no processors are available
|
||||
|
||||
|
||||
def get_processor_badge_texts():
|
||||
"""
|
||||
Get a dictionary mapping processor names to their list_badge_text values.
|
||||
@@ -279,3 +395,76 @@ def get_processor_badge_css():
|
||||
|
||||
return '\n\n'.join(css_rules)
|
||||
|
||||
|
||||
def save_processor_config(datastore, watch_uuid, config_data):
|
||||
"""
|
||||
Save processor-specific configuration to JSON file.
|
||||
|
||||
This is a shared helper function used by both the UI edit form and API endpoints
|
||||
to consistently handle processor configuration storage.
|
||||
|
||||
Args:
|
||||
datastore: The application datastore instance
|
||||
watch_uuid: UUID of the watch
|
||||
config_data: Dictionary of configuration data to save (with processor_config_* prefix removed)
|
||||
|
||||
Returns:
|
||||
bool: True if saved successfully, False otherwise
|
||||
"""
|
||||
if not config_data:
|
||||
return True
|
||||
|
||||
try:
|
||||
from changedetectionio.processors.base import difference_detection_processor
|
||||
|
||||
# Get processor name from watch
|
||||
watch = datastore.data['watching'].get(watch_uuid)
|
||||
if not watch:
|
||||
logger.error(f"Cannot save processor config: watch {watch_uuid} not found")
|
||||
return False
|
||||
|
||||
processor_name = watch.get('processor', 'text_json_diff')
|
||||
|
||||
# Create a processor instance to access config methods
|
||||
processor_instance = difference_detection_processor(datastore, watch_uuid)
|
||||
|
||||
# Use processor name as filename so each processor keeps its own config
|
||||
config_filename = f'{processor_name}.json'
|
||||
processor_instance.update_extra_watch_config(config_filename, config_data)
|
||||
|
||||
logger.debug(f"Saved processor config to {config_filename}: {config_data}")
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to save processor config: {e}")
|
||||
return False
|
||||
|
||||
|
||||
def extract_processor_config_from_form_data(form_data):
|
||||
"""
|
||||
Extract processor_config_* fields from form data and return separate dicts.
|
||||
|
||||
This is a shared helper function used by both the UI edit form and API endpoints
|
||||
to consistently handle processor configuration extraction.
|
||||
|
||||
IMPORTANT: This function modifies form_data in-place by removing processor_config_* fields.
|
||||
|
||||
Args:
|
||||
form_data: Dictionary of form data (will be modified in-place)
|
||||
|
||||
Returns:
|
||||
dict: Dictionary of processor config data (with processor_config_* prefix removed)
|
||||
"""
|
||||
processor_config_data = {}
|
||||
|
||||
# Use list() to create a copy of keys since we're modifying the dict
|
||||
for field_name in list(form_data.keys()):
|
||||
if field_name.startswith('processor_config_'):
|
||||
config_key = field_name.replace('processor_config_', '')
|
||||
# Save all values (including empty strings) to allow explicit clearing of settings
|
||||
processor_config_data[config_key] = form_data[field_name]
|
||||
# Remove from form_data to prevent it from reaching datastore
|
||||
del form_data[field_name]
|
||||
|
||||
return processor_config_data
|
||||
|
||||
|
||||
@@ -23,7 +23,14 @@ class difference_detection_processor():
|
||||
def __init__(self, datastore, watch_uuid):
|
||||
self.datastore = datastore
|
||||
self.watch_uuid = watch_uuid
|
||||
|
||||
# Create a stable snapshot of the watch for processing
|
||||
# Why deepcopy?
|
||||
# 1. Prevents "dict changed during iteration" errors if watch is modified during processing
|
||||
# 2. Preserves Watch object with properties (.link, .is_pdf, etc.) - can't use dict()
|
||||
# 3. Safe now: Watch.__deepcopy__() shares datastore ref (no memory leak) but copies dict data
|
||||
self.watch = deepcopy(self.datastore.data['watching'].get(watch_uuid))
|
||||
|
||||
# Generic fetcher that should be extended (requests, playwright etc)
|
||||
self.fetcher = Fetcher()
|
||||
|
||||
|
||||
@@ -12,6 +12,13 @@ processor_description = "Visual/Screenshot change detection (Fast)"
|
||||
processor_name = "image_ssim_diff"
|
||||
processor_weight = 2 # Lower weight = appears at top, heavier weight = appears lower (bottom)
|
||||
|
||||
# Processor capabilities
|
||||
supports_visual_selector = True
|
||||
supports_browser_steps = True
|
||||
supports_text_filters_and_triggers = False
|
||||
supports_text_filters_and_triggers_elements = False
|
||||
supports_request_type = True
|
||||
|
||||
PROCESSOR_CONFIG_NAME = f"{Path(__file__).parent.name}.json"
|
||||
|
||||
# Subprocess timeout settings
|
||||
|
||||
@@ -130,7 +130,7 @@ def get_asset(asset_name, watch, datastore, request):
|
||||
except Exception as e:
|
||||
exception_container[0] = e
|
||||
|
||||
thread = threading.Thread(target=thread_target)
|
||||
thread = threading.Thread(target=thread_target, daemon=True, name="ImageDiff-Asset")
|
||||
thread.start()
|
||||
thread.join(timeout=60)
|
||||
|
||||
@@ -284,7 +284,7 @@ def _draw_bounding_box_if_configured(img_bytes, watch, datastore):
|
||||
except Exception as e:
|
||||
exception_container[0] = e
|
||||
|
||||
thread = threading.Thread(target=thread_target)
|
||||
thread = threading.Thread(target=thread_target, daemon=True, name="ImageDiff-BoundingBox")
|
||||
thread.start()
|
||||
thread.join(timeout=15)
|
||||
|
||||
@@ -393,7 +393,7 @@ def render(watch, datastore, request, url_for, render_template, flash, redirect)
|
||||
except Exception as e:
|
||||
exception_container[0] = e
|
||||
|
||||
thread = threading.Thread(target=thread_target)
|
||||
thread = threading.Thread(target=thread_target, daemon=True, name="ImageDiff-ChangePercentage")
|
||||
thread.start()
|
||||
thread.join(timeout=60)
|
||||
|
||||
|
||||
@@ -13,14 +13,9 @@ Research: https://github.com/libvips/pyvips/issues/234
|
||||
|
||||
import multiprocessing
|
||||
|
||||
# CRITICAL: Use 'spawn' instead of 'fork' to avoid inheriting parent's
|
||||
# CRITICAL: Use 'spawn' context instead of 'fork' to avoid inheriting parent's
|
||||
# LibVIPS threading state which can cause hangs in gaussblur operations
|
||||
# https://docs.python.org/3/library/multiprocessing.html#contexts-and-start-methods
|
||||
try:
|
||||
multiprocessing.set_start_method('spawn', force=False)
|
||||
except RuntimeError:
|
||||
# Already set, ignore
|
||||
pass
|
||||
|
||||
|
||||
def _worker_generate_diff(conn, img_bytes_from, img_bytes_to, threshold, blur_sigma, max_width, max_height):
|
||||
@@ -95,9 +90,10 @@ def generate_diff_isolated(img_bytes_from, img_bytes_to, threshold, blur_sigma,
|
||||
Returns:
|
||||
bytes: JPEG diff image or None on failure
|
||||
"""
|
||||
parent_conn, child_conn = multiprocessing.Pipe()
|
||||
ctx = multiprocessing.get_context('spawn')
|
||||
parent_conn, child_conn = ctx.Pipe()
|
||||
|
||||
p = multiprocessing.Process(
|
||||
p = ctx.Process(
|
||||
target=_worker_generate_diff,
|
||||
args=(child_conn, img_bytes_from, img_bytes_to, threshold, blur_sigma, max_width, max_height)
|
||||
)
|
||||
@@ -140,7 +136,8 @@ def calculate_change_percentage_isolated(img_bytes_from, img_bytes_to, threshold
|
||||
Returns:
|
||||
float: Change percentage
|
||||
"""
|
||||
parent_conn, child_conn = multiprocessing.Pipe()
|
||||
ctx = multiprocessing.get_context('spawn')
|
||||
parent_conn, child_conn = ctx.Pipe()
|
||||
|
||||
def _worker_calculate(conn):
|
||||
try:
|
||||
@@ -185,7 +182,7 @@ def calculate_change_percentage_isolated(img_bytes_from, img_bytes_to, threshold
|
||||
finally:
|
||||
conn.close()
|
||||
|
||||
p = multiprocessing.Process(target=_worker_calculate, args=(child_conn,))
|
||||
p = ctx.Process(target=_worker_calculate, args=(child_conn,))
|
||||
p.start()
|
||||
|
||||
result = 0.0
|
||||
@@ -233,7 +230,8 @@ def compare_images_isolated(img_bytes_from, img_bytes_to, threshold, blur_sigma,
|
||||
tuple: (changed_detected, change_percentage)
|
||||
"""
|
||||
print(f"[Parent] Starting compare_images_isolated subprocess", flush=True)
|
||||
parent_conn, child_conn = multiprocessing.Pipe()
|
||||
ctx = multiprocessing.get_context('spawn')
|
||||
parent_conn, child_conn = ctx.Pipe()
|
||||
|
||||
def _worker_compare(conn):
|
||||
try:
|
||||
@@ -301,7 +299,7 @@ def compare_images_isolated(img_bytes_from, img_bytes_to, threshold, blur_sigma,
|
||||
finally:
|
||||
conn.close()
|
||||
|
||||
p = multiprocessing.Process(target=_worker_compare, args=(child_conn,))
|
||||
p = ctx.Process(target=_worker_compare, args=(child_conn,))
|
||||
print(f"[Parent] Starting subprocess (pid will be assigned)", flush=True)
|
||||
p.start()
|
||||
print(f"[Parent] Subprocess started (pid={p.pid}), waiting for result (30s timeout)", flush=True)
|
||||
|
||||
@@ -204,7 +204,7 @@ class perform_site_check(difference_detection_processor):
|
||||
except Exception as e:
|
||||
exception_container[0] = e
|
||||
|
||||
thread = threading.Thread(target=thread_target)
|
||||
thread = threading.Thread(target=thread_target, daemon=True, name="ImageDiff-Processor")
|
||||
thread.start()
|
||||
thread.join(timeout=60)
|
||||
|
||||
|
||||
@@ -4,6 +4,13 @@ from changedetectionio.model.Watch import model as BaseWatch
|
||||
from typing import Union
|
||||
import re
|
||||
|
||||
# Processor capabilities
|
||||
supports_visual_selector = True
|
||||
supports_browser_steps = True
|
||||
supports_text_filters_and_triggers = True
|
||||
supports_text_filters_and_triggers_elements = True
|
||||
supports_request_type = True
|
||||
|
||||
class Restock(dict):
|
||||
|
||||
def parse_currency(self, raw_value: str) -> Union[float, None]:
|
||||
|
||||
@@ -193,18 +193,17 @@ class perform_site_check(difference_detection_processor):
|
||||
|
||||
|
||||
itemprop_availability = {}
|
||||
multiple_prices_found = False
|
||||
|
||||
# Try built-in extraction first, this will scan metadata in the HTML
|
||||
try:
|
||||
itemprop_availability = get_itemprop_availability(self.fetcher.content)
|
||||
except MoreThanOnePriceFound as e:
|
||||
# Add the real data
|
||||
raise ProcessorException(message="Cannot run, more than one price detected, this plugin is only for product pages with ONE product, try the content-change detection mode.",
|
||||
url=watch.get('url'),
|
||||
status_code=self.fetcher.get_last_status_code(),
|
||||
screenshot=self.fetcher.screenshot,
|
||||
xpath_data=self.fetcher.xpath_data
|
||||
)
|
||||
# Don't raise immediately - let plugins try to handle this case
|
||||
# Plugins might be able to determine which price is correct
|
||||
logger.warning(f"Built-in detection found multiple prices on {watch.get('url')}, will try plugin override")
|
||||
multiple_prices_found = True
|
||||
itemprop_availability = {}
|
||||
|
||||
# If built-in extraction didn't get both price AND availability, try plugin override
|
||||
# Only check plugin if this watch is using a fetcher that might provide better data
|
||||
@@ -216,9 +215,21 @@ class perform_site_check(difference_detection_processor):
|
||||
from changedetectionio.pluggy_interface import get_itemprop_availability_from_plugin
|
||||
fetcher_name = watch.get('fetch_backend', 'html_requests')
|
||||
|
||||
# Only try plugin override if not using system default (which might be anything)
|
||||
if fetcher_name and fetcher_name != 'system':
|
||||
logger.debug("Calling extra plugins for getting item price/availability")
|
||||
# Resolve 'system' to the actual fetcher being used
|
||||
# This allows plugins to work even when watch uses "system settings default"
|
||||
if fetcher_name == 'system':
|
||||
# Get the actual fetcher that was used (from self.fetcher)
|
||||
# Fetcher class name gives us the actual backend (e.g., 'html_requests', 'html_webdriver')
|
||||
actual_fetcher = type(self.fetcher).__name__
|
||||
if 'html_requests' in actual_fetcher.lower():
|
||||
fetcher_name = 'html_requests'
|
||||
elif 'webdriver' in actual_fetcher.lower() or 'playwright' in actual_fetcher.lower():
|
||||
fetcher_name = 'html_webdriver'
|
||||
logger.debug(f"Resolved 'system' fetcher to actual fetcher: {fetcher_name}")
|
||||
|
||||
# Try plugin override - plugins can decide if they support this fetcher
|
||||
if fetcher_name:
|
||||
logger.debug(f"Calling extra plugins for getting item price/availability (fetcher: {fetcher_name})")
|
||||
plugin_availability = get_itemprop_availability_from_plugin(self.fetcher.content, fetcher_name, self.fetcher, watch.link)
|
||||
|
||||
if plugin_availability:
|
||||
@@ -233,6 +244,16 @@ class perform_site_check(difference_detection_processor):
|
||||
if not plugin_availability:
|
||||
logger.debug("No item price/availability from plugins")
|
||||
|
||||
# If we had multiple prices and plugins also failed, NOW raise the exception
|
||||
if multiple_prices_found and not itemprop_availability.get('price'):
|
||||
raise ProcessorException(
|
||||
message="Cannot run, more than one price detected, this plugin is only for product pages with ONE product, try the content-change detection mode.",
|
||||
url=watch.get('url'),
|
||||
status_code=self.fetcher.get_last_status_code(),
|
||||
screenshot=self.fetcher.screenshot,
|
||||
xpath_data=self.fetcher.xpath_data
|
||||
)
|
||||
|
||||
# Something valid in get_itemprop_availability() by scraping metadata ?
|
||||
if itemprop_availability.get('price') or itemprop_availability.get('availability'):
|
||||
# Store for other usage
|
||||
|
||||
@@ -1,6 +1,12 @@
|
||||
|
||||
from loguru import logger
|
||||
|
||||
# Processor capabilities
|
||||
supports_visual_selector = True
|
||||
supports_browser_steps = True
|
||||
supports_text_filters_and_triggers = True
|
||||
supports_text_filters_and_triggers_elements = True
|
||||
supports_request_type = True
|
||||
|
||||
|
||||
|
||||
def _task(watch, update_handler):
|
||||
@@ -58,7 +64,7 @@ def prepare_filter_prevew(datastore, watch_uuid, form_data):
|
||||
# Only update vars that came in via the AJAX post
|
||||
p = {k: v for k, v in form.data.items() if k in form_data.keys()}
|
||||
tmp_watch.update(p)
|
||||
blank_watch_no_filters = watch_model()
|
||||
blank_watch_no_filters = watch_model(datastore_path=datastore.datastore_path, __datastore=datastore.data)
|
||||
blank_watch_no_filters['url'] = tmp_watch.get('url')
|
||||
|
||||
latest_filename = next(reversed(tmp_watch.history))
|
||||
|
||||
+246
-135
@@ -5,51 +5,57 @@ import heapq
|
||||
import queue
|
||||
import threading
|
||||
|
||||
try:
|
||||
import janus
|
||||
except ImportError:
|
||||
logger.critical(f"CRITICAL: janus library is required. Install with: pip install janus")
|
||||
raise
|
||||
# Janus is no longer required - we use pure threading.Queue for multi-loop support
|
||||
# try:
|
||||
# import janus
|
||||
# except ImportError:
|
||||
# pass # Not needed anymore
|
||||
|
||||
|
||||
class RecheckPriorityQueue:
|
||||
"""
|
||||
Ultra-reliable priority queue using janus for async/sync bridging.
|
||||
|
||||
CRITICAL DESIGN NOTE: Both sync_q and async_q are required because:
|
||||
- sync_q: Used by Flask routes, ticker threads, and other synchronous code
|
||||
- async_q: Used by async workers (the actual fetchers/processors) and coroutines
|
||||
|
||||
DO NOT REMOVE EITHER INTERFACE - they bridge different execution contexts:
|
||||
- Synchronous code (Flask, threads) cannot use async methods without blocking
|
||||
- Async code cannot use sync methods without blocking the event loop
|
||||
- janus provides the only safe bridge between these two worlds
|
||||
|
||||
Attempting to unify to async-only would require:
|
||||
- Converting all Flask routes to async (major breaking change)
|
||||
- Using asyncio.run() in sync contexts (causes deadlocks)
|
||||
- Thread-pool wrapping (adds complexity and overhead)
|
||||
|
||||
Minimal implementation focused on reliability:
|
||||
- Pure janus for sync/async bridge
|
||||
- Thread-safe priority ordering
|
||||
- Bulletproof error handling with critical logging
|
||||
Thread-safe priority queue supporting multiple async event loops.
|
||||
|
||||
ARCHITECTURE:
|
||||
- Multiple async workers, each with its own event loop in its own thread
|
||||
- Hybrid sync/async design for maximum scalability
|
||||
- Sync interface for ticker thread (threading.Queue)
|
||||
- Async interface for workers (asyncio.Event - NO executor threads!)
|
||||
|
||||
SCALABILITY:
|
||||
- Scales to 100-200+ workers without executor thread exhaustion
|
||||
- Async workers wait on asyncio.Event (pure coroutines, no threads)
|
||||
- Sync callers use threading.Queue (backward compatible)
|
||||
|
||||
WHY NOT JANUS:
|
||||
- Janus binds to ONE event loop at creation time
|
||||
- Our architecture has 15+ workers, each with separate event loops
|
||||
- Workers in different threads/loops cannot share janus async interface
|
||||
|
||||
WHY NOT RUN_IN_EXECUTOR:
|
||||
- With 200 workers, run_in_executor() would block 200 threads
|
||||
- Exhausts ThreadPoolExecutor, starves Flask HTTP handlers
|
||||
- Pure async approach uses 0 threads while waiting
|
||||
"""
|
||||
|
||||
|
||||
def __init__(self, maxsize: int = 0):
|
||||
try:
|
||||
self._janus_queue = janus.Queue(maxsize=maxsize)
|
||||
# BOTH interfaces required - see class docstring for why
|
||||
self.sync_q = self._janus_queue.sync_q # Flask routes, ticker thread
|
||||
self.async_q = self._janus_queue.async_q # Async workers
|
||||
|
||||
import asyncio
|
||||
|
||||
# Sync interface: threading.Queue for ticker thread and Flask routes
|
||||
self._notification_queue = queue.Queue(maxsize=maxsize if maxsize > 0 else 0)
|
||||
|
||||
# Priority storage - thread-safe
|
||||
self._priority_items = []
|
||||
self._lock = threading.RLock()
|
||||
|
||||
|
||||
# No event signaling needed - pure polling approach
|
||||
# Workers check queue every 50ms (latency acceptable: 0-500ms)
|
||||
# Scales to 1000+ workers: each sleeping worker = ~4KB coroutine, not thread
|
||||
|
||||
# Signals for UI updates
|
||||
self.queue_length_signal = signal('queue_length')
|
||||
|
||||
|
||||
logger.debug("RecheckPriorityQueue initialized successfully")
|
||||
except Exception as e:
|
||||
logger.critical(f"CRITICAL: Failed to initialize RecheckPriorityQueue: {str(e)}")
|
||||
@@ -58,38 +64,48 @@ class RecheckPriorityQueue:
|
||||
# SYNC INTERFACE (for ticker thread)
|
||||
def put(self, item, block: bool = True, timeout: Optional[float] = None):
|
||||
"""Thread-safe sync put with priority ordering"""
|
||||
logger.trace(f"RecheckQueue.put() called for item: {self._get_item_uuid(item)}, block={block}, timeout={timeout}")
|
||||
try:
|
||||
# Add to priority storage
|
||||
# CRITICAL: Add to both priority storage AND notification queue atomically
|
||||
# to prevent desynchronization where item exists but no notification
|
||||
with self._lock:
|
||||
heapq.heappush(self._priority_items, item)
|
||||
|
||||
# Notify via janus sync queue
|
||||
self.sync_q.put(True, block=block, timeout=timeout)
|
||||
|
||||
# Emit signals
|
||||
self._emit_put_signals(item)
|
||||
|
||||
logger.debug(f"Successfully queued item: {self._get_item_uuid(item)}")
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
logger.critical(f"CRITICAL: Failed to put item {self._get_item_uuid(item)}: {str(e)}")
|
||||
# Remove from priority storage if janus put failed
|
||||
|
||||
# Add notification - use blocking with timeout for safety
|
||||
# Notification queue is unlimited size, so should never block in practice
|
||||
# but timeout ensures we detect any unexpected issues (deadlock, etc)
|
||||
try:
|
||||
self._notification_queue.put(True, block=True, timeout=5.0)
|
||||
except Exception as notif_e:
|
||||
# Notification failed - MUST remove from priority_items to keep in sync
|
||||
# This prevents "Priority queue inconsistency" errors in get()
|
||||
logger.critical(f"CRITICAL: Notification queue put failed, removing from priority_items: {notif_e}")
|
||||
self._priority_items.remove(item)
|
||||
heapq.heapify(self._priority_items)
|
||||
raise # Re-raise to be caught by outer exception handler
|
||||
|
||||
# Signal emission after successful queue - log but don't fail the operation
|
||||
# Item is already safely queued, so signal failure shouldn't affect queue state
|
||||
try:
|
||||
with self._lock:
|
||||
if item in self._priority_items:
|
||||
self._priority_items.remove(item)
|
||||
heapq.heapify(self._priority_items)
|
||||
except Exception as cleanup_e:
|
||||
logger.critical(f"CRITICAL: Failed to cleanup after put failure: {str(e)}")
|
||||
self._emit_put_signals(item)
|
||||
except Exception as signal_e:
|
||||
logger.error(f"Failed to emit put signals but item queued successfully: {signal_e}")
|
||||
|
||||
logger.trace(f"Successfully queued item: {self._get_item_uuid(item)}")
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
logger.critical(f"CRITICAL: Failed to put item {self._get_item_uuid(item)}: {type(e).__name__}: {str(e)}")
|
||||
# Item should have been cleaned up in the inner try/except if notification failed
|
||||
return False
|
||||
|
||||
def get(self, block: bool = True, timeout: Optional[float] = None):
|
||||
"""Thread-safe sync get with priority ordering"""
|
||||
import queue
|
||||
logger.trace(f"RecheckQueue.get() called, block={block}, timeout={timeout}")
|
||||
import queue as queue_module
|
||||
try:
|
||||
# Wait for notification
|
||||
self.sync_q.get(block=block, timeout=timeout)
|
||||
# Wait for notification (this doesn't return the actual item, just signals availability)
|
||||
self._notification_queue.get(block=block, timeout=timeout)
|
||||
|
||||
# Get highest priority item
|
||||
with self._lock:
|
||||
@@ -98,69 +114,91 @@ class RecheckPriorityQueue:
|
||||
raise Exception("Priority queue inconsistency")
|
||||
item = heapq.heappop(self._priority_items)
|
||||
|
||||
# Emit signals
|
||||
self._emit_get_signals()
|
||||
# Signal emission after successful retrieval - log but don't lose the item
|
||||
# Item is already retrieved, so signal failure shouldn't affect queue state
|
||||
try:
|
||||
self._emit_get_signals()
|
||||
except Exception as signal_e:
|
||||
logger.error(f"Failed to emit get signals but item retrieved successfully: {signal_e}")
|
||||
|
||||
logger.debug(f"Successfully retrieved item: {self._get_item_uuid(item)}")
|
||||
logger.trace(f"RecheckQueue.get() successfully retrieved item: {self._get_item_uuid(item)}")
|
||||
return item
|
||||
|
||||
except queue.Empty:
|
||||
# Queue is empty with timeout - expected behavior, re-raise without logging
|
||||
raise
|
||||
except queue_module.Empty:
|
||||
# Queue is empty with timeout - expected behavior
|
||||
logger.trace(f"RecheckQueue.get() timed out - queue is empty (timeout={timeout})")
|
||||
raise # noqa
|
||||
except Exception as e:
|
||||
# Re-raise without logging - caller (worker) will handle and log appropriately
|
||||
logger.trace(f"RecheckQueue.get() failed with exception: {type(e).__name__}: {str(e)}")
|
||||
raise
|
||||
|
||||
# ASYNC INTERFACE (for workers)
|
||||
async def async_put(self, item):
|
||||
"""Pure async put with priority ordering"""
|
||||
async def async_put(self, item, executor=None):
|
||||
"""Async put with priority ordering - uses thread pool to avoid blocking
|
||||
|
||||
Args:
|
||||
item: Item to add to queue
|
||||
executor: Optional ThreadPoolExecutor. If None, uses default pool.
|
||||
"""
|
||||
logger.trace(f"RecheckQueue.async_put() called for item: {self._get_item_uuid(item)}, executor={executor}")
|
||||
import asyncio
|
||||
try:
|
||||
# Add to priority storage
|
||||
with self._lock:
|
||||
heapq.heappush(self._priority_items, item)
|
||||
|
||||
# Notify via janus async queue
|
||||
await self.async_q.put(True)
|
||||
|
||||
# Emit signals
|
||||
self._emit_put_signals(item)
|
||||
|
||||
logger.debug(f"Successfully async queued item: {self._get_item_uuid(item)}")
|
||||
return True
|
||||
|
||||
# Use run_in_executor to call sync put without blocking event loop
|
||||
loop = asyncio.get_event_loop()
|
||||
result = await loop.run_in_executor(
|
||||
executor, # Use provided executor or default
|
||||
lambda: self.put(item, block=True, timeout=5.0)
|
||||
)
|
||||
|
||||
logger.trace(f"RecheckQueue.async_put() successfully queued item: {self._get_item_uuid(item)}")
|
||||
return result
|
||||
|
||||
except Exception as e:
|
||||
logger.critical(f"CRITICAL: Failed to async put item {self._get_item_uuid(item)}: {str(e)}")
|
||||
# Remove from priority storage if janus put failed
|
||||
try:
|
||||
with self._lock:
|
||||
if item in self._priority_items:
|
||||
self._priority_items.remove(item)
|
||||
heapq.heapify(self._priority_items)
|
||||
except Exception as cleanup_e:
|
||||
logger.critical(f"CRITICAL: Failed to cleanup after async put failure: {str(e)}")
|
||||
return False
|
||||
|
||||
async def async_get(self):
|
||||
"""Pure async get with priority ordering"""
|
||||
|
||||
async def async_get(self, executor=None, timeout=1.0):
|
||||
"""
|
||||
Efficient async get using executor for blocking call.
|
||||
|
||||
HYBRID APPROACH: Best of both worlds
|
||||
- Uses run_in_executor for efficient blocking (no polling overhead)
|
||||
- Single timeout (no double-timeout race condition)
|
||||
- Scales well: executor sized to match worker count
|
||||
|
||||
With FETCH_WORKERS=10: 10 threads blocked max (acceptable)
|
||||
With FETCH_WORKERS=200: Need executor with 200+ threads (see worker_pool.py)
|
||||
|
||||
Args:
|
||||
executor: ThreadPoolExecutor (sized to match worker count)
|
||||
timeout: Maximum time to wait in seconds
|
||||
|
||||
Returns:
|
||||
Item from queue
|
||||
|
||||
Raises:
|
||||
queue.Empty: If timeout expires with no item available
|
||||
"""
|
||||
logger.trace(f"RecheckQueue.async_get() called, timeout={timeout}")
|
||||
import asyncio
|
||||
try:
|
||||
# Wait for notification
|
||||
await self.async_q.get()
|
||||
# Use run_in_executor to call sync get efficiently
|
||||
# No outer asyncio.wait_for wrapper = no double timeout issue!
|
||||
loop = asyncio.get_event_loop()
|
||||
item = await loop.run_in_executor(
|
||||
executor,
|
||||
lambda: self.get(block=True, timeout=timeout)
|
||||
)
|
||||
|
||||
# Get highest priority item
|
||||
with self._lock:
|
||||
if not self._priority_items:
|
||||
logger.critical(f"CRITICAL: Async queue notification received but no priority items available")
|
||||
raise Exception("Priority queue inconsistency")
|
||||
item = heapq.heappop(self._priority_items)
|
||||
|
||||
# Emit signals
|
||||
self._emit_get_signals()
|
||||
|
||||
logger.debug(f"Successfully async retrieved item: {self._get_item_uuid(item)}")
|
||||
logger.trace(f"RecheckQueue.async_get() successfully retrieved item: {self._get_item_uuid(item)}")
|
||||
return item
|
||||
|
||||
except queue.Empty:
|
||||
logger.trace(f"RecheckQueue.async_get() timed out - queue is empty")
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.critical(f"CRITICAL: Failed to async get item from queue: {str(e)}")
|
||||
logger.critical(f"CRITICAL: Failed to async get item from queue: {type(e).__name__}: {str(e)}")
|
||||
raise
|
||||
|
||||
# UTILITY METHODS
|
||||
@@ -176,11 +214,45 @@ class RecheckPriorityQueue:
|
||||
def empty(self) -> bool:
|
||||
"""Check if queue is empty"""
|
||||
return self.qsize() == 0
|
||||
|
||||
def close(self):
|
||||
"""Close the janus queue"""
|
||||
|
||||
def get_queued_uuids(self) -> list:
|
||||
"""Get list of all queued UUIDs efficiently with single lock"""
|
||||
try:
|
||||
self._janus_queue.close()
|
||||
with self._lock:
|
||||
return [item.item['uuid'] for item in self._priority_items if hasattr(item, 'item') and 'uuid' in item.item]
|
||||
except Exception as e:
|
||||
logger.critical(f"CRITICAL: Failed to get queued UUIDs: {str(e)}")
|
||||
return []
|
||||
|
||||
def clear(self):
|
||||
"""Clear all items from both priority storage and notification queue"""
|
||||
try:
|
||||
with self._lock:
|
||||
# Clear priority items
|
||||
self._priority_items.clear()
|
||||
|
||||
# Drain all notifications to prevent stale notifications
|
||||
# This is critical for test cleanup to prevent queue desynchronization
|
||||
drained = 0
|
||||
while not self._notification_queue.empty():
|
||||
try:
|
||||
self._notification_queue.get_nowait()
|
||||
drained += 1
|
||||
except queue.Empty:
|
||||
break
|
||||
|
||||
if drained > 0:
|
||||
logger.debug(f"Cleared queue: removed {drained} notifications")
|
||||
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.critical(f"CRITICAL: Failed to clear queue: {str(e)}")
|
||||
return False
|
||||
|
||||
def close(self):
|
||||
"""Close the queue"""
|
||||
try:
|
||||
# Nothing to close for threading.Queue
|
||||
logger.debug("RecheckPriorityQueue closed successfully")
|
||||
except Exception as e:
|
||||
logger.critical(f"CRITICAL: Failed to close RecheckPriorityQueue: {str(e)}")
|
||||
@@ -312,7 +384,7 @@ class RecheckPriorityQueue:
|
||||
except Exception:
|
||||
pass
|
||||
return 'unknown'
|
||||
|
||||
|
||||
def _emit_put_signals(self, item):
|
||||
"""Emit signals when item is added"""
|
||||
try:
|
||||
@@ -321,14 +393,14 @@ class RecheckPriorityQueue:
|
||||
watch_check_update = signal('watch_check_update')
|
||||
if watch_check_update:
|
||||
watch_check_update.send(watch_uuid=item.item['uuid'])
|
||||
|
||||
|
||||
# Queue length signal
|
||||
if self.queue_length_signal:
|
||||
self.queue_length_signal.send(length=self.qsize())
|
||||
|
||||
|
||||
except Exception as e:
|
||||
logger.critical(f"CRITICAL: Failed to emit put signals: {str(e)}")
|
||||
|
||||
|
||||
def _emit_get_signals(self):
|
||||
"""Emit signals when item is removed"""
|
||||
try:
|
||||
@@ -352,76 +424,115 @@ class NotificationQueue:
|
||||
Simple wrapper around janus with bulletproof error handling.
|
||||
"""
|
||||
|
||||
def __init__(self, maxsize: int = 0):
|
||||
def __init__(self, maxsize: int = 0, datastore=None):
|
||||
try:
|
||||
self._janus_queue = janus.Queue(maxsize=maxsize)
|
||||
# BOTH interfaces required - see class docstring for why
|
||||
self.sync_q = self._janus_queue.sync_q # Flask routes, threads
|
||||
self.async_q = self._janus_queue.async_q # Async workers
|
||||
# Use pure threading.Queue to avoid event loop binding issues
|
||||
self._notification_queue = queue.Queue(maxsize=maxsize if maxsize > 0 else 0)
|
||||
self.notification_event_signal = signal('notification_event')
|
||||
self.datastore = datastore # For checking all_muted setting
|
||||
self._lock = threading.RLock()
|
||||
logger.debug("NotificationQueue initialized successfully")
|
||||
except Exception as e:
|
||||
logger.critical(f"CRITICAL: Failed to initialize NotificationQueue: {str(e)}")
|
||||
raise
|
||||
|
||||
def set_datastore(self, datastore):
|
||||
"""Set datastore reference after initialization (for circular dependency handling)"""
|
||||
self.datastore = datastore
|
||||
|
||||
def put(self, item: Dict[str, Any], block: bool = True, timeout: Optional[float] = None):
|
||||
"""Thread-safe sync put with signal emission"""
|
||||
logger.trace(f"NotificationQueue.put() called for item: {item.get('uuid', 'unknown')}, block={block}, timeout={timeout}")
|
||||
try:
|
||||
self.sync_q.put(item, block=block, timeout=timeout)
|
||||
# Check if all notifications are muted
|
||||
if self.datastore and self.datastore.data['settings']['application'].get('all_muted', False):
|
||||
logger.debug(f"Notification blocked - all notifications are muted: {item.get('uuid', 'unknown')}")
|
||||
return False
|
||||
|
||||
with self._lock:
|
||||
self._notification_queue.put(item, block=block, timeout=timeout)
|
||||
self._emit_notification_signal(item)
|
||||
logger.debug(f"Successfully queued notification: {item.get('uuid', 'unknown')}")
|
||||
logger.trace(f"NotificationQueue.put() successfully queued notification: {item.get('uuid', 'unknown')}")
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.critical(f"CRITICAL: Failed to put notification {item.get('uuid', 'unknown')}: {str(e)}")
|
||||
return False
|
||||
|
||||
async def async_put(self, item: Dict[str, Any]):
|
||||
"""Pure async put with signal emission"""
|
||||
async def async_put(self, item: Dict[str, Any], executor=None):
|
||||
"""Async put with signal emission - uses thread pool
|
||||
|
||||
Args:
|
||||
item: Notification item to queue
|
||||
executor: Optional ThreadPoolExecutor
|
||||
"""
|
||||
logger.trace(f"NotificationQueue.async_put() called for item: {item.get('uuid', 'unknown')}, executor={executor}")
|
||||
import asyncio
|
||||
try:
|
||||
await self.async_q.put(item)
|
||||
self._emit_notification_signal(item)
|
||||
logger.debug(f"Successfully async queued notification: {item.get('uuid', 'unknown')}")
|
||||
# Check if all notifications are muted
|
||||
if self.datastore and self.datastore.data['settings']['application'].get('all_muted', False):
|
||||
logger.debug(f"Notification blocked - all notifications are muted: {item.get('uuid', 'unknown')}")
|
||||
return False
|
||||
|
||||
loop = asyncio.get_event_loop()
|
||||
await loop.run_in_executor(executor, lambda: self.put(item, block=True, timeout=5.0))
|
||||
logger.trace(f"NotificationQueue.async_put() successfully queued notification: {item.get('uuid', 'unknown')}")
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.critical(f"CRITICAL: Failed to async put notification {item.get('uuid', 'unknown')}: {str(e)}")
|
||||
return False
|
||||
|
||||
|
||||
def get(self, block: bool = True, timeout: Optional[float] = None):
|
||||
"""Thread-safe sync get"""
|
||||
logger.trace(f"NotificationQueue.get() called, block={block}, timeout={timeout}")
|
||||
try:
|
||||
return self.sync_q.get(block=block, timeout=timeout)
|
||||
with self._lock:
|
||||
item = self._notification_queue.get(block=block, timeout=timeout)
|
||||
logger.trace(f"NotificationQueue.get() retrieved item: {item.get('uuid', 'unknown') if isinstance(item, dict) else 'unknown'}")
|
||||
return item
|
||||
except queue.Empty as e:
|
||||
logger.trace(f"NotificationQueue.get() timed out - queue is empty (timeout={timeout})")
|
||||
raise e
|
||||
except Exception as e:
|
||||
logger.critical(f"CRITICAL: Failed to get notification: {str(e)}")
|
||||
logger.critical(f"CRITICAL: Failed to get notification: {type(e).__name__}: {str(e)}")
|
||||
raise e
|
||||
|
||||
async def async_get(self):
|
||||
"""Pure async get"""
|
||||
|
||||
async def async_get(self, executor=None):
|
||||
"""Async get - uses thread pool
|
||||
|
||||
Args:
|
||||
executor: Optional ThreadPoolExecutor
|
||||
"""
|
||||
logger.trace(f"NotificationQueue.async_get() called, executor={executor}")
|
||||
import asyncio
|
||||
try:
|
||||
return await self.async_q.get()
|
||||
loop = asyncio.get_event_loop()
|
||||
item = await loop.run_in_executor(executor, lambda: self.get(block=True, timeout=1.0))
|
||||
logger.trace(f"NotificationQueue.async_get() retrieved item: {item.get('uuid', 'unknown') if isinstance(item, dict) else 'unknown'}")
|
||||
return item
|
||||
except queue.Empty as e:
|
||||
logger.trace(f"NotificationQueue.async_get() timed out - queue is empty")
|
||||
raise e
|
||||
except Exception as e:
|
||||
logger.critical(f"CRITICAL: Failed to async get notification: {str(e)}")
|
||||
logger.critical(f"CRITICAL: Failed to async get notification: {type(e).__name__}: {str(e)}")
|
||||
raise e
|
||||
|
||||
def qsize(self) -> int:
|
||||
"""Get current queue size"""
|
||||
try:
|
||||
return self.sync_q.qsize()
|
||||
with self._lock:
|
||||
return self._notification_queue.qsize()
|
||||
except Exception as e:
|
||||
logger.critical(f"CRITICAL: Failed to get notification queue size: {str(e)}")
|
||||
return 0
|
||||
|
||||
|
||||
def empty(self) -> bool:
|
||||
"""Check if queue is empty"""
|
||||
return self.qsize() == 0
|
||||
|
||||
|
||||
def close(self):
|
||||
"""Close the janus queue"""
|
||||
"""Close the queue"""
|
||||
try:
|
||||
self._janus_queue.close()
|
||||
# Nothing to close for threading.Queue
|
||||
logger.debug("NotificationQueue closed successfully")
|
||||
except Exception as e:
|
||||
logger.critical(f"CRITICAL: Failed to close NotificationQueue: {str(e)}")
|
||||
|
||||
@@ -37,9 +37,9 @@ def register_watch_operation_handlers(socketio, datastore):
|
||||
# Import here to avoid circular imports
|
||||
from changedetectionio.flask_app import update_q
|
||||
from changedetectionio import queuedWatchMetaData
|
||||
from changedetectionio import worker_handler
|
||||
from changedetectionio import worker_pool
|
||||
|
||||
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
worker_pool.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
logger.info(f"Socket.IO: Queued recheck for watch {uuid}")
|
||||
else:
|
||||
emit('operation_result', {'success': False, 'error': f'Unknown operation: {op}'})
|
||||
|
||||
@@ -145,16 +145,13 @@ def handle_watch_update(socketio, **kwargs):
|
||||
# Emit the watch update to all connected clients
|
||||
from changedetectionio.flask_app import update_q
|
||||
from changedetectionio.flask_app import _jinja2_filter_datetime
|
||||
from changedetectionio import worker_handler
|
||||
from changedetectionio import worker_pool
|
||||
|
||||
# Get list of watches that are currently running
|
||||
running_uuids = worker_handler.get_running_uuids()
|
||||
running_uuids = worker_pool.get_running_uuids()
|
||||
|
||||
# Get list of watches in the queue
|
||||
queue_list = []
|
||||
for q_item in update_q.queue:
|
||||
if hasattr(q_item, 'item') and 'uuid' in q_item.item:
|
||||
queue_list.append(q_item.item['uuid'])
|
||||
# Get list of watches in the queue (efficient single-lock method)
|
||||
queue_list = update_q.get_queued_uuids()
|
||||
|
||||
# Get the error texts from the watch
|
||||
error_texts = watch.compile_error_texts()
|
||||
@@ -243,7 +240,10 @@ def init_socketio(app, datastore):
|
||||
async_mode=async_mode,
|
||||
cors_allowed_origins=cors_origins, # None means same-origin only
|
||||
logger=strtobool(os.getenv('SOCKETIO_LOGGING', 'False')),
|
||||
engineio_logger=strtobool(os.getenv('SOCKETIO_LOGGING', 'False')))
|
||||
engineio_logger=strtobool(os.getenv('SOCKETIO_LOGGING', 'False')),
|
||||
# Disable WebSocket compression to prevent memory accumulation
|
||||
# Flask-Compress already handles HTTP response compression
|
||||
engineio_options={'http_compression': False, 'compression_threshold': 0})
|
||||
|
||||
# Set up event handlers
|
||||
logger.info("Socket.IO: Registering connect event handler")
|
||||
@@ -252,23 +252,34 @@ def init_socketio(app, datastore):
|
||||
def event_checkbox_operations(data):
|
||||
from changedetectionio.blueprint.ui import _handle_operations
|
||||
from changedetectionio import queuedWatchMetaData
|
||||
from changedetectionio import worker_handler
|
||||
from changedetectionio import worker_pool
|
||||
from changedetectionio.flask_app import update_q, watch_check_update
|
||||
import threading
|
||||
|
||||
logger.trace(f"Got checkbox operations event: {data}")
|
||||
|
||||
datastore = socketio.datastore
|
||||
|
||||
_handle_operations(
|
||||
op=data.get('op'),
|
||||
uuids=data.get('uuids'),
|
||||
datastore=datastore,
|
||||
extra_data=data.get('extra_data'),
|
||||
worker_handler=worker_handler,
|
||||
update_q=update_q,
|
||||
queuedWatchMetaData=queuedWatchMetaData,
|
||||
watch_check_update=watch_check_update,
|
||||
emit_flash=False
|
||||
)
|
||||
def run_operation():
|
||||
"""Run the operation in a background thread to avoid blocking the socket.io event loop"""
|
||||
try:
|
||||
_handle_operations(
|
||||
op=data.get('op'),
|
||||
uuids=data.get('uuids'),
|
||||
datastore=datastore,
|
||||
extra_data=data.get('extra_data'),
|
||||
worker_pool=worker_pool,
|
||||
update_q=update_q,
|
||||
queuedWatchMetaData=queuedWatchMetaData,
|
||||
watch_check_update=watch_check_update,
|
||||
emit_flash=False
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Error in checkbox operation thread: {e}")
|
||||
|
||||
# Start operation in a disposable daemon thread
|
||||
thread = threading.Thread(target=run_operation, daemon=True, name=f"checkbox-op-{data.get('op')}")
|
||||
thread.start()
|
||||
|
||||
@socketio.on('connect')
|
||||
def handle_connect():
|
||||
|
||||
@@ -10,6 +10,7 @@
|
||||
set -e
|
||||
|
||||
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
|
||||
rm tests/logs/* -f
|
||||
|
||||
# Since theres no curl installed lets roll with python3
|
||||
check_sanity() {
|
||||
@@ -64,41 +65,38 @@ data_sanity_test
|
||||
echo "-------------------- Running rest of tests in parallel -------------------------------"
|
||||
|
||||
# REMOVE_REQUESTS_OLD_SCREENSHOTS disabled so that we can write a screenshot and send it in test_notifications.py without a real browser
|
||||
REMOVE_REQUESTS_OLD_SCREENSHOTS=false \
|
||||
FETCH_WORKERS=2 REMOVE_REQUESTS_OLD_SCREENSHOTS=false \
|
||||
pytest tests/test_*.py \
|
||||
-n 30 \
|
||||
-n 8 \
|
||||
--dist=load \
|
||||
-vvv \
|
||||
-s \
|
||||
--capture=no \
|
||||
-k "not test_queue_system" \
|
||||
--log-cli-level=DEBUG \
|
||||
--log-cli-format="%(asctime)s [%(process)d] [%(levelname)s] %(name)s: %(message)s"
|
||||
|
||||
echo "---------------------------- DONE parallel test ---------------------------------------"
|
||||
|
||||
FETCH_WORKERS=20 pytest -vvv -s tests/test_queue_handler.py
|
||||
|
||||
echo "RUNNING WITH BASE_URL SET"
|
||||
|
||||
# Now re-run some tests with BASE_URL enabled
|
||||
# Re #65 - Ability to include a link back to the installation, in the notification.
|
||||
export BASE_URL="https://really-unique-domain.io"
|
||||
|
||||
REMOVE_REQUESTS_OLD_SCREENSHOTS=false pytest -vv -s --maxfail=1 tests/test_notification.py
|
||||
|
||||
|
||||
# Re-run with HIDE_REFERER set - could affect login
|
||||
export HIDE_REFERER=True
|
||||
pytest -vv -s --maxfail=1 tests/test_access_control.py
|
||||
REMOVE_REQUESTS_OLD_SCREENSHOTS=false pytest -vv -s --maxfail=1 tests/test_notification.py tests/test_access_control.py
|
||||
|
||||
|
||||
# Re-run a few tests that will trigger brotli based storage
|
||||
export SNAPSHOT_BROTLI_COMPRESSION_THRESHOLD=5
|
||||
pytest -vv -s --maxfail=1 tests/test_access_control.py
|
||||
REMOVE_REQUESTS_OLD_SCREENSHOTS=false pytest tests/test_notification.py
|
||||
pytest -vv -s --maxfail=1 tests/test_backend.py
|
||||
pytest -vv -s --maxfail=1 tests/test_rss.py
|
||||
pytest -vv -s --maxfail=1 tests/test_unique_lines.py
|
||||
# And again with brotli+screenshot attachment
|
||||
SNAPSHOT_BROTLI_COMPRESSION_THRESHOLD=5 REMOVE_REQUESTS_OLD_SCREENSHOTS=false pytest -vv -s --maxfail=1 --dist=load tests/test_backend.py tests/test_rss.py tests/test_unique_lines.py tests/test_notification.py tests/test_access_control.py
|
||||
|
||||
# Try high concurrency
|
||||
FETCH_WORKERS=50 pytest tests/test_history_consistency.py -vv -l -s
|
||||
# Try high concurrency with aggressive worker restarts
|
||||
FETCH_WORKERS=50 WORKER_MAX_RUNTIME=2 WORKER_MAX_JOBS=1 pytest tests/test_history_consistency.py -vv -l -s
|
||||
|
||||
# Check file:// will pickup a file when enabled
|
||||
echo "Hello world" > /tmp/test-file.txt
|
||||
|
||||
@@ -1,19 +1,25 @@
|
||||
{
|
||||
"name": "",
|
||||
"short_name": "",
|
||||
"name": "ChangeDetection.io",
|
||||
"short_name": "ChangeDetect",
|
||||
"description": "Self-hosted website change detection and monitoring",
|
||||
"icons": [
|
||||
{
|
||||
"src": "android-chrome-192x192.png",
|
||||
"sizes": "192x192",
|
||||
"type": "image/png"
|
||||
"type": "image/png",
|
||||
"purpose": "any maskable"
|
||||
},
|
||||
{
|
||||
"src": "android-chrome-256x256.png",
|
||||
"sizes": "256x256",
|
||||
"type": "image/png"
|
||||
"type": "image/png",
|
||||
"purpose": "any maskable"
|
||||
}
|
||||
],
|
||||
"theme_color": "#ffffff",
|
||||
"start_url": "/",
|
||||
"theme_color": "#5bbad5",
|
||||
"background_color": "#ffffff",
|
||||
"display": "standalone"
|
||||
"display": "standalone",
|
||||
"categories": ["utilities", "productivity"],
|
||||
"orientation": "any"
|
||||
}
|
||||
|
||||
@@ -76,7 +76,7 @@ $(document).ready(function () {
|
||||
|
||||
// Cache DOM elements for performance
|
||||
const queueBubble = document.getElementById('queue-bubble');
|
||||
|
||||
const queueSizePagerInfoText = document.getElementById('queue-size-int');
|
||||
// Only try to connect if authentication isn't required or user is authenticated
|
||||
// The 'is_authenticated' variable will be set in the template
|
||||
if (typeof is_authenticated !== 'undefined' ? is_authenticated : true) {
|
||||
@@ -118,6 +118,10 @@ $(document).ready(function () {
|
||||
|
||||
socket.on('queue_size', function (data) {
|
||||
console.log(`${data.event_timestamp} - Queue size update: ${data.q_length}`);
|
||||
if(queueSizePagerInfoText) {
|
||||
queueSizePagerInfoText.textContent = parseInt(data.q_length).toLocaleString() || 'None';
|
||||
}
|
||||
document.body.classList.toggle('has-queue', parseInt(data.q_length) > 0);
|
||||
|
||||
// Update queue bubble in action sidebar
|
||||
//if (queueBubble) {
|
||||
|
||||
@@ -69,6 +69,19 @@
|
||||
}
|
||||
});
|
||||
|
||||
// Handle Enter key in search input
|
||||
if (searchInput) {
|
||||
searchInput.addEventListener('keydown', function(e) {
|
||||
if (e.key === 'Enter') {
|
||||
e.preventDefault();
|
||||
if (searchForm) {
|
||||
// Trigger form submission programmatically
|
||||
searchForm.dispatchEvent(new Event('submit'));
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
// Handle form submission
|
||||
if (searchForm) {
|
||||
searchForm.addEventListener('submit', function(e) {
|
||||
@@ -88,8 +101,8 @@
|
||||
params.append('tags', tags);
|
||||
}
|
||||
|
||||
// Navigate to search results
|
||||
window.location.href = '?' + params.toString();
|
||||
// Navigate to search results (always redirect to watchlist home)
|
||||
window.location.href = '/?' + params.toString();
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
@@ -110,6 +110,9 @@
|
||||
background: var(--color-background-menu-link-hover);
|
||||
}
|
||||
}
|
||||
&#menu-pause, &#menu-mute {
|
||||
display: none;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,6 +2,13 @@
|
||||
padding: 0.5rem 1em;
|
||||
line-height: 1.2rem;
|
||||
}
|
||||
#menu-mute, #menu-pause {
|
||||
padding-left: 0.3rem;
|
||||
padding-right: 0.3rem;
|
||||
img {
|
||||
height: 1.2rem;
|
||||
}
|
||||
}
|
||||
|
||||
.pure-menu-item {
|
||||
svg {
|
||||
|
||||
@@ -1,6 +1,4 @@
|
||||
.pagination-page-info {
|
||||
color: #fff;
|
||||
font-size: 0.85rem;
|
||||
text-transform: capitalize;
|
||||
}
|
||||
|
||||
|
||||
@@ -184,24 +184,24 @@ html[data-darkmode=true] {
|
||||
// Mobile adjustments
|
||||
@media only screen and (max-width: 768px) {
|
||||
.toast-container {
|
||||
left: 10px !important;
|
||||
right: 10px !important;
|
||||
top: 10px !important;
|
||||
transform: none !important;
|
||||
align-items: stretch;
|
||||
left: 50% !important;
|
||||
right: auto !important;
|
||||
top: 80px !important;
|
||||
transform: translateX(-50%) !important;
|
||||
align-items: center;
|
||||
|
||||
&.toast-bottom-right,
|
||||
&.toast-bottom-center,
|
||||
&.toast-bottom-left {
|
||||
top: auto !important;
|
||||
bottom: 10px !important;
|
||||
bottom: 80px !important;
|
||||
}
|
||||
}
|
||||
|
||||
.toast {
|
||||
min-width: auto;
|
||||
max-width: none;
|
||||
width: 100%;
|
||||
width: 80vw;
|
||||
transform: translateY(-100px);
|
||||
|
||||
&.toast-show {
|
||||
|
||||
@@ -125,6 +125,11 @@ $grid-gap: 0.5rem;
|
||||
border-bottom: none;
|
||||
}
|
||||
|
||||
// Empty state message - span full width on mobile
|
||||
> td[colspan] {
|
||||
grid-column: 1 / -1;
|
||||
}
|
||||
|
||||
> td.title-col {
|
||||
grid-column: 1 / -1;
|
||||
grid-row: 1;
|
||||
|
||||
@@ -1,4 +1,32 @@
|
||||
/* table related */
|
||||
#stats_row {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
width: 100%;
|
||||
color: #fff;
|
||||
font-size: 0.85rem;
|
||||
>* {
|
||||
padding-bottom: 0.5rem;
|
||||
}
|
||||
.left {
|
||||
text-align: left;
|
||||
}
|
||||
|
||||
.right {
|
||||
opacity: 0.5;
|
||||
transition: opacity 0.6s ease;
|
||||
margin-left: auto; /* pushes it to the far right */
|
||||
text-align: right;
|
||||
}
|
||||
}
|
||||
body.has-queue {
|
||||
#stats_row {
|
||||
.right {
|
||||
opacity: 1.0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
.watch-table {
|
||||
width: 100%;
|
||||
font-size: 80%;
|
||||
|
||||
@@ -33,6 +33,31 @@
|
||||
@use "parts/login_form";
|
||||
@use "parts/tabs";
|
||||
|
||||
// Smooth transitions for theme switching
|
||||
body,
|
||||
.pure-table,
|
||||
.pure-table thead,
|
||||
.pure-table td,
|
||||
.pure-table th,
|
||||
.pure-form input,
|
||||
.pure-form textarea,
|
||||
.pure-form select,
|
||||
.edit-form .inner,
|
||||
.pure-menu-horizontal,
|
||||
footer,
|
||||
.sticky-tab,
|
||||
#diff-jump,
|
||||
.button-tag,
|
||||
#new-watch-form,
|
||||
#new-watch-form input:not(.pure-button),
|
||||
code,
|
||||
.messages li,
|
||||
#checkbox-operations,
|
||||
.inline-warning,
|
||||
a,
|
||||
.watch-controls img {
|
||||
transition: color 0.4s ease, background-color 0.4s ease, background 0.4s ease, border-color 0.4s ease, box-shadow 0.4s ease;
|
||||
}
|
||||
|
||||
body {
|
||||
color: var(--color-text);
|
||||
@@ -197,6 +222,19 @@ code {
|
||||
color: var(--color-white);
|
||||
background: var(--color-text-watch-tag-list);
|
||||
@extend .inline-tag;
|
||||
|
||||
/* Remove default anchor styling when used as links */
|
||||
text-decoration: none;
|
||||
|
||||
&:hover {
|
||||
text-decoration: none;
|
||||
opacity: 0.8;
|
||||
cursor: pointer;
|
||||
}
|
||||
|
||||
&:visited {
|
||||
color: var(--color-white);
|
||||
}
|
||||
}
|
||||
|
||||
@media (min-width: 768px) {
|
||||
|
||||
File diff suppressed because one or more lines are too long
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,83 @@
|
||||
"""
|
||||
Base classes for the datastore.
|
||||
|
||||
This module defines the abstract interfaces that all datastore implementations must follow.
|
||||
"""
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from threading import Lock
|
||||
from loguru import logger
|
||||
|
||||
|
||||
class DataStore(ABC):
|
||||
"""
|
||||
Abstract base class for all datastore implementations.
|
||||
|
||||
Defines the core interface that all datastores must implement for:
|
||||
- Loading and saving data
|
||||
- Managing watches
|
||||
- Handling settings
|
||||
- Providing data access
|
||||
"""
|
||||
|
||||
lock = Lock()
|
||||
datastore_path = None
|
||||
|
||||
@abstractmethod
|
||||
def reload_state(self, datastore_path, include_default_watches, version_tag):
|
||||
"""
|
||||
Load data from persistent storage.
|
||||
|
||||
Args:
|
||||
datastore_path: Path to the datastore directory
|
||||
include_default_watches: Whether to create default watches if none exist
|
||||
version_tag: Application version string
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def add_watch(self, url, **kwargs):
|
||||
"""
|
||||
Add a new watch.
|
||||
|
||||
Args:
|
||||
url: URL to watch
|
||||
**kwargs: Additional watch parameters
|
||||
|
||||
Returns:
|
||||
UUID of the created watch
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def update_watch(self, uuid, update_obj):
|
||||
"""
|
||||
Update an existing watch.
|
||||
|
||||
Args:
|
||||
uuid: Watch UUID
|
||||
update_obj: Dictionary of fields to update
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def delete(self, uuid):
|
||||
"""
|
||||
Delete a watch.
|
||||
|
||||
Args:
|
||||
uuid: Watch UUID to delete
|
||||
"""
|
||||
pass
|
||||
|
||||
@property
|
||||
@abstractmethod
|
||||
def data(self):
|
||||
"""
|
||||
Access to the underlying data structure.
|
||||
|
||||
Returns:
|
||||
Dictionary containing all datastore data
|
||||
"""
|
||||
pass
|
||||
|
||||
@@ -0,0 +1,381 @@
|
||||
"""
|
||||
File-based datastore with individual watch persistence and immediate commits.
|
||||
|
||||
This module provides the FileSavingDataStore abstract class that implements:
|
||||
- Individual watch.json file persistence
|
||||
- Immediate commit-based persistence (watch.commit(), datastore.commit())
|
||||
- Atomic file writes safe for NFS/NAS
|
||||
"""
|
||||
|
||||
import glob
|
||||
import json
|
||||
import os
|
||||
import tempfile
|
||||
import time
|
||||
from loguru import logger
|
||||
|
||||
from .base import DataStore
|
||||
from .. import strtobool
|
||||
|
||||
# Try to import orjson for faster JSON serialization
|
||||
try:
|
||||
import orjson
|
||||
HAS_ORJSON = True
|
||||
except ImportError:
|
||||
HAS_ORJSON = False
|
||||
|
||||
# Fsync configuration: Force file data to disk for crash safety
|
||||
# Default False to match legacy behavior (write-and-rename without fsync)
|
||||
# Set to True for mission-critical deployments requiring crash consistency
|
||||
FORCE_FSYNC_DATA_IS_CRITICAL = bool(strtobool(os.getenv('FORCE_FSYNC_DATA_IS_CRITICAL', 'False')))
|
||||
|
||||
# ============================================================================
|
||||
# Helper Functions for Atomic File Operations
|
||||
# ============================================================================
|
||||
|
||||
def save_json_atomic(file_path, data_dict, label="file", max_size_mb=10):
|
||||
"""
|
||||
Save JSON data to disk using atomic write pattern.
|
||||
|
||||
Generic helper for saving any JSON data (settings, watches, etc.) with:
|
||||
- Atomic write (temp file + rename)
|
||||
- Directory fsync for crash consistency (only for new files)
|
||||
- Size validation
|
||||
- Proper error handling
|
||||
|
||||
Thread safety: Caller must hold datastore.lock to prevent concurrent modifications.
|
||||
Multi-process safety: Not supported - run only one app instance per datastore.
|
||||
|
||||
Args:
|
||||
file_path: Full path to target JSON file
|
||||
data_dict: Dictionary to serialize
|
||||
label: Human-readable label for error messages (e.g., "watch", "settings")
|
||||
max_size_mb: Maximum allowed file size in MB
|
||||
|
||||
Raises:
|
||||
ValueError: If serialized data exceeds max_size_mb
|
||||
OSError: If disk is full (ENOSPC) or other I/O error
|
||||
"""
|
||||
# Check if file already exists (before we start writing)
|
||||
# Directory fsync only needed for NEW files to persist the filename
|
||||
file_exists = os.path.exists(file_path)
|
||||
|
||||
# Ensure parent directory exists
|
||||
parent_dir = os.path.dirname(file_path)
|
||||
os.makedirs(parent_dir, exist_ok=True)
|
||||
|
||||
# Create temp file in same directory (required for NFS atomicity)
|
||||
fd, temp_path = tempfile.mkstemp(
|
||||
suffix='.tmp',
|
||||
prefix='json-',
|
||||
dir=parent_dir,
|
||||
text=False
|
||||
)
|
||||
|
||||
fd_closed = False
|
||||
try:
|
||||
# Serialize data
|
||||
t0 = time.time()
|
||||
if HAS_ORJSON:
|
||||
data = orjson.dumps(data_dict, option=orjson.OPT_INDENT_2)
|
||||
else:
|
||||
data = json.dumps(data_dict, indent=2, ensure_ascii=False).encode('utf-8')
|
||||
serialize_ms = (time.time() - t0) * 1000
|
||||
|
||||
# Safety check: validate size
|
||||
MAX_SIZE = max_size_mb * 1024 * 1024
|
||||
data_size = len(data)
|
||||
if data_size > MAX_SIZE:
|
||||
raise ValueError(
|
||||
f"{label.capitalize()} data is unexpectedly large: {data_size / 1024 / 1024:.2f}MB "
|
||||
f"(max: {max_size_mb}MB). This indicates a bug or data corruption."
|
||||
)
|
||||
|
||||
# Write to temp file
|
||||
t1 = time.time()
|
||||
os.write(fd, data)
|
||||
write_ms = (time.time() - t1) * 1000
|
||||
|
||||
# Optional fsync: Force file data to disk for crash safety
|
||||
# Only if FORCE_FSYNC_DATA_IS_CRITICAL=True (default: False, matches legacy behavior)
|
||||
t2 = time.time()
|
||||
if FORCE_FSYNC_DATA_IS_CRITICAL:
|
||||
os.fsync(fd)
|
||||
file_fsync_ms = (time.time() - t2) * 1000
|
||||
|
||||
os.close(fd)
|
||||
fd_closed = True
|
||||
|
||||
# Atomic rename
|
||||
t3 = time.time()
|
||||
os.replace(temp_path, file_path)
|
||||
rename_ms = (time.time() - t3) * 1000
|
||||
|
||||
# Sync directory to ensure filename metadata is durable
|
||||
# OPTIMIZATION: Only needed for NEW files. Existing files already have
|
||||
# directory entry persisted, so we only need file fsync for data durability.
|
||||
dir_fsync_ms = 0
|
||||
if not file_exists:
|
||||
try:
|
||||
dir_fd = os.open(parent_dir, os.O_RDONLY)
|
||||
try:
|
||||
t4 = time.time()
|
||||
os.fsync(dir_fd)
|
||||
dir_fsync_ms = (time.time() - t4) * 1000
|
||||
finally:
|
||||
os.close(dir_fd)
|
||||
except (OSError, AttributeError):
|
||||
# Windows doesn't support fsync on directories
|
||||
pass
|
||||
|
||||
# Log timing breakdown for slow saves
|
||||
# total_ms = serialize_ms + write_ms + file_fsync_ms + rename_ms + dir_fsync_ms
|
||||
# if total_ms: # Log if save took more than 10ms
|
||||
# file_status = "new" if not file_exists else "update"
|
||||
# logger.trace(
|
||||
# f"Save timing breakdown ({total_ms:.1f}ms total, {file_status}): "
|
||||
# f"serialize={serialize_ms:.1f}ms, write={write_ms:.1f}ms, "
|
||||
# f"file_fsync={file_fsync_ms:.1f}ms, rename={rename_ms:.1f}ms, "
|
||||
# f"dir_fsync={dir_fsync_ms:.1f}ms, using_orjson={HAS_ORJSON}"
|
||||
# )
|
||||
|
||||
except OSError as e:
|
||||
# Cleanup temp file
|
||||
if not fd_closed:
|
||||
try:
|
||||
os.close(fd)
|
||||
except:
|
||||
pass
|
||||
if os.path.exists(temp_path):
|
||||
try:
|
||||
os.unlink(temp_path)
|
||||
except:
|
||||
pass
|
||||
|
||||
# Provide helpful error messages
|
||||
if e.errno == 28: # ENOSPC
|
||||
raise OSError(f"Disk full: Cannot save {label}") from e
|
||||
elif e.errno == 122: # EDQUOT
|
||||
raise OSError(f"Disk quota exceeded: Cannot save {label}") from e
|
||||
else:
|
||||
raise OSError(f"I/O error saving {label}: {e}") from e
|
||||
|
||||
except Exception as e:
|
||||
# Cleanup temp file
|
||||
if not fd_closed:
|
||||
try:
|
||||
os.close(fd)
|
||||
except:
|
||||
pass
|
||||
if os.path.exists(temp_path):
|
||||
try:
|
||||
os.unlink(temp_path)
|
||||
except:
|
||||
pass
|
||||
raise e
|
||||
|
||||
|
||||
def save_watch_atomic(watch_dir, uuid, watch_dict):
|
||||
"""
|
||||
Save a watch to disk using atomic write pattern.
|
||||
|
||||
Convenience wrapper around save_json_atomic for watches.
|
||||
|
||||
Args:
|
||||
watch_dir: Directory for this watch (e.g., /datastore/{uuid})
|
||||
uuid: Watch UUID (for logging)
|
||||
watch_dict: Dictionary representation of the watch
|
||||
|
||||
Raises:
|
||||
ValueError: If serialized data exceeds 10MB (indicates bug or corruption)
|
||||
OSError: If disk is full (ENOSPC) or other I/O error
|
||||
"""
|
||||
watch_json = os.path.join(watch_dir, "watch.json")
|
||||
save_json_atomic(watch_json, watch_dict, label=f"watch {uuid}", max_size_mb=10)
|
||||
|
||||
|
||||
def load_watch_from_file(watch_json, uuid, rehydrate_entity_func):
|
||||
"""
|
||||
Load a watch from its JSON file.
|
||||
|
||||
Args:
|
||||
watch_json: Path to the watch.json file
|
||||
uuid: Watch UUID
|
||||
rehydrate_entity_func: Function to convert dict to Watch object
|
||||
|
||||
Returns:
|
||||
Tuple of (Watch object, raw_data_dict) or (None, None) if failed
|
||||
The raw_data_dict is needed to compute the hash before rehydration
|
||||
"""
|
||||
try:
|
||||
# Check file size before reading
|
||||
file_size = os.path.getsize(watch_json)
|
||||
MAX_WATCH_SIZE = 10 * 1024 * 1024 # 10MB
|
||||
if file_size > MAX_WATCH_SIZE:
|
||||
logger.critical(
|
||||
f"CORRUPTED WATCH DATA: Watch {uuid} file is unexpectedly large: "
|
||||
f"{file_size / 1024 / 1024:.2f}MB (max: {MAX_WATCH_SIZE / 1024 / 1024}MB). "
|
||||
f"File: {watch_json}. This indicates a bug or data corruption. "
|
||||
f"Watch will be skipped."
|
||||
)
|
||||
return None, None
|
||||
|
||||
if HAS_ORJSON:
|
||||
with open(watch_json, 'rb') as f:
|
||||
watch_data = orjson.loads(f.read())
|
||||
else:
|
||||
with open(watch_json, 'r', encoding='utf-8') as f:
|
||||
watch_data = json.load(f)
|
||||
|
||||
# Return both the raw data and the rehydrated watch
|
||||
# Raw data is needed to compute hash before rehydration changes anything
|
||||
watch_obj = rehydrate_entity_func(uuid, watch_data)
|
||||
return watch_obj, watch_data
|
||||
|
||||
except json.JSONDecodeError as e:
|
||||
logger.critical(
|
||||
f"CORRUPTED WATCH DATA: Failed to parse JSON for watch {uuid}. "
|
||||
f"File: {watch_json}. Error: {e}. "
|
||||
f"Watch will be skipped and may need manual recovery from backup."
|
||||
)
|
||||
return None, None
|
||||
except ValueError as e:
|
||||
# orjson raises ValueError for invalid JSON
|
||||
if "invalid json" in str(e).lower() or HAS_ORJSON:
|
||||
logger.critical(
|
||||
f"CORRUPTED WATCH DATA: Failed to parse JSON for watch {uuid}. "
|
||||
f"File: {watch_json}. Error: {e}. "
|
||||
f"Watch will be skipped and may need manual recovery from backup."
|
||||
)
|
||||
return None, None
|
||||
# Re-raise if it's not a JSON parsing error
|
||||
raise
|
||||
except FileNotFoundError:
|
||||
logger.error(f"Watch file not found: {watch_json} for watch {uuid}")
|
||||
return None, None
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to load watch {uuid} from {watch_json}: {e}")
|
||||
return None, None
|
||||
|
||||
|
||||
def load_all_watches(datastore_path, rehydrate_entity_func):
|
||||
"""
|
||||
Load all watches from individual watch.json files.
|
||||
|
||||
SYNCHRONOUS loading: Blocks until all watches are loaded.
|
||||
This ensures data consistency - web server won't accept requests
|
||||
until all watches are available. Progress logged every 100 watches.
|
||||
|
||||
Args:
|
||||
datastore_path: Path to the datastore directory
|
||||
rehydrate_entity_func: Function to convert dict to Watch object
|
||||
|
||||
Returns:
|
||||
Dictionary of uuid -> Watch object
|
||||
"""
|
||||
start_time = time.time()
|
||||
logger.info("Loading watches from individual watch.json files...")
|
||||
|
||||
watching = {}
|
||||
|
||||
if not os.path.exists(datastore_path):
|
||||
return watching
|
||||
|
||||
# Find all watch.json files using glob (faster than manual directory traversal)
|
||||
glob_start = time.time()
|
||||
watch_files = glob.glob(os.path.join(datastore_path, "*", "watch.json"))
|
||||
glob_time = time.time() - glob_start
|
||||
|
||||
total = len(watch_files)
|
||||
logger.debug(f"Found {total} watch.json files in {glob_time:.3f}s")
|
||||
|
||||
loaded = 0
|
||||
failed = 0
|
||||
|
||||
for watch_json in watch_files:
|
||||
# Extract UUID from path: /datastore/{uuid}/watch.json
|
||||
uuid_dir = os.path.basename(os.path.dirname(watch_json))
|
||||
watch, raw_data = load_watch_from_file(watch_json, uuid_dir, rehydrate_entity_func)
|
||||
if watch and raw_data:
|
||||
watching[uuid_dir] = watch
|
||||
loaded += 1
|
||||
|
||||
if loaded % 100 == 0:
|
||||
logger.info(f"Loaded {loaded}/{total} watches...")
|
||||
else:
|
||||
# load_watch_from_file already logged the specific error
|
||||
failed += 1
|
||||
|
||||
elapsed = time.time() - start_time
|
||||
|
||||
if failed > 0:
|
||||
logger.critical(
|
||||
f"LOAD COMPLETE: {loaded} watches loaded successfully, "
|
||||
f"{failed} watches FAILED to load (corrupted or invalid) "
|
||||
f"in {elapsed:.2f}s ({loaded/elapsed:.0f} watches/sec)"
|
||||
)
|
||||
else:
|
||||
logger.info(f"Loaded {loaded} watches from disk in {elapsed:.2f}s ({loaded/elapsed:.0f} watches/sec)")
|
||||
|
||||
return watching
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# FileSavingDataStore Class
|
||||
# ============================================================================
|
||||
|
||||
class FileSavingDataStore(DataStore):
|
||||
"""
|
||||
Abstract datastore that provides file persistence with immediate commits.
|
||||
|
||||
Features:
|
||||
- Individual watch.json files (one per watch)
|
||||
- Immediate persistence via watch.commit() and datastore.commit()
|
||||
- Atomic file writes for crash safety
|
||||
|
||||
Subclasses must implement:
|
||||
- rehydrate_entity(): Convert dict to Watch object
|
||||
- Access to internal __data structure for watch management
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
|
||||
def _save_settings(self):
|
||||
"""
|
||||
Save settings to storage (polymorphic).
|
||||
|
||||
Subclasses must implement for their backend.
|
||||
- File: changedetection.json
|
||||
- Redis: SET settings
|
||||
- SQL: UPDATE settings table
|
||||
"""
|
||||
raise NotImplementedError("Subclass must implement _save_settings")
|
||||
|
||||
|
||||
def _load_watches(self):
|
||||
"""
|
||||
Load all watches from storage (polymorphic).
|
||||
|
||||
Subclasses must implement for their backend.
|
||||
- File: Read individual watch.json files
|
||||
- Redis: SCAN watch:* keys
|
||||
- SQL: SELECT * FROM watches
|
||||
"""
|
||||
raise NotImplementedError("Subclass must implement _load_watches")
|
||||
|
||||
def _delete_watch(self, uuid):
|
||||
"""
|
||||
Delete a watch from storage (polymorphic).
|
||||
|
||||
Subclasses must implement for their backend.
|
||||
- File: Delete {uuid}/ directory recursively
|
||||
- Redis: DEL watch:{uuid}
|
||||
- SQL: DELETE FROM watches WHERE uuid=?
|
||||
|
||||
Args:
|
||||
uuid: Watch UUID to delete
|
||||
"""
|
||||
raise NotImplementedError("Subclass must implement _delete_watch")
|
||||
|
||||
|
||||
@@ -0,0 +1,66 @@
|
||||
"""
|
||||
Legacy format loader for url-watches.json.
|
||||
|
||||
Provides functions to detect and load from the legacy monolithic JSON format.
|
||||
Used during migration (update_26) to transition to individual watch.json files.
|
||||
"""
|
||||
|
||||
import os
|
||||
import json
|
||||
from loguru import logger
|
||||
|
||||
# Try to import orjson for faster JSON serialization
|
||||
try:
|
||||
import orjson
|
||||
HAS_ORJSON = True
|
||||
except ImportError:
|
||||
HAS_ORJSON = False
|
||||
|
||||
|
||||
def has_legacy_datastore(datastore_path):
|
||||
"""
|
||||
Check if a legacy url-watches.json file exists.
|
||||
|
||||
This is used by update_26 to determine if migration is needed.
|
||||
|
||||
Args:
|
||||
datastore_path: Path to datastore directory
|
||||
|
||||
Returns:
|
||||
bool: True if url-watches.json exists
|
||||
"""
|
||||
url_watches_json = os.path.join(datastore_path, "url-watches.json")
|
||||
return os.path.exists(url_watches_json)
|
||||
|
||||
|
||||
def load_legacy_format(json_store_path):
|
||||
"""
|
||||
Load datastore from legacy url-watches.json format.
|
||||
|
||||
Args:
|
||||
json_store_path: Full path to url-watches.json file
|
||||
|
||||
Returns:
|
||||
dict: Loaded datastore data with 'watching', 'settings', etc.
|
||||
None: If file doesn't exist or loading failed
|
||||
"""
|
||||
logger.info(f"Loading from legacy format: {json_store_path}")
|
||||
|
||||
if not os.path.isfile(json_store_path):
|
||||
logger.warning(f"Legacy file not found: {json_store_path}")
|
||||
return None
|
||||
|
||||
try:
|
||||
if HAS_ORJSON:
|
||||
with open(json_store_path, 'rb') as f:
|
||||
data = orjson.loads(f.read())
|
||||
else:
|
||||
with open(json_store_path, 'r', encoding='utf-8') as f:
|
||||
data = json.load(f)
|
||||
|
||||
logger.info(f"Loaded {len(data.get('watching', {}))} watches from legacy format")
|
||||
return data
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to load legacy format: {e}")
|
||||
return None
|
||||
@@ -0,0 +1,686 @@
|
||||
"""
|
||||
Schema update migrations for the datastore.
|
||||
|
||||
This module contains all schema version upgrade methods (update_1 through update_N).
|
||||
These are mixed into ChangeDetectionStore to keep the main store file focused.
|
||||
|
||||
IMPORTANT: Each update could be run even when they have a new install and the schema is correct.
|
||||
Therefore - each `update_n` should be very careful about checking if it needs to actually run.
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import tarfile
|
||||
import time
|
||||
from loguru import logger
|
||||
from copy import deepcopy
|
||||
|
||||
from ..html_tools import TRANSLATE_WHITESPACE_TABLE
|
||||
from ..processors.restock_diff import Restock
|
||||
from ..blueprint.rss import RSS_CONTENT_FORMAT_DEFAULT
|
||||
from ..model import USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH
|
||||
from .file_saving_datastore import save_watch_atomic
|
||||
|
||||
|
||||
def create_backup_tarball(datastore_path, update_number):
|
||||
"""
|
||||
Create a tarball backup of the entire datastore structure before running an update.
|
||||
|
||||
Includes:
|
||||
- All {uuid}/watch.json files
|
||||
- changedetection.json (settings, if it exists)
|
||||
- url-watches.json (legacy format, if it exists)
|
||||
- Directory structure preserved
|
||||
|
||||
Args:
|
||||
datastore_path: Path to datastore directory
|
||||
update_number: Update number being applied
|
||||
|
||||
Returns:
|
||||
str: Path to created tarball, or None if backup failed
|
||||
|
||||
Restoration:
|
||||
To restore from a backup:
|
||||
cd /path/to/datastore
|
||||
tar -xzf before-update-N-timestamp.tar.gz
|
||||
This will restore all watch.json files and settings to their pre-update state.
|
||||
"""
|
||||
timestamp = int(time.time())
|
||||
backup_filename = f"before-update-{update_number}-{timestamp}.tar.gz"
|
||||
backup_path = os.path.join(datastore_path, backup_filename)
|
||||
|
||||
try:
|
||||
logger.info(f"Creating backup tarball: {backup_filename}")
|
||||
|
||||
with tarfile.open(backup_path, "w:gz") as tar:
|
||||
# Backup changedetection.json if it exists (new format)
|
||||
changedetection_json = os.path.join(datastore_path, "changedetection.json")
|
||||
if os.path.isfile(changedetection_json):
|
||||
tar.add(changedetection_json, arcname="changedetection.json")
|
||||
logger.debug("Added changedetection.json to backup")
|
||||
|
||||
# Backup url-watches.json if it exists (legacy format)
|
||||
url_watches_json = os.path.join(datastore_path, "url-watches.json")
|
||||
if os.path.isfile(url_watches_json):
|
||||
tar.add(url_watches_json, arcname="url-watches.json")
|
||||
logger.debug("Added url-watches.json to backup")
|
||||
|
||||
# Backup all watch directories with their watch.json files
|
||||
# This preserves the UUID directory structure
|
||||
watch_count = 0
|
||||
for entry in os.listdir(datastore_path):
|
||||
entry_path = os.path.join(datastore_path, entry)
|
||||
|
||||
# Skip if not a directory
|
||||
if not os.path.isdir(entry_path):
|
||||
continue
|
||||
|
||||
# Skip hidden directories and backup directories
|
||||
if entry.startswith('.') or entry.startswith('before-update-'):
|
||||
continue
|
||||
|
||||
# Check if this directory has a watch.json (indicates it's a watch UUID directory)
|
||||
watch_json = os.path.join(entry_path, "watch.json")
|
||||
if os.path.isfile(watch_json):
|
||||
# Add the watch.json file preserving directory structure
|
||||
tar.add(watch_json, arcname=f"{entry}/watch.json")
|
||||
watch_count += 1
|
||||
|
||||
if watch_count % 100 == 0:
|
||||
logger.debug(f"Backed up {watch_count} watch.json files...")
|
||||
|
||||
logger.success(f"Backup created: {backup_filename} ({watch_count} watches)")
|
||||
return backup_path
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to create backup tarball: {e}")
|
||||
# Try to clean up partial backup
|
||||
if os.path.exists(backup_path):
|
||||
try:
|
||||
os.unlink(backup_path)
|
||||
except:
|
||||
pass
|
||||
return None
|
||||
|
||||
|
||||
class DatastoreUpdatesMixin:
|
||||
"""
|
||||
Mixin class containing all schema update methods.
|
||||
|
||||
This class is inherited by ChangeDetectionStore to provide schema migration functionality.
|
||||
Each update_N method upgrades the schema from version N-1 to version N.
|
||||
"""
|
||||
|
||||
def get_updates_available(self):
|
||||
"""
|
||||
Discover all available update methods.
|
||||
|
||||
Returns:
|
||||
list: Sorted list of update version numbers (e.g., [1, 2, 3, ..., 26])
|
||||
"""
|
||||
import inspect
|
||||
updates_available = []
|
||||
for i, o in inspect.getmembers(self, predicate=inspect.ismethod):
|
||||
m = re.search(r'update_(\d+)$', i)
|
||||
if m:
|
||||
updates_available.append(int(m.group(1)))
|
||||
updates_available.sort()
|
||||
|
||||
return updates_available
|
||||
|
||||
def run_updates(self, current_schema_version=None):
|
||||
"""
|
||||
Run all pending schema updates sequentially.
|
||||
|
||||
Args:
|
||||
current_schema_version: Optional current schema version. If provided, only run updates
|
||||
greater than this version. If None, uses the schema version from
|
||||
the datastore. If no schema version exists in datastore and it appears
|
||||
to be a fresh install, sets to latest update number (no updates needed).
|
||||
|
||||
IMPORTANT: Each update could be run even when they have a new install and the schema is correct.
|
||||
Therefore - each `update_n` should be very careful about checking if it needs to actually run.
|
||||
|
||||
Process:
|
||||
1. Get list of available updates
|
||||
2. For each update > current schema version:
|
||||
- Create backup of datastore
|
||||
- Run update method
|
||||
- Update schema version
|
||||
- Mark settings and watches dirty
|
||||
3. If any update fails, stop processing
|
||||
4. Save all changes immediately
|
||||
"""
|
||||
updates_available = self.get_updates_available()
|
||||
|
||||
# Determine current schema version
|
||||
if current_schema_version is None:
|
||||
# Check if schema_version exists in datastore
|
||||
current_schema_version = self.data['settings']['application'].get('schema_version')
|
||||
|
||||
if current_schema_version is None:
|
||||
# No schema version found - could be a fresh install or very old datastore
|
||||
# If this is a fresh/new config with no watches, assume it's up-to-date
|
||||
# and set to latest update number (no updates needed)
|
||||
if len(self.data['watching']) == 0:
|
||||
# Get the highest update number from available update methods
|
||||
latest_update = updates_available[-1] if updates_available else 0
|
||||
logger.info(f"No schema version found and no watches exist - assuming fresh install, setting schema_version to {latest_update}")
|
||||
self.data['settings']['application']['schema_version'] = latest_update
|
||||
self.commit()
|
||||
return # No updates needed for fresh install
|
||||
else:
|
||||
# Has watches but no schema version - likely old datastore, run all updates
|
||||
logger.warning("No schema version found but watches exist - running all updates from version 0")
|
||||
current_schema_version = 0
|
||||
|
||||
logger.info(f"Current schema version: {current_schema_version}")
|
||||
|
||||
updates_ran = []
|
||||
|
||||
for update_n in updates_available:
|
||||
if update_n > current_schema_version:
|
||||
logger.critical(f"Applying update_{update_n}")
|
||||
|
||||
# Create tarball backup of entire datastore structure
|
||||
# This includes all watch.json files, settings, and preserves directory structure
|
||||
backup_path = create_backup_tarball(self.datastore_path, update_n)
|
||||
if backup_path:
|
||||
logger.info(f"Backup created at: {backup_path}")
|
||||
else:
|
||||
logger.warning("Backup creation failed, but continuing with update")
|
||||
|
||||
try:
|
||||
update_method = getattr(self, f"update_{update_n}")()
|
||||
except Exception as e:
|
||||
logger.error(f"Error while trying update_{update_n}")
|
||||
logger.error(e)
|
||||
# Don't run any more updates
|
||||
return
|
||||
else:
|
||||
# Bump the version, important
|
||||
self.data['settings']['application']['schema_version'] = update_n
|
||||
self.commit()
|
||||
|
||||
# CRITICAL: Save all watches so changes are persisted
|
||||
# Most updates modify watches, and in the new individual watch.json structure,
|
||||
# we need to ensure those changes are saved
|
||||
logger.info(f"Saving all {len(self.data['watching'])} watches after update_{update_n} (so that it saves them to disk)")
|
||||
for uuid in self.data['watching'].keys():
|
||||
self.data['watching'][uuid].commit()
|
||||
|
||||
# Save changes immediately after each update (more resilient than batching)
|
||||
logger.critical(f"Saving all changes after update_{update_n}")
|
||||
try:
|
||||
self._save_dirty_items()
|
||||
logger.success(f"Update {update_n} changes saved successfully")
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to save update_{update_n} changes: {e}")
|
||||
# Don't raise - update already ran, but changes might not be persisted
|
||||
# The update will try to run again on next startup
|
||||
|
||||
# Track which updates ran
|
||||
updates_ran.append(update_n)
|
||||
|
||||
# ============================================================================
|
||||
# Individual Update Methods
|
||||
# ============================================================================
|
||||
|
||||
def update_1(self):
|
||||
"""Convert minutes to seconds on settings and each watch."""
|
||||
if self.data['settings']['requests'].get('minutes_between_check'):
|
||||
self.data['settings']['requests']['time_between_check']['minutes'] = self.data['settings']['requests']['minutes_between_check']
|
||||
# Remove the default 'hours' that is set from the model
|
||||
self.data['settings']['requests']['time_between_check']['hours'] = None
|
||||
|
||||
for uuid, watch in self.data['watching'].items():
|
||||
if 'minutes_between_check' in watch:
|
||||
# Only upgrade individual watch time if it was set
|
||||
if watch.get('minutes_between_check', False):
|
||||
self.data['watching'][uuid]['time_between_check']['minutes'] = watch['minutes_between_check']
|
||||
|
||||
def update_2(self):
|
||||
"""
|
||||
Move the history list to a flat text file index.
|
||||
Better than SQLite because this list is only appended to, and works across NAS / NFS type setups.
|
||||
"""
|
||||
# @todo test running this on a newly updated one (when this already ran)
|
||||
for uuid, watch in self.data['watching'].items():
|
||||
history = []
|
||||
|
||||
if watch.get('history', False):
|
||||
for d, p in watch['history'].items():
|
||||
d = int(d) # Used to be keyed as str, we'll fix this now too
|
||||
history.append("{},{}\n".format(d, p))
|
||||
|
||||
if len(history):
|
||||
target_path = os.path.join(self.datastore_path, uuid)
|
||||
if os.path.exists(target_path):
|
||||
with open(os.path.join(target_path, "history.txt"), "w") as f:
|
||||
f.writelines(history)
|
||||
else:
|
||||
logger.warning(f"Datastore history directory {target_path} does not exist, skipping history import.")
|
||||
|
||||
# No longer needed, dynamically pulled from the disk when needed.
|
||||
# But we should set it back to a empty dict so we don't break if this schema runs on an earlier version.
|
||||
# In the distant future we can remove this entirely
|
||||
self.data['watching'][uuid]['history'] = {}
|
||||
|
||||
def update_3(self):
|
||||
"""We incorrectly stored last_changed when there was not a change, and then confused the output list table."""
|
||||
# see https://github.com/dgtlmoon/changedetection.io/pull/835
|
||||
return
|
||||
|
||||
def update_4(self):
|
||||
"""`last_changed` not needed, we pull that information from the history.txt index."""
|
||||
for uuid, watch in self.data['watching'].items():
|
||||
try:
|
||||
# Remove it from the struct
|
||||
del(watch['last_changed'])
|
||||
except:
|
||||
continue
|
||||
return
|
||||
|
||||
def update_5(self):
|
||||
"""
|
||||
If the watch notification body, title look the same as the global one, unset it, so the watch defaults back to using the main settings.
|
||||
In other words - the watch notification_title and notification_body are not needed if they are the same as the default one.
|
||||
"""
|
||||
current_system_body = self.data['settings']['application']['notification_body'].translate(TRANSLATE_WHITESPACE_TABLE)
|
||||
current_system_title = self.data['settings']['application']['notification_body'].translate(TRANSLATE_WHITESPACE_TABLE)
|
||||
for uuid, watch in self.data['watching'].items():
|
||||
try:
|
||||
watch_body = watch.get('notification_body', '')
|
||||
if watch_body and watch_body.translate(TRANSLATE_WHITESPACE_TABLE) == current_system_body:
|
||||
# Looks the same as the default one, so unset it
|
||||
watch['notification_body'] = None
|
||||
|
||||
watch_title = watch.get('notification_title', '')
|
||||
if watch_title and watch_title.translate(TRANSLATE_WHITESPACE_TABLE) == current_system_title:
|
||||
# Looks the same as the default one, so unset it
|
||||
watch['notification_title'] = None
|
||||
except Exception as e:
|
||||
continue
|
||||
return
|
||||
|
||||
def update_7(self):
|
||||
"""
|
||||
We incorrectly used common header overrides that should only apply to Requests.
|
||||
These are now handled in content_fetcher::html_requests and shouldnt be passed to Playwright/Selenium.
|
||||
"""
|
||||
# These were hard-coded in early versions
|
||||
for v in ['User-Agent', 'Accept', 'Accept-Encoding', 'Accept-Language']:
|
||||
if self.data['settings']['headers'].get(v):
|
||||
del self.data['settings']['headers'][v]
|
||||
|
||||
def update_8(self):
|
||||
"""Convert filters to a list of filters css_filter -> include_filters."""
|
||||
for uuid, watch in self.data['watching'].items():
|
||||
try:
|
||||
existing_filter = watch.get('css_filter', '')
|
||||
if existing_filter:
|
||||
watch['include_filters'] = [existing_filter]
|
||||
except:
|
||||
continue
|
||||
return
|
||||
|
||||
def update_9(self):
|
||||
"""Convert old static notification tokens to jinja2 tokens."""
|
||||
# Each watch
|
||||
# only { } not {{ or }}
|
||||
r = r'(?<!{){(?!{)(\w+)(?<!})}(?!})'
|
||||
for uuid, watch in self.data['watching'].items():
|
||||
try:
|
||||
n_body = watch.get('notification_body', '')
|
||||
if n_body:
|
||||
watch['notification_body'] = re.sub(r, r'{{\1}}', n_body)
|
||||
|
||||
n_title = watch.get('notification_title')
|
||||
if n_title:
|
||||
watch['notification_title'] = re.sub(r, r'{{\1}}', n_title)
|
||||
|
||||
n_urls = watch.get('notification_urls')
|
||||
if n_urls:
|
||||
for i, url in enumerate(n_urls):
|
||||
watch['notification_urls'][i] = re.sub(r, r'{{\1}}', url)
|
||||
|
||||
except:
|
||||
continue
|
||||
|
||||
# System wide
|
||||
n_body = self.data['settings']['application'].get('notification_body')
|
||||
if n_body:
|
||||
self.data['settings']['application']['notification_body'] = re.sub(r, r'{{\1}}', n_body)
|
||||
|
||||
n_title = self.data['settings']['application'].get('notification_title')
|
||||
if n_body:
|
||||
self.data['settings']['application']['notification_title'] = re.sub(r, r'{{\1}}', n_title)
|
||||
|
||||
n_urls = self.data['settings']['application'].get('notification_urls')
|
||||
if n_urls:
|
||||
for i, url in enumerate(n_urls):
|
||||
self.data['settings']['application']['notification_urls'][i] = re.sub(r, r'{{\1}}', url)
|
||||
|
||||
return
|
||||
|
||||
def update_10(self):
|
||||
"""Some setups may have missed the correct default, so it shows the wrong config in the UI, although it will default to system-wide."""
|
||||
for uuid, watch in self.data['watching'].items():
|
||||
try:
|
||||
if not watch.get('fetch_backend', ''):
|
||||
watch['fetch_backend'] = 'system'
|
||||
except:
|
||||
continue
|
||||
return
|
||||
|
||||
def update_12(self):
|
||||
"""Create tag objects and their references from existing tag text."""
|
||||
i = 0
|
||||
for uuid, watch in self.data['watching'].items():
|
||||
# Split out and convert old tag string
|
||||
tag = watch.get('tag')
|
||||
if tag:
|
||||
tag_uuids = []
|
||||
for t in tag.split(','):
|
||||
tag_uuids.append(self.add_tag(title=t))
|
||||
|
||||
self.data['watching'][uuid]['tags'] = tag_uuids
|
||||
|
||||
def update_13(self):
|
||||
"""#1775 - Update 11 did not update the records correctly when adding 'date_created' values for sorting."""
|
||||
i = 0
|
||||
for uuid, watch in self.data['watching'].items():
|
||||
if not watch.get('date_created'):
|
||||
self.data['watching'][uuid]['date_created'] = i
|
||||
i += 1
|
||||
return
|
||||
|
||||
def update_14(self):
|
||||
"""#1774 - protect xpath1 against migration."""
|
||||
for awatch in self.data["watching"]:
|
||||
if self.data["watching"][awatch]['include_filters']:
|
||||
for num, selector in enumerate(self.data["watching"][awatch]['include_filters']):
|
||||
if selector.startswith('/'):
|
||||
self.data["watching"][awatch]['include_filters'][num] = 'xpath1:' + selector
|
||||
if selector.startswith('xpath:'):
|
||||
self.data["watching"][awatch]['include_filters'][num] = selector.replace('xpath:', 'xpath1:', 1)
|
||||
|
||||
def update_15(self):
|
||||
"""Use more obvious default time setting."""
|
||||
for uuid in self.data["watching"]:
|
||||
if self.data["watching"][uuid]['time_between_check'] == self.data['settings']['requests']['time_between_check']:
|
||||
# What the old logic was, which was pretty confusing
|
||||
self.data["watching"][uuid]['time_between_check_use_default'] = True
|
||||
elif all(value is None or value == 0 for value in self.data["watching"][uuid]['time_between_check'].values()):
|
||||
self.data["watching"][uuid]['time_between_check_use_default'] = True
|
||||
else:
|
||||
# Something custom here
|
||||
self.data["watching"][uuid]['time_between_check_use_default'] = False
|
||||
|
||||
def update_16(self):
|
||||
"""Correctly set datatype for older installs where 'tag' was string and update_12 did not catch it."""
|
||||
for uuid, watch in self.data['watching'].items():
|
||||
if isinstance(watch.get('tags'), str):
|
||||
self.data['watching'][uuid]['tags'] = []
|
||||
|
||||
def update_17(self):
|
||||
"""Migrate old 'in_stock' values to the new Restock."""
|
||||
for uuid, watch in self.data['watching'].items():
|
||||
if 'in_stock' in watch:
|
||||
watch['restock'] = Restock({'in_stock': watch.get('in_stock')})
|
||||
del watch['in_stock']
|
||||
|
||||
def update_18(self):
|
||||
"""Migrate old restock settings."""
|
||||
for uuid, watch in self.data['watching'].items():
|
||||
if not watch.get('restock_settings'):
|
||||
# So we enable price following by default
|
||||
self.data['watching'][uuid]['restock_settings'] = {'follow_price_changes': True}
|
||||
|
||||
# Migrate and cleanoff old value
|
||||
self.data['watching'][uuid]['restock_settings']['in_stock_processing'] = 'in_stock_only' if watch.get(
|
||||
'in_stock_only') else 'all_changes'
|
||||
|
||||
if self.data['watching'][uuid].get('in_stock_only'):
|
||||
del (self.data['watching'][uuid]['in_stock_only'])
|
||||
|
||||
def update_19(self):
|
||||
"""Compress old elements.json to elements.deflate, saving disk, this compression is pretty fast."""
|
||||
import zlib
|
||||
|
||||
for uuid, watch in self.data['watching'].items():
|
||||
json_path = os.path.join(self.datastore_path, uuid, "elements.json")
|
||||
deflate_path = os.path.join(self.datastore_path, uuid, "elements.deflate")
|
||||
|
||||
if os.path.exists(json_path):
|
||||
with open(json_path, "rb") as f_j:
|
||||
with open(deflate_path, "wb") as f_d:
|
||||
logger.debug(f"Compressing {str(json_path)} to {str(deflate_path)}..")
|
||||
f_d.write(zlib.compress(f_j.read()))
|
||||
os.unlink(json_path)
|
||||
|
||||
def update_20(self):
|
||||
"""Migrate extract_title_as_title to use_page_title_in_list."""
|
||||
for uuid, watch in self.data['watching'].items():
|
||||
if self.data['watching'][uuid].get('extract_title_as_title'):
|
||||
self.data['watching'][uuid]['use_page_title_in_list'] = self.data['watching'][uuid].get('extract_title_as_title')
|
||||
del self.data['watching'][uuid]['extract_title_as_title']
|
||||
|
||||
if self.data['settings']['application'].get('extract_title_as_title'):
|
||||
self.data['settings']['application']['ui']['use_page_title_in_list'] = self.data['settings']['application'].get('extract_title_as_title')
|
||||
|
||||
def update_21(self):
|
||||
"""Migrate timezone to scheduler_timezone_default."""
|
||||
if self.data['settings']['application'].get('timezone'):
|
||||
self.data['settings']['application']['scheduler_timezone_default'] = self.data['settings']['application'].get('timezone')
|
||||
del self.data['settings']['application']['timezone']
|
||||
|
||||
def update_23(self):
|
||||
"""Some notification formats got the wrong name type."""
|
||||
|
||||
def re_run(formats):
|
||||
sys_n_format = self.data['settings']['application'].get('notification_format')
|
||||
key_exists_as_value = next((k for k, v in formats.items() if v == sys_n_format), None)
|
||||
if key_exists_as_value: # key of "Plain text"
|
||||
logger.success(f"['settings']['application']['notification_format'] '{sys_n_format}' -> '{key_exists_as_value}'")
|
||||
self.data['settings']['application']['notification_format'] = key_exists_as_value
|
||||
|
||||
for uuid, watch in self.data['watching'].items():
|
||||
n_format = self.data['watching'][uuid].get('notification_format')
|
||||
key_exists_as_value = next((k for k, v in formats.items() if v == n_format), None)
|
||||
if key_exists_as_value and key_exists_as_value != USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH: # key of "Plain text"
|
||||
logger.success(f"['watching'][{uuid}]['notification_format'] '{n_format}' -> '{key_exists_as_value}'")
|
||||
self.data['watching'][uuid]['notification_format'] = key_exists_as_value # should be 'text' or whatever
|
||||
|
||||
for uuid, tag in self.data['settings']['application']['tags'].items():
|
||||
n_format = self.data['settings']['application']['tags'][uuid].get('notification_format')
|
||||
key_exists_as_value = next((k for k, v in formats.items() if v == n_format), None)
|
||||
if key_exists_as_value and key_exists_as_value != USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH: # key of "Plain text"
|
||||
logger.success(
|
||||
f"['settings']['application']['tags'][{uuid}]['notification_format'] '{n_format}' -> '{key_exists_as_value}'")
|
||||
self.data['settings']['application']['tags'][uuid][
|
||||
'notification_format'] = key_exists_as_value # should be 'text' or whatever
|
||||
|
||||
from ..notification import valid_notification_formats
|
||||
formats = deepcopy(valid_notification_formats)
|
||||
re_run(formats)
|
||||
# And in previous versions, it was "text" instead of Plain text, Markdown instead of "Markdown to HTML"
|
||||
formats['text'] = 'Text'
|
||||
formats['markdown'] = 'Markdown'
|
||||
re_run(formats)
|
||||
|
||||
def update_24(self):
|
||||
"""RSS types should be inline with the same names as notification types."""
|
||||
rss_format = self.data['settings']['application'].get('rss_content_format')
|
||||
if not rss_format or 'text' in rss_format:
|
||||
# might have been 'plaintext, 'plain text' or something
|
||||
self.data['settings']['application']['rss_content_format'] = RSS_CONTENT_FORMAT_DEFAULT
|
||||
elif 'html' in rss_format:
|
||||
self.data['settings']['application']['rss_content_format'] = 'htmlcolor'
|
||||
else:
|
||||
# safe fallback to text
|
||||
self.data['settings']['application']['rss_content_format'] = RSS_CONTENT_FORMAT_DEFAULT
|
||||
|
||||
def update_25(self):
|
||||
"""Different processors now hold their own history.txt."""
|
||||
for uuid, watch in self.data['watching'].items():
|
||||
processor = self.data['watching'][uuid].get('processor')
|
||||
if processor != 'text_json_diff':
|
||||
old_history_txt = os.path.join(self.datastore_path, "history.txt")
|
||||
target_history_name = f"history-{processor}.txt"
|
||||
if os.path.isfile(old_history_txt) and not os.path.isfile(target_history_name):
|
||||
new_history_txt = os.path.join(self.datastore_path, target_history_name)
|
||||
logger.debug(f"Renaming history index {old_history_txt} to {new_history_txt}...")
|
||||
shutil.move(old_history_txt, new_history_txt)
|
||||
|
||||
def migrate_legacy_db_format(self):
|
||||
"""
|
||||
Migration: Individual watch persistence (COPY-based, safe rollback).
|
||||
|
||||
Loads legacy url-watches.json format and migrates to:
|
||||
- {uuid}/watch.json (per watch)
|
||||
- changedetection.json (settings only)
|
||||
|
||||
IMPORTANT:
|
||||
- A tarball backup (before-update-26-timestamp.tar.gz) is created before migration
|
||||
- url-watches.json is LEFT INTACT for rollback safety
|
||||
- Users can roll back by simply downgrading to the previous version
|
||||
- Or restore from tarball: tar -xzf before-update-26-*.tar.gz
|
||||
|
||||
This is a dedicated migration release - users upgrade at their own pace.
|
||||
"""
|
||||
logger.critical("=" * 80)
|
||||
logger.critical("Running migration: Individual watch persistence (update_26)")
|
||||
logger.critical("COPY-based migration: url-watches.json will remain intact for rollback")
|
||||
logger.critical("=" * 80)
|
||||
|
||||
# Check if already migrated
|
||||
changedetection_json = os.path.join(self.datastore_path, "changedetection.json")
|
||||
if os.path.exists(changedetection_json):
|
||||
logger.info("Migration already completed (changedetection.json exists), skipping")
|
||||
return
|
||||
|
||||
# Check if we need to load legacy data
|
||||
from .legacy_loader import has_legacy_datastore, load_legacy_format
|
||||
|
||||
if not has_legacy_datastore(self.datastore_path):
|
||||
logger.info("No legacy datastore found, nothing to migrate")
|
||||
return
|
||||
|
||||
# Load legacy data from url-watches.json
|
||||
logger.critical("Loading legacy datastore from url-watches.json...")
|
||||
legacy_path = os.path.join(self.datastore_path, "url-watches.json")
|
||||
legacy_data = load_legacy_format(legacy_path)
|
||||
|
||||
if not legacy_data:
|
||||
raise Exception("Failed to load legacy datastore from url-watches.json")
|
||||
|
||||
# Populate settings from legacy data
|
||||
logger.info("Populating settings from legacy data...")
|
||||
watch_count = len(self.data['watching'])
|
||||
logger.success(f"Loaded {watch_count} watches from legacy format")
|
||||
|
||||
# Phase 1: Save all watches to individual files
|
||||
logger.critical(f"Phase 1/4: Saving {watch_count} watches to individual watch.json files...")
|
||||
|
||||
saved_count = 0
|
||||
for uuid, watch in self.data['watching'].items():
|
||||
try:
|
||||
watch_dict = dict(watch)
|
||||
watch_dir = os.path.join(self.datastore_path, uuid)
|
||||
save_watch_atomic(watch_dir, uuid, watch_dict)
|
||||
saved_count += 1
|
||||
|
||||
if saved_count % 100 == 0:
|
||||
logger.info(f" Progress: {saved_count}/{watch_count} watches migrated...")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to save watch {uuid}: {e}")
|
||||
raise Exception(
|
||||
f"Migration failed: Could not save watch {uuid}. "
|
||||
f"url-watches.json remains intact, safe to retry. Error: {e}"
|
||||
)
|
||||
|
||||
logger.critical(f"Phase 1 complete: Saved {saved_count} watches")
|
||||
|
||||
# Phase 2: Verify all files exist
|
||||
logger.critical("Phase 2/4: Verifying all watch.json files were created...")
|
||||
|
||||
missing = []
|
||||
for uuid in self.data['watching'].keys():
|
||||
watch_json = os.path.join(self.datastore_path, uuid, "watch.json")
|
||||
if not os.path.isfile(watch_json):
|
||||
missing.append(uuid)
|
||||
|
||||
if missing:
|
||||
raise Exception(
|
||||
f"Migration failed: {len(missing)} watch files missing: {missing[:5]}... "
|
||||
f"url-watches.json remains intact, safe to retry."
|
||||
)
|
||||
|
||||
logger.critical(f"Phase 2 complete: Verified {watch_count} watch files")
|
||||
|
||||
# Phase 3: Create new settings file
|
||||
logger.critical("Phase 3/4: Creating changedetection.json...")
|
||||
|
||||
try:
|
||||
self._save_settings()
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to create changedetection.json: {e}")
|
||||
raise Exception(
|
||||
f"Migration failed: Could not create changedetection.json. "
|
||||
f"url-watches.json remains intact, safe to retry. Error: {e}"
|
||||
)
|
||||
|
||||
# Phase 4: Verify settings file exists
|
||||
logger.critical("Phase 4/4: Verifying changedetection.json exists...")
|
||||
|
||||
if not os.path.isfile(changedetection_json):
|
||||
raise Exception(
|
||||
"Migration failed: changedetection.json not found after save. "
|
||||
"url-watches.json remains intact, safe to retry."
|
||||
)
|
||||
|
||||
logger.critical("Phase 4 complete: Verified changedetection.json exists")
|
||||
|
||||
# Success! Now reload from new format
|
||||
logger.critical("Reloading datastore from new format...")
|
||||
self._load_state() # Includes load_watches
|
||||
logger.success("Datastore reloaded from new format successfully")
|
||||
|
||||
|
||||
# Verify all watches have hashes after migration
|
||||
missing_hashes = [uuid for uuid in self.data['watching'].keys() if uuid not in self._watch_hashes]
|
||||
if missing_hashes:
|
||||
logger.error(f"WARNING: {len(missing_hashes)} watches missing hashes after migration: {missing_hashes[:5]}")
|
||||
else:
|
||||
logger.success(f"All {len(self.data['watching'])} watches have valid hashes after migration")
|
||||
|
||||
# Set schema version to latest available update
|
||||
# This prevents re-running updates and re-marking all watches as dirty
|
||||
updates_available = self.get_updates_available()
|
||||
latest_schema = updates_available[-1] if updates_available else 26
|
||||
self.data['settings']['application']['schema_version'] = latest_schema
|
||||
self.commit()
|
||||
logger.info(f"Set schema_version to {latest_schema} (migration complete, all watches already saved)")
|
||||
|
||||
logger.critical("=" * 80)
|
||||
logger.critical("MIGRATION COMPLETED SUCCESSFULLY!")
|
||||
logger.critical("=" * 80)
|
||||
logger.info("")
|
||||
logger.info("New format:")
|
||||
logger.info(f" - {watch_count} individual watch.json files created")
|
||||
logger.info(f" - changedetection.json created (settings only)")
|
||||
logger.info("")
|
||||
logger.info("Rollback safety:")
|
||||
logger.info(" - url-watches.json preserved for rollback")
|
||||
logger.info(" - To rollback: downgrade to previous version and restart")
|
||||
logger.info(" - No manual file operations needed")
|
||||
logger.info("")
|
||||
logger.info("Optional cleanup (after testing new version):")
|
||||
logger.info(f" - rm {os.path.join(self.datastore_path, 'url-watches.json')}")
|
||||
logger.info("")
|
||||
|
||||
def update_26(self):
|
||||
self.migrate_legacy_db_format()
|
||||
@@ -6,94 +6,93 @@
|
||||
|
||||
<div class="pure-controls">
|
||||
<span class="pure-form-message-inline">
|
||||
Body for all notifications ‐ You can use <a target="newwindow" href="https://jinja.palletsprojects.com/en/3.0.x/templates/">Jinja2</a> templating in the notification title, body and URL, and tokens from below.
|
||||
{{ _('Body for all notifications — You can use') }} <a target="newwindow" href="https://jinja.palletsprojects.com/en/3.0.x/templates/">Jinja2</a> {{ _('templating in the notification title, body and URL, and tokens from below.') }}
|
||||
</span><br>
|
||||
<div data-target="#notification-tokens-info{{ suffix }}" class="toggle-show pure-button button-tag button-xsmall">Show
|
||||
token/placeholders
|
||||
<div data-target="#notification-tokens-info{{ suffix }}" class="toggle-show pure-button button-tag button-xsmall">{{ _('Show token/placeholders') }}
|
||||
</div>
|
||||
</div>
|
||||
<div class="pure-controls" style="display: none;" id="notification-tokens-info{{ suffix }}">
|
||||
<table class="pure-table" id="token-table">
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Token</th>
|
||||
<th>Description</th>
|
||||
<th>{{ _('Token') }}</th>
|
||||
<th>{{ _('Description') }}</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr>
|
||||
<td><code>{{ '{{base_url}}' }}</code></td>
|
||||
<td>The URL of the changedetection.io instance you are running.</td>
|
||||
<td>{{ _('The URL of the changedetection.io instance you are running.') }}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>{{ '{{watch_url}}' }}</code></td>
|
||||
<td>The URL being watched.</td>
|
||||
<td>{{ _('The URL being watched.') }}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>{{ '{{watch_uuid}}' }}</code></td>
|
||||
<td>The UUID of the watch.</td>
|
||||
<td>{{ _('The UUID of the watch.') }}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>{{ '{{watch_title}}' }}</code></td>
|
||||
<td>The page title of the watch, uses <title> if not set, falls back to URL</td>
|
||||
<td>{{ _('The page title of the watch, uses <title> if not set, falls back to URL') }}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>{{ '{{watch_tag}}' }}</code></td>
|
||||
<td>The watch group / tag</td>
|
||||
<td>{{ _('The watch group / tag') }}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>{{ '{{preview_url}}' }}</code></td>
|
||||
<td>The URL of the preview page generated by changedetection.io.</td>
|
||||
<td>{{ _('The URL of the preview page generated by changedetection.io.') }}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>{{ '{{diff_url}}' }}</code></td>
|
||||
<td>The URL of the diff output for the watch.</td>
|
||||
<td>{{ _('The URL of the diff output for the watch.') }}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>{{ '{{diff}}' }}</code></td>
|
||||
<td>The diff output - only changes, additions, and removals</td>
|
||||
<td>{{ _('The diff output - only changes, additions, and removals') }}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>{{ '{{diff_clean}}' }}</code></td>
|
||||
<td>The diff output - only changes, additions, and removals ‐ <i>Without (added) prefix or colors</i>
|
||||
<td>{{ _('The diff output - only changes, additions, and removals —') }} <i>{{ _('Without (added) prefix or colors') }}</i>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>{{ '{{diff_added}}' }}</code></td>
|
||||
<td>The diff output - only changes and additions</td>
|
||||
<td>{{ _('The diff output - only changes and additions') }}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>{{ '{{diff_added_clean}}' }}</code></td>
|
||||
<td>The diff output - only changes and additions ‐ <i>Without (added) prefix or colors</i></td>
|
||||
<td>{{ _('The diff output - only changes and additions —') }} <i>{{ _('Without (added) prefix or colors') }}</i></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>{{ '{{diff_removed}}' }}</code></td>
|
||||
<td>The diff output - only changes and removals</td>
|
||||
<td>{{ _('The diff output - only changes and removals') }}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>{{ '{{diff_removed_clean}}' }}</code></td>
|
||||
<td>The diff output - only changes and removals ‐ <i>Without (added) prefix or colors</i></td>
|
||||
<td>{{ _('The diff output - only changes and removals —') }} <i>{{ _('Without (added) prefix or colors') }}</i></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>{{ '{{diff_full}}' }}</code></td>
|
||||
<td>The diff output - full difference output</td>
|
||||
<td>{{ _('The diff output - full difference output') }}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>{{ '{{diff_full_clean}}' }}</code></td>
|
||||
<td>The diff output - full difference output ‐ <i>Without (added) prefix or colors</i></td>
|
||||
<td>{{ _('The diff output - full difference output —') }} <i>{{ _('Without (added) prefix or colors') }}</i></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>{{ '{{diff_patch}}' }}</code></td>
|
||||
<td>The diff output - patch in unified format</td>
|
||||
<td>{{ _('The diff output - patch in unified format') }}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>{{ '{{current_snapshot}}' }}</code></td>
|
||||
<td>The current snapshot text contents value, useful when combined with JSON or CSS filters
|
||||
<td>{{ _('The current snapshot text contents value, useful when combined with JSON or CSS filters') }}
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>{{ '{{triggered_text}}' }}</code></td>
|
||||
<td>Text that tripped the trigger from filters</td>
|
||||
<td>{{ _('Text that tripped the trigger from filters') }}</td>
|
||||
|
||||
{% if extra_notification_token_placeholder_info %}
|
||||
{% for token in extra_notification_token_placeholder_info %}
|
||||
@@ -107,8 +106,8 @@
|
||||
</table>
|
||||
|
||||
<span class="pure-form-message-inline">
|
||||
Warning: Contents of <code>{{ '{{diff}}' }}</code>, <code>{{ '{{diff_removed}}' }}</code>, and <code>{{ '{{diff_added}}' }}</code> depend on how the difference algorithm perceives the change. <br>
|
||||
For example, an addition or removal could be perceived as a change in some cases. <a target="newwindow" href="https://github.com/dgtlmoon/changedetection.io/wiki/Using-the-%7B%7Bdiff%7D%7D,-%7B%7Bdiff_added%7D%7D,-and-%7B%7Bdiff_removed%7D%7D-notification-tokens">More Here</a> <br>
|
||||
{{ _('Warning: Contents of') }} <code>{{ '{{diff}}' }}</code>, <code>{{ '{{diff_removed}}' }}</code>, {{ _('and') }} <code>{{ '{{diff_added}}' }}</code> {{ _('depend on how the difference algorithm perceives the change.') }} <br>
|
||||
{{ _('For example, an addition or removal could be perceived as a change in some cases.') }} <a target="newwindow" href="https://github.com/dgtlmoon/changedetection.io/wiki/Using-the-%7B%7Bdiff%7D%7D,-%7B%7Bdiff_added%7D%7D,-and-%7B%7Bdiff_removed%7D%7D-notification-tokens">{{ _('More Here') }}</a> <br>
|
||||
</span>
|
||||
</div>
|
||||
{% endmacro %}
|
||||
@@ -124,32 +123,32 @@
|
||||
}}
|
||||
<div class="pure-form-message-inline">
|
||||
<p>
|
||||
<strong>Tip:</strong> Use <a target="newwindow" href="https://github.com/caronc/apprise">AppRise Notification URLs</a> for notification to just about any service! <i><a target="newwindow" href="https://github.com/dgtlmoon/changedetection.io/wiki/Notification-configuration-notes">Please read the notification services wiki here for important configuration notes</a></i>.<br>
|
||||
<strong>{{ _('Tip:') }}</strong> {{ _('Use') }} <a target="newwindow" href="https://github.com/caronc/apprise">{{ _('AppRise Notification URLs') }}</a> {{ _('for notification to just about any service!') }} <i><a target="newwindow" href="https://github.com/dgtlmoon/changedetection.io/wiki/Notification-configuration-notes">{{ _('Please read the notification services wiki here for important configuration notes') }}</a></i>.<br>
|
||||
</p>
|
||||
<div data-target="#advanced-help-notifications" class="toggle-show pure-button button-tag button-xsmall">Show advanced help and tips</div>
|
||||
<div data-target="#advanced-help-notifications" class="toggle-show pure-button button-tag button-xsmall">{{ _('Show advanced help and tips') }}</div>
|
||||
<ul style="display: none" id="advanced-help-notifications">
|
||||
<li><code><a target="newwindow" href="https://github.com/caronc/apprise/wiki/Notify_discord">discord://</a></code> (or <code>https://discord.com/api/webhooks...</code>)) only supports a maximum <strong>2,000 characters</strong> of notification text, including the title.</li>
|
||||
<li><code><a target="newwindow" href="https://github.com/caronc/apprise/wiki/Notify_telegram">tgram://</a></code> bots can't send messages to other bots, so you should specify chat ID of non-bot user.</li>
|
||||
<li><code><a target="newwindow" href="https://github.com/caronc/apprise/wiki/Notify_telegram">tgram://</a></code> only supports very limited HTML and can fail when extra tags are sent, <a href="https://core.telegram.org/bots/api#html-style">read more here</a> (or use plaintext/markdown format)</li>
|
||||
<li><code>gets://</code>, <code>posts://</code>, <code>puts://</code>, <code>deletes://</code> for direct API calls (or omit the "<code>s</code>" for non-SSL ie <code>get://</code>) <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Notification-configuration-notes#postposts">more help here</a></li>
|
||||
<li>Accepts the <code>{{ '{{token}}' }}</code> placeholders listed below</li>
|
||||
<li><code><a target="newwindow" href="https://github.com/caronc/apprise/wiki/Notify_discord">discord://</a></code> {{ _('(or') }} <code>https://discord.com/api/webhooks...</code>)) {{ _('only supports a maximum') }} <strong>{{ _('2,000 characters') }}</strong> {{ _('of notification text, including the title.') }}</li>
|
||||
<li><code><a target="newwindow" href="https://github.com/caronc/apprise/wiki/Notify_telegram">tgram://</a></code> {{ _('bots can\'t send messages to other bots, so you should specify chat ID of non-bot user.') }}</li>
|
||||
<li><code><a target="newwindow" href="https://github.com/caronc/apprise/wiki/Notify_telegram">tgram://</a></code> {{ _('only supports very limited HTML and can fail when extra tags are sent,') }} <a href="https://core.telegram.org/bots/api#html-style">{{ _('read more here') }}</a> {{ _('(or use plaintext/markdown format)') }}</li>
|
||||
<li><code>gets://</code>, <code>posts://</code>, <code>puts://</code>, <code>deletes://</code> {{ _('for direct API calls (or omit the') }} "<code>s</code>" {{ _('for non-SSL ie') }} <code>get://</code>) <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Notification-configuration-notes#postposts">{{ _('more help here') }}</a></li>
|
||||
<li>{{ _('Accepts the') }} <code>{{ '{{token}}' }}</code> {{ _('placeholders listed below') }}</li>
|
||||
</ul>
|
||||
</div>
|
||||
<div class="notifications-wrapper">
|
||||
<a id="send-test-notification" class="pure-button button-secondary button-xsmall" >Send test notification</a> <div class="spinner" style="display: none;"></div>
|
||||
<a id="send-test-notification" class="pure-button button-secondary button-xsmall" >{{ _('Send test notification') }}</a> <div class="spinner" style="display: none;"></div>
|
||||
{% if emailprefix %}
|
||||
<a id="add-email-helper" class="pure-button button-secondary button-xsmall" >Add email <img style="height: 1em; display: inline-block" src="{{url_for('static_content', group='images', filename='email.svg')}}" alt="Add an email address"> </a>
|
||||
<a id="add-email-helper" class="pure-button button-secondary button-xsmall" >{{ _('Add email') }} <img style="height: 1em; display: inline-block" src="{{url_for('static_content', group='images', filename='email.svg')}}" alt="{{ _('Add an email address') }}"> </a>
|
||||
{% endif %}
|
||||
<a href="{{url_for('settings.notification_logs')}}" class="pure-button button-secondary button-xsmall" >Notification debug logs</a>
|
||||
<a href="{{url_for('settings.notification_logs')}}" class="pure-button button-secondary button-xsmall" >{{ _('Notification debug logs') }}</a>
|
||||
<br>
|
||||
<div id="notification-test-log" style="display: none;"><span class="pure-form-message-inline">Processing..</span></div>
|
||||
<div id="notification-test-log" style="display: none;"><span class="pure-form-message-inline">{{ _('Processing..') }}</span></div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="pure-control-group grey-form-border">
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.notification_title, class="m-d notification-title", placeholder=settings_application['notification_title']) }}
|
||||
<span class="pure-form-message-inline">Title for all notifications</span>
|
||||
<span class="pure-form-message-inline">{{ _('Title for all notifications') }}</span>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.notification_body , rows=5, class="notification-body", placeholder=settings_application['notification_body']) }}
|
||||
@@ -157,16 +156,16 @@
|
||||
<div class="pure-form-message-inline">
|
||||
<ul>
|
||||
<li><span class="pure-form-message-inline">
|
||||
For JSON payloads, use <strong>|tojson</strong> without quotes for automatic escaping, for example - <code>{ "name": {{ '{{ watch_title|tojson }}' }} }</code>
|
||||
{{ _('For JSON payloads, use') }} <strong>|tojson</strong> {{ _('without quotes for automatic escaping, for example -') }} <code>{ "name": {{ '{{ watch_title|tojson }}' }} }</code>
|
||||
</span></li>
|
||||
<li><span class="pure-form-message-inline">
|
||||
URL encoding, use <strong>|urlencode</strong>, for example - <code>gets://hook-website.com/test.php?title={{ '{{ watch_title|urlencode }}' }}</code>
|
||||
{{ _('URL encoding, use') }} <strong>|urlencode</strong>, {{ _('for example -') }} <code>gets://hook-website.com/test.php?title={{ '{{ watch_title|urlencode }}' }}</code>
|
||||
</span></li>
|
||||
<li><span class="pure-form-message-inline">
|
||||
Regular-expression replace, use <strong>|regex_replace</strong>, for example - <code>{{ "{{ \"hello world 123\" | regex_replace('[0-9]+', 'no-more-numbers') }}" }}</code>
|
||||
{{ _('Regular-expression replace, use') }} <strong>|regex_replace</strong>, {{ _('for example -') }} <code>{{ "{{ \"hello world 123\" | regex_replace('[0-9]+', 'no-more-numbers') }}" }}</code>
|
||||
</span></li>
|
||||
<li><span class="pure-form-message-inline">
|
||||
For a complete reference of all Jinja2 built-in filters, users can refer to the <a href="https://jinja.palletsprojects.com/en/3.1.x/templates/#builtin-filters">https://jinja.palletsprojects.com/en/3.1.x/templates/#builtin-filters</a>
|
||||
{{ _('For a complete reference of all Jinja2 built-in filters, users can refer to the') }} <a href="https://jinja.palletsprojects.com/en/3.1.x/templates/#builtin-filters">https://jinja.palletsprojects.com/en/3.1.x/templates/#builtin-filters</a>
|
||||
</span></li>
|
||||
</ul>
|
||||
<br>
|
||||
@@ -174,7 +173,7 @@
|
||||
</div>
|
||||
<div class="">
|
||||
{{ render_field(form.notification_format , class="notification-format") }}
|
||||
<span class="pure-form-message-inline">Format for all notifications</span>
|
||||
<span class="pure-form-message-inline">{{ _('Format for all notifications') }}</span>
|
||||
</div>
|
||||
</div>
|
||||
{% endmacro %}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
{% macro render_field(field) %}
|
||||
<div {% if field.errors or field.top_errors %} class="error" {% endif %}><label for="{{ field.id }}">{{ field.label.text | string | forceescape }}</label></div>
|
||||
<div {% if field.errors or field.top_errors %} class="error" {% endif %}>{{ field.label }}</div>
|
||||
<div {% if field.errors or field.top_errors %} class="error" {% endif %}>{{ field(**kwargs)|safe }}
|
||||
{% if field.top_errors %}
|
||||
top
|
||||
@@ -59,7 +59,7 @@
|
||||
|
||||
{% macro render_ternary_field(field, BooleanField=false) %}
|
||||
{% if BooleanField %}
|
||||
{% set _ = field.__setattr__('boolean_mode', true) %}
|
||||
{% set dummy = field.__setattr__('boolean_mode', true) %}
|
||||
{% endif %}
|
||||
<div class="ternary-field {% if field.errors %} error {% endif %}">
|
||||
<div class="ternary-field-label"><label for="{{ field.id }}">{{ field.label.text | string | forceescape }}</label></div>
|
||||
@@ -113,17 +113,17 @@
|
||||
|
||||
{% macro render_fieldlist_with_inline_errors(fieldlist) %}
|
||||
{# Specialized macro for FieldList(FormField(...)) that renders errors inline with each field #}
|
||||
<div {% if fieldlist.errors %} class="error" {% endif %}>{{ fieldlist.label }}</div>
|
||||
<div {% if fieldlist.errors %} class="error" {% endif %}>{{ _(fieldlist.label.text | string) }}</div>
|
||||
<div {% if fieldlist.errors %} class="error" {% endif %}>
|
||||
<ul id="{{ fieldlist.id }}">
|
||||
{% for entry in fieldlist %}
|
||||
<li {% if entry.errors %} class="error" {% endif %}>
|
||||
<label for="{{ entry.id }}" {% if entry.errors %} class="error" {% endif %}>{{ fieldlist.label.text }}-{{ loop.index0 }}</label>
|
||||
<label for="{{ entry.id }}" {% if entry.errors %} class="error" {% endif %}>{{ _(fieldlist.label.text | string) }}-{{ loop.index0 }}</label>
|
||||
<table id="{{ entry.id }}" {% if entry.errors %} class="error" {% endif %}>
|
||||
<tbody>
|
||||
{% for subfield in entry %}
|
||||
<tr {% if subfield.errors %} class="error" {% endif %}>
|
||||
<th {% if subfield.errors %} class="error" {% endif %}><label for="{{ subfield.id }}" {% if subfield.errors %} class="error" {% endif %}>{{ subfield.label.text }}</label></th>
|
||||
<th {% if subfield.errors %} class="error" {% endif %}><label for="{{ subfield.id }}" {% if subfield.errors %} class="error" {% endif %}>{{ subfield.label.text | string }}</label></th>
|
||||
<td {% if subfield.errors %} class="error" {% endif %}>
|
||||
{{ subfield(**kwargs)|safe }}
|
||||
{% if subfield.errors %}
|
||||
@@ -148,7 +148,7 @@
|
||||
<div class="fieldlist_formfields" id="{{ table_id }}">
|
||||
<div class="fieldlist-header">
|
||||
{% for subfield in fieldlist[0] %}
|
||||
<div class="fieldlist-header-cell">{{ subfield.label }}</div>
|
||||
<div class="fieldlist-header-cell">{{ subfield.label.text | string }}</div>
|
||||
{% endfor %}
|
||||
<div class="fieldlist-header-cell">{{ _('Actions') }}</div>
|
||||
</div>
|
||||
@@ -267,7 +267,7 @@
|
||||
</ul>
|
||||
<br>
|
||||
<span class="pure-form-message-inline">
|
||||
<a href="https://changedetection.io/tutorials">{{ _('More help and examples about using the scheduler') }}</a>
|
||||
<a href="https://changedetection.io/tutorial/checking-web-pages-changes-according-schedule">{{ _('More help and examples about using the scheduler') }}</a>
|
||||
</span>
|
||||
</div>
|
||||
{% else %}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="{{ get_locale() }}" data-darkmode="{{ get_darkmode_state() }}">
|
||||
<html lang="{{ get_locale()|replace('_', '-') }}" data-darkmode="{{ get_darkmode_state() }}">
|
||||
|
||||
<head>
|
||||
<meta charset="utf-8" >
|
||||
@@ -27,7 +27,7 @@
|
||||
<link rel="apple-touch-icon" sizes="180x180" href="{{url_for('static_content', group='favicons', filename='apple-touch-icon.png')}}">
|
||||
<link rel="icon" type="image/png" sizes="32x32" href="{{url_for('static_content', group='favicons', filename='favicon-32x32.png')}}">
|
||||
<link rel="icon" type="image/png" sizes="16x16" href="{{url_for('static_content', group='favicons', filename='favicon-16x16.png')}}">
|
||||
<link rel="manifest" href="{{url_for('static_content', group='favicons', filename='site.webmanifest')}}">
|
||||
<link rel="manifest" href="{{url_for('static_content', group='favicons', filename='site.webmanifest')}}" crossorigin="use-credentials">
|
||||
<link rel="mask-icon" href="{{url_for('static_content', group='favicons', filename='safari-pinned-tab.svg')}}" color="#5bbad5">
|
||||
<link rel="shortcut icon" href="{{url_for('static_content', group='favicons', filename='favicon.ico')}}">
|
||||
<meta name="msapplication-TileColor" content="#da532c">
|
||||
@@ -265,6 +265,9 @@
|
||||
</a>
|
||||
{% endfor %}
|
||||
</div>
|
||||
<div>
|
||||
<a href="{{ url_for('ui.delete_locale_language_session_var_if_it_exists', redirect=request.path) }}" >{{ _('Auto-detect from browser') }}</a>
|
||||
</div>
|
||||
<div>
|
||||
{{ _('Language support is in beta, please help us improve by opening a PR on GitHub with any updates.') }}
|
||||
</div>
|
||||
|
||||
@@ -6,10 +6,10 @@
|
||||
") }}
|
||||
<span class="pure-form-message-inline">
|
||||
<ul>
|
||||
<li>Text to wait for before triggering a change/notification, all text and regex are tested <i>case-insensitive</i>.</li>
|
||||
<li>Trigger text is processed from the result-text that comes out of any CSS/JSON Filters for this watch</li>
|
||||
<li>Each line is processed separately (think of each line as "OR")</li>
|
||||
<li>Note: Wrap in forward slash / to use regex example: <code>/foo\d/</code></li>
|
||||
<li>{{ _('Text to wait for before triggering a change/notification, all text and regex are tested case-insensitive.') }}</li>
|
||||
<li>{{ _('Trigger text is processed from the result-text that comes out of any CSS/JSON Filters for this monitor') }}</li>
|
||||
<li>{{ _('Each line is processed separately (think of each line as "OR")') }}</li>
|
||||
<li>{{ _('Note: Wrap in forward slash / to use regex example:') }} <code>/foo\d/</code></li>
|
||||
</ul>
|
||||
</span>
|
||||
</div>
|
||||
@@ -20,10 +20,10 @@
|
||||
") }}
|
||||
<span class="pure-form-message-inline">
|
||||
<ul>
|
||||
<li>Matching text will be <strong>ignored</strong> in the text snapshot (you can still see it but it wont trigger a change)</li>
|
||||
<li>Each line processed separately, any line matching will be ignored (removed before creating the checksum)</li>
|
||||
<li>Regular Expression support, wrap the entire line in forward slash <code>/regex/</code></li>
|
||||
<li>Changing this will affect the comparison checksum which may trigger an alert</li>
|
||||
<li>{{ _('Matching text will be ignored in the text snapshot (you can still see it but it wont trigger a change)') }}</li>
|
||||
<li>{{ _('Each line processed separately, any line matching will be ignored (removed before creating the checksum)') }}</li>
|
||||
<li>{{ _('Regular Expression support, wrap the entire line in forward slash') }} <code>/regex/</code></li>
|
||||
<li>{{ _('Changing this will affect the comparison checksum which may trigger an alert') }}</li>
|
||||
</ul>
|
||||
</span>
|
||||
<br><br>
|
||||
@@ -40,10 +40,10 @@ Not in stock
|
||||
Unavailable") }}
|
||||
<span class="pure-form-message-inline">
|
||||
<ul>
|
||||
<li>Block change-detection while this text is on the page, all text and regex are tested <i>case-insensitive</i>, good for waiting for when a product is available again</li>
|
||||
<li>Block text is processed from the result-text that comes out of any CSS/JSON Filters for this watch</li>
|
||||
<li>All lines here must not exist (think of each line as "OR")</li>
|
||||
<li>Note: Wrap in forward slash / to use regex example: <code>/foo\d/</code></li>
|
||||
<li>{{ _('Block change-detection while this text is on the page, all text and regex are tested case-insensitive, good for waiting for when a product is available again') }}</li>
|
||||
<li>{{ _('Block text is processed from the result-text that comes out of any CSS/JSON Filters for this monitor') }}</li>
|
||||
<li>{{ _('All lines here must not exist (think of each line as "OR")') }}</li>
|
||||
<li>{{ _('Note: Wrap in forward slash / to use regex example:') }} <code>/foo\d/</code></li>
|
||||
</ul>
|
||||
</span>
|
||||
</div>
|
||||
@@ -55,17 +55,17 @@ Unavailable") }}
|
||||
keyword") }}
|
||||
<span class="pure-form-message-inline">
|
||||
<ul>
|
||||
<li>Extracts text in the final output (line by line) after other filters using regular expressions or string match;
|
||||
<li>{{ _('Extracts text in the final output (line by line) after other filters using regular expressions or string match:') }}
|
||||
<ul>
|
||||
<li>Regular expression ‐ example <code>/reports.+?2022/i</code></li>
|
||||
<li>Don't forget to consider the white-space at the start of a line <code>/.+?reports.+?2022/i</code></li>
|
||||
<li>Use <code>//(?aiLmsux))</code> type flags (more <a href="https://docs.python.org/3/library/re.html#index-15">information here</a>)<br></li>
|
||||
<li>Keyword example ‐ example <code>Out of stock</code></li>
|
||||
<li>Use groups to extract just that text ‐ example <code>/reports.+?(\d+)/i</code> returns a list of years only</li>
|
||||
<li>Example - match lines containing a keyword <code>/.*icecream.*/</code></li>
|
||||
<li>{{ _('Regular expression - example') }} <code>/reports.+?2022/i</code></li>
|
||||
<li>{{ _('Don\'t forget to consider the white-space at the start of a line') }} <code>/.+?reports.+?2022/i</code></li>
|
||||
<li>{{ _('Use') }} <code>//(?aiLmsux))</code> {{ _('type flags (more') }} <a href="https://docs.python.org/3/library/re.html#index-15">{{ _('information here') }}</a>)<br></li>
|
||||
<li>{{ _('Keyword example - example') }} <code>Out of stock</code></li>
|
||||
<li>{{ _('Use groups to extract just that text - example') }} <code>/reports.+?(\d+)/i</code> {{ _('returns a list of years only') }}</li>
|
||||
<li>{{ _('Example - match lines containing a keyword') }} <code>/.*icecream.*/</code></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li>One line per regular-expression/string match</li>
|
||||
<li>{{ _('One line per regular-expression/string match') }}</li>
|
||||
</ul>
|
||||
</span>
|
||||
</div>
|
||||
|
||||
@@ -13,8 +13,11 @@
|
||||
<li class="pure-menu-item menu-collapsible {% if request.endpoint.startswith('imports.') %}active{% endif %}">
|
||||
<a href="{{ url_for('imports.import_page') }}" class="pure-menu-link">{{ _('IMPORT') }}</a>
|
||||
</li>
|
||||
<li class="pure-menu-item menu-collapsible {% if request.endpoint.startswith('backups.') %}active{% endif %}">
|
||||
<a href="{{ url_for('backups.index') }}" class="pure-menu-link">{{ _('BACKUPS') }}</a>
|
||||
<li class="pure-menu-item" id="menu-pause">
|
||||
<a href="{{ url_for('settings.toggle_all_paused') }}" ><img src="{{url_for('static_content', group='images', filename='pause.svg')}}" alt="{% if all_paused %}{{ _('Resume automatic scheduling') }}{% else %}{{ _('Pause auto-queue scheduling of watches') }}{% endif %}" title="{% if all_paused %}{{ _('Scheduling is paused - click to resume') }}{% else %}{{ _('Pause auto-queue scheduling of watches') }}{% endif %}" class="icon icon-pause"{% if not all_paused %} style="opacity: 0.3"{% endif %}></a>
|
||||
</li>
|
||||
<li class="pure-menu-item " id="menu-mute">
|
||||
<a href="{{ url_for('settings.toggle_all_muted') }}" ><img src="{{url_for('static_content', group='images', filename='bell-off.svg')}}" alt="{% if all_muted %}{{ _('Unmute notifications') }}{% else %}{{ _('Mute notifications') }}{% endif %}" title="{% if all_muted %}{{ _('Notifications are muted - click to unmute') }}{% else %}{{ _('Mute notifications') }}{% endif %}" class="icon icon-mute"{% if not all_muted %} style="opacity: 0.3"{% endif %}></a>
|
||||
</li>
|
||||
{% else %}
|
||||
<li class="pure-menu-item menu-collapsible">
|
||||
@@ -30,7 +33,7 @@
|
||||
|
||||
{% else %}
|
||||
<li class="pure-menu-item menu-collapsible">
|
||||
<a class="pure-menu-link" href="https://changedetection.io">Website Change Detection and Notification.</a>
|
||||
<a class="pure-menu-link" href="https://changedetection.io">{{ _('Website Change Detection and Notification.') }}</a>
|
||||
</li>
|
||||
{% endif %}
|
||||
<li class="pure-menu-item menu-collapsible" id="inline-menu-extras-group">
|
||||
@@ -47,7 +50,9 @@
|
||||
<span class="visually-hidden">{{ _('Change language') }}</span>
|
||||
<span class="{{ get_flag_for_locale(get_locale()) }}" id="language-selector-flag"></span>
|
||||
</button>
|
||||
<a class="github-link" href="https://github.com/dgtlmoon/changedetection.io">
|
||||
<a class="github-link" href="https://github.com/dgtlmoon/changedetection.io"
|
||||
target="_blank"
|
||||
rel="noopener" >
|
||||
{% include "svgs/github.svg" %}
|
||||
</a>
|
||||
</li>
|
||||
|
||||
Executable
+265
@@ -0,0 +1,265 @@
|
||||
#!/bin/bash
|
||||
# Test script for CLI options - Parallel execution
|
||||
# Tests -u, -uN, -r, -b flags
|
||||
|
||||
set -u # Exit on undefined variables
|
||||
|
||||
# Color output
|
||||
RED='\033[0;31m'
|
||||
GREEN='\033[0;32m'
|
||||
YELLOW='\033[1;33m'
|
||||
NC='\033[0m' # No Color
|
||||
|
||||
# Test results directory (for parallel safety)
|
||||
TEST_RESULTS_DIR="/tmp/cli-test-results-$$"
|
||||
mkdir -p "$TEST_RESULTS_DIR"
|
||||
|
||||
# Cleanup function
|
||||
cleanup() {
|
||||
echo ""
|
||||
echo "=== Cleaning up test directories ==="
|
||||
rm -rf /tmp/cli-test-* 2>/dev/null || true
|
||||
rm -rf "$TEST_RESULTS_DIR" 2>/dev/null || true
|
||||
# Kill any hanging processes
|
||||
pkill -f "changedetection.py.*cli-test" 2>/dev/null || true
|
||||
}
|
||||
trap cleanup EXIT
|
||||
|
||||
# Helper to record test result
|
||||
record_result() {
|
||||
local test_num=$1
|
||||
local status=$2 # pass or fail
|
||||
local message=$3
|
||||
|
||||
echo "$status|$message" > "$TEST_RESULTS_DIR/test_${test_num}.result"
|
||||
}
|
||||
|
||||
# Run a test in background
|
||||
run_test() {
|
||||
local test_num=$1
|
||||
local test_name=$2
|
||||
local test_func=$3
|
||||
|
||||
(
|
||||
echo -e "${YELLOW}[Test $test_num]${NC} $test_name"
|
||||
if $test_func "$test_num"; then
|
||||
record_result "$test_num" "pass" "$test_name"
|
||||
echo -e "${GREEN}✓ PASS${NC}: $test_name"
|
||||
else
|
||||
record_result "$test_num" "fail" "$test_name"
|
||||
echo -e "${RED}✗ FAIL${NC}: $test_name"
|
||||
fi
|
||||
) &
|
||||
}
|
||||
|
||||
# =============================================================================
|
||||
# Test Functions (each runs independently)
|
||||
# =============================================================================
|
||||
|
||||
test_help_flag() {
|
||||
local test_id=$1
|
||||
timeout 3 python3 changedetection.py --help 2>&1 | grep -q "Add URLs on startup"
|
||||
}
|
||||
|
||||
test_version_flag() {
|
||||
local test_id=$1
|
||||
timeout 3 python3 changedetection.py --version 2>&1 | grep -qE "changedetection.io [0-9]+\.[0-9]+"
|
||||
}
|
||||
|
||||
test_single_url() {
|
||||
local test_id=$1
|
||||
local dir="/tmp/cli-test-single-${test_id}-$$"
|
||||
timeout 10 python3 changedetection.py -d "$dir" -C -u https://example.com -b &>/dev/null
|
||||
# Count watch directories (UUID directories containing watch.json)
|
||||
[ "$(find "$dir" -mindepth 2 -maxdepth 2 -name 'watch.json' | wc -l)" -eq 1 ]
|
||||
}
|
||||
|
||||
test_multiple_urls() {
|
||||
local test_id=$1
|
||||
local dir="/tmp/cli-test-multi-${test_id}-$$"
|
||||
timeout 12 python3 changedetection.py -d "$dir" -C \
|
||||
-u https://example.com \
|
||||
-u https://github.com \
|
||||
-u https://httpbin.org \
|
||||
-b &>/dev/null
|
||||
# Count watch directories (UUID directories containing watch.json)
|
||||
[ "$(find "$dir" -mindepth 2 -maxdepth 2 -name 'watch.json' | wc -l)" -eq 3 ]
|
||||
}
|
||||
|
||||
test_url_with_options() {
|
||||
local test_id=$1
|
||||
local dir="/tmp/cli-test-opts-${test_id}-$$"
|
||||
timeout 10 python3 changedetection.py -d "$dir" -C \
|
||||
-u https://example.com \
|
||||
-u0 '{"title":"Test Site","processor":"text_json_diff"}' \
|
||||
-b &>/dev/null
|
||||
# Check that at least one watch.json contains the title "Test Site"
|
||||
python3 -c "
|
||||
import json, glob, sys
|
||||
watch_files = glob.glob('$dir/*/watch.json')
|
||||
for wf in watch_files:
|
||||
with open(wf) as f:
|
||||
data = json.load(f)
|
||||
if data.get('title') == 'Test Site':
|
||||
sys.exit(0)
|
||||
sys.exit(1)
|
||||
"
|
||||
}
|
||||
|
||||
test_multiple_urls_with_options() {
|
||||
local test_id=$1
|
||||
local dir="/tmp/cli-test-multi-opts-${test_id}-$$"
|
||||
timeout 12 python3 changedetection.py -d "$dir" -C \
|
||||
-u https://example.com \
|
||||
-u0 '{"title":"Site One"}' \
|
||||
-u https://github.com \
|
||||
-u1 '{"title":"Site Two"}' \
|
||||
-b &>/dev/null
|
||||
# Check that we have 2 watches and both titles are present
|
||||
python3 -c "
|
||||
import json, glob, sys
|
||||
watch_files = glob.glob('$dir/*/watch.json')
|
||||
if len(watch_files) != 2:
|
||||
sys.exit(1)
|
||||
titles = []
|
||||
for wf in watch_files:
|
||||
with open(wf) as f:
|
||||
data = json.load(f)
|
||||
titles.append(data.get('title'))
|
||||
sys.exit(0 if 'Site One' in titles and 'Site Two' in titles else 1)
|
||||
"
|
||||
}
|
||||
|
||||
test_batch_mode_exit() {
|
||||
local test_id=$1
|
||||
local dir="/tmp/cli-test-batch-${test_id}-$$"
|
||||
local start=$(date +%s)
|
||||
timeout 15 python3 changedetection.py -d "$dir" -C \
|
||||
-u https://example.com \
|
||||
-b &>/dev/null
|
||||
local end=$(date +%s)
|
||||
local elapsed=$((end - start))
|
||||
[ $elapsed -lt 14 ]
|
||||
}
|
||||
|
||||
test_recheck_all() {
|
||||
local test_id=$1
|
||||
local dir="/tmp/cli-test-recheck-all-${test_id}-$$"
|
||||
# Create a watch using CLI, then recheck it
|
||||
timeout 10 python3 changedetection.py -d "$dir" -C -u https://example.com -b &>/dev/null
|
||||
# Now recheck all watches
|
||||
timeout 10 python3 changedetection.py -d "$dir" -r all -b 2>&1 | grep -q "Queuing"
|
||||
}
|
||||
|
||||
test_recheck_specific() {
|
||||
local test_id=$1
|
||||
local dir="/tmp/cli-test-recheck-uuid-${test_id}-$$"
|
||||
# Create 2 watches using CLI
|
||||
timeout 12 python3 changedetection.py -d "$dir" -C \
|
||||
-u https://example.com \
|
||||
-u https://github.com \
|
||||
-b &>/dev/null
|
||||
# Get the UUIDs that were created
|
||||
local uuids=$(find "$dir" -mindepth 2 -maxdepth 2 -name 'watch.json' -exec dirname {} \; | xargs -n1 basename | tr '\n' ',' | sed 's/,$//')
|
||||
# Now recheck specific UUIDs
|
||||
timeout 10 python3 changedetection.py -d "$dir" -r "$uuids" -b 2>&1 | grep -q "Queuing"
|
||||
}
|
||||
|
||||
test_combined_operations() {
|
||||
local test_id=$1
|
||||
local dir="/tmp/cli-test-combined-${test_id}-$$"
|
||||
timeout 12 python3 changedetection.py -d "$dir" -C \
|
||||
-u https://example.com \
|
||||
-u https://github.com \
|
||||
-r all \
|
||||
-b &>/dev/null
|
||||
# Count watch directories (UUID directories containing watch.json)
|
||||
[ "$(find "$dir" -mindepth 2 -maxdepth 2 -name 'watch.json' | wc -l)" -eq 2 ]
|
||||
}
|
||||
|
||||
test_invalid_json() {
|
||||
local test_id=$1
|
||||
local dir="/tmp/cli-test-invalid-${test_id}-$$"
|
||||
timeout 5 python3 changedetection.py -d "$dir" -C \
|
||||
-u https://example.com \
|
||||
-u0 'invalid json here' \
|
||||
2>&1 | grep -qi "invalid json\|json decode error"
|
||||
}
|
||||
|
||||
test_create_directory() {
|
||||
local test_id=$1
|
||||
local dir="/tmp/cli-test-create-${test_id}-$$/nested/path"
|
||||
timeout 10 python3 changedetection.py -d "$dir" -C \
|
||||
-u https://example.com \
|
||||
-b &>/dev/null
|
||||
[ -d "$dir" ]
|
||||
}
|
||||
|
||||
# =============================================================================
|
||||
# Main Test Execution
|
||||
# =============================================================================
|
||||
|
||||
echo "=========================================="
|
||||
echo " CLI Options Test Suite (Parallel)"
|
||||
echo "=========================================="
|
||||
echo ""
|
||||
|
||||
# Launch all tests in parallel
|
||||
run_test 1 "Help flag (--help) shows usage without initialization" test_help_flag
|
||||
run_test 2 "Version flag (--version) displays version" test_version_flag
|
||||
run_test 3 "Add single URL with -u flag" test_single_url
|
||||
run_test 4 "Add multiple URLs with multiple -u flags" test_multiple_urls
|
||||
run_test 5 "Add URL with JSON options using -u0" test_url_with_options
|
||||
run_test 6 "Add multiple URLs with different options (-u0, -u1)" test_multiple_urls_with_options
|
||||
run_test 7 "Batch mode (-b) exits automatically after processing" test_batch_mode_exit
|
||||
run_test 8 "Recheck all watches with -r all" test_recheck_all
|
||||
run_test 9 "Recheck specific watches with -r UUID" test_recheck_specific
|
||||
run_test 10 "Combined: Add URLs and recheck all with -u and -r all" test_combined_operations
|
||||
run_test 11 "Invalid JSON in -u0 option should show error" test_invalid_json
|
||||
run_test 12 "Create datastore directory with -C flag" test_create_directory
|
||||
|
||||
# Wait for all tests to complete
|
||||
echo ""
|
||||
echo "Waiting for all tests to complete..."
|
||||
wait
|
||||
|
||||
# Collect results
|
||||
echo ""
|
||||
echo "=========================================="
|
||||
echo " Test Summary"
|
||||
echo "=========================================="
|
||||
|
||||
TESTS_RUN=0
|
||||
TESTS_PASSED=0
|
||||
TESTS_FAILED=0
|
||||
|
||||
for result_file in "$TEST_RESULTS_DIR"/test_*.result; do
|
||||
if [ -f "$result_file" ]; then
|
||||
TESTS_RUN=$((TESTS_RUN + 1))
|
||||
status=$(cut -d'|' -f1 < "$result_file")
|
||||
if [ "$status" = "pass" ]; then
|
||||
TESTS_PASSED=$((TESTS_PASSED + 1))
|
||||
else
|
||||
TESTS_FAILED=$((TESTS_FAILED + 1))
|
||||
fi
|
||||
fi
|
||||
done
|
||||
|
||||
echo "Tests run: $TESTS_RUN"
|
||||
echo -e "${GREEN}Tests passed: $TESTS_PASSED${NC}"
|
||||
if [ $TESTS_FAILED -gt 0 ]; then
|
||||
echo -e "${RED}Tests failed: $TESTS_FAILED${NC}"
|
||||
else
|
||||
echo -e "${GREEN}Tests failed: $TESTS_FAILED${NC}"
|
||||
fi
|
||||
echo "=========================================="
|
||||
echo ""
|
||||
|
||||
# Exit with appropriate code
|
||||
if [ $TESTS_FAILED -gt 0 ]; then
|
||||
echo -e "${RED}Some tests failed!${NC}"
|
||||
exit 1
|
||||
else
|
||||
echo -e "${GREEN}All tests passed!${NC}"
|
||||
exit 0
|
||||
fi
|
||||
@@ -5,13 +5,16 @@ from threading import Thread
|
||||
|
||||
import pytest
|
||||
import arrow
|
||||
from changedetectionio import changedetection_app
|
||||
from changedetectionio import store
|
||||
import os
|
||||
import sys
|
||||
from loguru import logger
|
||||
|
||||
from changedetectionio.flask_app import init_app_secret
|
||||
# CRITICAL: Set short timeout for tests to prevent 45-second hangs
|
||||
# When test server is slow/unresponsive, workers fail fast instead of holding UUIDs for 45s
|
||||
# This prevents exponential priority growth from repeated deferrals (priority × 10 each defer)
|
||||
os.environ['DEFAULT_SETTINGS_REQUESTS_TIMEOUT'] = '5'
|
||||
|
||||
from changedetectionio.flask_app import init_app_secret, changedetection_app
|
||||
from changedetectionio.tests.util import live_server_setup, new_live_server_setup
|
||||
|
||||
# https://github.com/pallets/flask/blob/1.1.2/examples/tutorial/tests/test_auth.py
|
||||
@@ -31,6 +34,93 @@ def reportlog(pytestconfig):
|
||||
logger.remove(handler_id)
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def per_test_log_file(request):
|
||||
"""Create a separate log file for each test function with pytest output."""
|
||||
import re
|
||||
|
||||
# Create logs directory if it doesn't exist
|
||||
log_dir = os.path.join(os.path.dirname(__file__), "logs")
|
||||
os.makedirs(log_dir, exist_ok=True)
|
||||
|
||||
# Generate log filename from test name and worker ID (for parallel runs)
|
||||
test_name = request.node.name
|
||||
|
||||
# Sanitize test name - replace unsafe characters with underscores
|
||||
# Keep only alphanumeric, dash, underscore, and period
|
||||
safe_test_name = re.sub(r'[^\w\-.]', '_', test_name)
|
||||
|
||||
# Limit length to avoid filesystem issues (max 200 chars)
|
||||
if len(safe_test_name) > 200:
|
||||
# Keep first 150 chars + hash of full name + last 30 chars
|
||||
import hashlib
|
||||
name_hash = hashlib.md5(test_name.encode()).hexdigest()[:8]
|
||||
safe_test_name = f"{safe_test_name[:150]}_{name_hash}_{safe_test_name[-30:]}"
|
||||
|
||||
worker_id = os.environ.get('PYTEST_XDIST_WORKER', 'master')
|
||||
log_file = os.path.join(log_dir, f"{safe_test_name}_{worker_id}.log")
|
||||
|
||||
# Add file handler for this test with TRACE level
|
||||
handler_id = logger.add(
|
||||
log_file,
|
||||
format="{time:YYYY-MM-DD HH:mm:ss.SSS} | {level: <8} | {process} | {name}:{function}:{line} - {message}",
|
||||
level="TRACE",
|
||||
mode="w", # Overwrite if exists
|
||||
enqueue=True # Thread-safe
|
||||
)
|
||||
|
||||
logger.info(f"=== Starting test: {test_name} (worker: {worker_id}) ===")
|
||||
logger.info(f"Test location: {request.node.nodeid}")
|
||||
|
||||
yield
|
||||
|
||||
# Capture test outcome (PASSED/FAILED/SKIPPED/ERROR)
|
||||
outcome = "UNKNOWN"
|
||||
exc_info = None
|
||||
stdout = None
|
||||
stderr = None
|
||||
|
||||
if hasattr(request.node, 'rep_call'):
|
||||
outcome = request.node.rep_call.outcome.upper()
|
||||
if request.node.rep_call.failed:
|
||||
exc_info = request.node.rep_call.longreprtext
|
||||
# Capture stdout/stderr from call phase
|
||||
if hasattr(request.node.rep_call, 'sections'):
|
||||
for section_name, section_content in request.node.rep_call.sections:
|
||||
if 'stdout' in section_name.lower():
|
||||
stdout = section_content
|
||||
elif 'stderr' in section_name.lower():
|
||||
stderr = section_content
|
||||
elif hasattr(request.node, 'rep_setup'):
|
||||
if request.node.rep_setup.failed:
|
||||
outcome = "SETUP_FAILED"
|
||||
exc_info = request.node.rep_setup.longreprtext
|
||||
|
||||
logger.info(f"=== Test Result: {outcome} ===")
|
||||
|
||||
if exc_info:
|
||||
logger.error(f"=== Test Failure Details ===\n{exc_info}")
|
||||
|
||||
if stdout:
|
||||
logger.info(f"=== Captured stdout ===\n{stdout}")
|
||||
|
||||
if stderr:
|
||||
logger.warning(f"=== Captured stderr ===\n{stderr}")
|
||||
|
||||
logger.info(f"=== Finished test: {test_name} ===")
|
||||
logger.remove(handler_id)
|
||||
|
||||
|
||||
@pytest.hookimpl(tryfirst=True, hookwrapper=True)
|
||||
def pytest_runtest_makereport(item, call):
|
||||
"""Hook to capture test results and attach to the test node."""
|
||||
outcome = yield
|
||||
rep = outcome.get_result()
|
||||
|
||||
# Store report on the test node for access in fixtures
|
||||
setattr(item, f"rep_{rep.when}", rep)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def environment(mocker):
|
||||
"""Mock arrow.now() to return a fixed datetime for testing jinja2 time extension."""
|
||||
@@ -159,9 +249,64 @@ def prepare_test_function(live_server, datastore_path):
|
||||
# CRITICAL: Get datastore and stop it from writing stale data
|
||||
datastore = live_server.app.config.get('DATASTORE')
|
||||
|
||||
# Prevent background thread from writing during cleanup/reload
|
||||
datastore.needs_write = False
|
||||
datastore.needs_write_urgent = False
|
||||
# Clear the queue before starting the test to prevent state leakage
|
||||
from changedetectionio.flask_app import update_q
|
||||
while not update_q.empty():
|
||||
try:
|
||||
update_q.get_nowait()
|
||||
except:
|
||||
break
|
||||
|
||||
# Add test helper methods to the app for worker management
|
||||
def set_workers(count):
|
||||
"""Set the number of workers for testing - brutal shutdown, no delays"""
|
||||
from changedetectionio import worker_pool
|
||||
from changedetectionio.flask_app import update_q, notification_q
|
||||
|
||||
current_count = worker_pool.get_worker_count()
|
||||
|
||||
# Special case: Setting to 0 means shutdown all workers brutally
|
||||
if count == 0:
|
||||
logger.debug(f"Brutally shutting down all {current_count} workers")
|
||||
worker_pool.shutdown_workers()
|
||||
return {
|
||||
'status': 'success',
|
||||
'message': f'Shutdown all {current_count} workers',
|
||||
'previous_count': current_count,
|
||||
'current_count': 0
|
||||
}
|
||||
|
||||
# Adjust worker count (no delays, no verification)
|
||||
result = worker_pool.adjust_async_worker_count(
|
||||
count,
|
||||
update_q=update_q,
|
||||
notification_q=notification_q,
|
||||
app=live_server.app,
|
||||
datastore=datastore
|
||||
)
|
||||
|
||||
return result
|
||||
|
||||
def check_all_workers_alive(expected_count):
|
||||
"""Check that all expected workers are alive"""
|
||||
from changedetectionio import worker_pool
|
||||
from changedetectionio.flask_app import update_q, notification_q
|
||||
result = worker_pool.check_worker_health(
|
||||
expected_count,
|
||||
update_q=update_q,
|
||||
notification_q=notification_q,
|
||||
app=live_server.app,
|
||||
datastore=datastore
|
||||
)
|
||||
assert result['status'] == 'healthy', f"Workers not healthy: {result['message']}"
|
||||
return result
|
||||
|
||||
# Attach helper methods to app for easy test access
|
||||
live_server.app.set_workers = set_workers
|
||||
live_server.app.check_all_workers_alive = check_all_workers_alive
|
||||
|
||||
|
||||
|
||||
|
||||
# CRITICAL: Clean up any files from previous tests
|
||||
# This ensures a completely clean directory
|
||||
@@ -183,10 +328,18 @@ def prepare_test_function(live_server, datastore_path):
|
||||
|
||||
yield
|
||||
|
||||
# Cleanup: Clear watches again after test
|
||||
# Cleanup: Clear watches and queue after test
|
||||
try:
|
||||
from changedetectionio.flask_app import update_q
|
||||
|
||||
# Clear the queue to prevent leakage to next test
|
||||
while not update_q.empty():
|
||||
try:
|
||||
update_q.get_nowait()
|
||||
except:
|
||||
break
|
||||
|
||||
datastore.data['watching'] = {}
|
||||
datastore.needs_write = True
|
||||
except Exception as e:
|
||||
logger.warning(f"Error during datastore cleanup: {e}")
|
||||
|
||||
@@ -238,17 +391,20 @@ def app(request, datastore_path):
|
||||
app.config['TEST_DATASTORE_PATH'] = datastore_path
|
||||
|
||||
def teardown():
|
||||
import threading
|
||||
import time
|
||||
|
||||
# Stop all threads and services
|
||||
datastore.stop_thread = True
|
||||
app.config.exit.set()
|
||||
|
||||
|
||||
# Shutdown workers gracefully before loguru cleanup
|
||||
try:
|
||||
from changedetectionio import worker_handler
|
||||
worker_handler.shutdown_workers()
|
||||
from changedetectionio import worker_pool
|
||||
worker_pool.shutdown_workers()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
# Stop socket server threads
|
||||
try:
|
||||
from changedetectionio.flask_app import socketio_server
|
||||
@@ -256,14 +412,35 @@ def app(request, datastore_path):
|
||||
socketio_server.shutdown()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Give threads a moment to finish their shutdown
|
||||
import time
|
||||
time.sleep(0.1)
|
||||
|
||||
|
||||
# Get all active threads before cleanup
|
||||
main_thread = threading.main_thread()
|
||||
active_threads = [t for t in threading.enumerate() if t != main_thread and t.is_alive()]
|
||||
|
||||
# Wait for non-daemon threads to finish (with timeout)
|
||||
timeout = 2.0 # 2 seconds max wait
|
||||
start_time = time.time()
|
||||
|
||||
for thread in active_threads:
|
||||
if not thread.daemon:
|
||||
remaining_time = timeout - (time.time() - start_time)
|
||||
if remaining_time > 0:
|
||||
logger.debug(f"Waiting for non-daemon thread to finish: {thread.name}")
|
||||
thread.join(timeout=remaining_time)
|
||||
if thread.is_alive():
|
||||
logger.warning(f"Thread {thread.name} did not finish in time")
|
||||
|
||||
# Give daemon threads a moment to finish their current work
|
||||
time.sleep(0.2)
|
||||
|
||||
# Log any threads still running
|
||||
remaining_threads = [t for t in threading.enumerate() if t != main_thread and t.is_alive()]
|
||||
if remaining_threads:
|
||||
logger.debug(f"Threads still running after teardown: {[t.name for t in remaining_threads]}")
|
||||
|
||||
# Remove all loguru handlers to prevent "closed file" errors
|
||||
logger.remove()
|
||||
|
||||
|
||||
# Cleanup files
|
||||
cleanup(app_config['datastore_path'])
|
||||
|
||||
@@ -272,4 +449,3 @@ def app(request, datastore_path):
|
||||
yield app
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -0,0 +1,41 @@
|
||||
import time
|
||||
|
||||
from flask import url_for
|
||||
|
||||
from changedetectionio.tests.util import wait_for_all_checks
|
||||
|
||||
|
||||
def test_check_plugin_processor(client, live_server, measure_memory_usage, datastore_path):
|
||||
# requires os-int intelligence plugin installed (first basic one we test with)
|
||||
|
||||
res = client.get(url_for("watchlist.index"))
|
||||
assert b'OSINT Reconnaissance' in res.data, "Must have the OSINT plugin installed at test time"
|
||||
assert b'<input checked id="processor-0" name="processor" type="radio" value="text_json_diff">' in res.data, "But the first text_json_diff processor should always be selected by default in quick watch form"
|
||||
|
||||
res = client.post(
|
||||
url_for("ui.ui_views.form_quick_watch_add"),
|
||||
data={"url": 'http://127.0.0.1', "tags": '', 'processor': 'osint_recon'},
|
||||
follow_redirects=True
|
||||
)
|
||||
assert b"Watch added" in res.data
|
||||
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
||||
|
||||
wait_for_all_checks(client)
|
||||
|
||||
res = client.get(
|
||||
url_for("ui.ui_preview.preview_page", uuid="first"),
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
assert b'Target: http://127.0.0.1' in res.data
|
||||
assert b'DNSKEY Records' in res.data
|
||||
wait_for_all_checks(client)
|
||||
|
||||
|
||||
# Now change it to something that doesnt exist
|
||||
uuid = next(iter(live_server.app.config['DATASTORE'].data['watching']))
|
||||
live_server.app.config['DATASTORE'].data['watching'][uuid]['processor'] = "now_missing"
|
||||
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
||||
wait_for_all_checks(client)
|
||||
res = client.get(url_for("watchlist.index"))
|
||||
assert b"Exception: Processor module" in res.data and b'now_missing' in res.data, f'Should register that the plugin is missing for {uuid}'
|
||||
@@ -124,7 +124,6 @@ def test_check_access_control(app, client, live_server, measure_memory_usage, da
|
||||
|
||||
# Menu should be available now
|
||||
assert b"SETTINGS" in res.data
|
||||
assert b"BACKUP" in res.data
|
||||
assert b"IMPORT" in res.data
|
||||
assert b"LOG OUT" in res.data
|
||||
assert b"time_between_check-minutes" in res.data
|
||||
|
||||
@@ -58,7 +58,7 @@ def is_valid_uuid(val):
|
||||
|
||||
|
||||
def test_api_simple(client, live_server, measure_memory_usage, datastore_path):
|
||||
|
||||
|
||||
|
||||
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
|
||||
|
||||
@@ -109,18 +109,17 @@ def test_api_simple(client, live_server, measure_memory_usage, datastore_path):
|
||||
headers={'x-api-key': api_key}
|
||||
)
|
||||
assert len(res.json) == 0
|
||||
time.sleep(1)
|
||||
time.sleep(2)
|
||||
wait_for_all_checks(client)
|
||||
|
||||
set_modified_response(datastore_path=datastore_path)
|
||||
# Trigger recheck of all ?recheck_all=1
|
||||
client.get(
|
||||
res = client.get(
|
||||
url_for("createwatch", recheck_all='1'),
|
||||
headers={'x-api-key': api_key},
|
||||
)
|
||||
wait_for_all_checks(client)
|
||||
|
||||
time.sleep(1)
|
||||
time.sleep(2)
|
||||
# Did the recheck fire?
|
||||
res = client.get(
|
||||
url_for("createwatch"),
|
||||
@@ -466,7 +465,10 @@ def test_api_watch_PUT_update(client, live_server, measure_memory_usage, datasto
|
||||
|
||||
assert res.status_code == 400, "Should get error 400 when we give a field that doesnt exist"
|
||||
# Message will come from `flask_expects_json`
|
||||
assert b'Additional properties are not allowed' in res.data
|
||||
# With patternProperties for processor_config_*, the error message format changed slightly
|
||||
assert (b'Additional properties are not allowed' in res.data or
|
||||
b'does not match any of the regexes' in res.data), \
|
||||
"Should reject unknown fields with schema validation error"
|
||||
|
||||
|
||||
# Try a XSS URL
|
||||
@@ -507,7 +509,7 @@ def test_api_import(client, live_server, measure_memory_usage, datastore_path):
|
||||
|
||||
def test_api_conflict_UI_password(client, live_server, measure_memory_usage, datastore_path):
|
||||
|
||||
|
||||
|
||||
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
|
||||
|
||||
# Enable password check and diff page access bypass
|
||||
@@ -549,3 +551,172 @@ def test_api_conflict_UI_password(client, live_server, measure_memory_usage, dat
|
||||
assert len(res.json)
|
||||
|
||||
|
||||
def test_api_url_validation(client, live_server, measure_memory_usage, datastore_path):
|
||||
"""
|
||||
Test URL validation for edge cases in both CREATE and UPDATE endpoints.
|
||||
Addresses security issues where empty/null/invalid URLs could bypass validation.
|
||||
|
||||
This test ensures that:
|
||||
- CREATE endpoint rejects null, empty, and invalid URLs
|
||||
- UPDATE endpoint rejects attempts to change URL to null, empty, or invalid
|
||||
- UPDATE endpoint allows updating other fields without touching URL
|
||||
- URL validation properly checks protocol, format, and safety
|
||||
"""
|
||||
|
||||
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
|
||||
set_original_response(datastore_path=datastore_path)
|
||||
test_url = url_for('test_endpoint', _external=True)
|
||||
|
||||
# Test 1: CREATE with null URL should fail
|
||||
res = client.post(
|
||||
url_for("createwatch"),
|
||||
data=json.dumps({"url": None}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key},
|
||||
follow_redirects=True
|
||||
)
|
||||
assert res.status_code == 400, "Creating watch with null URL should fail"
|
||||
|
||||
# Test 2: CREATE with empty string URL should fail
|
||||
res = client.post(
|
||||
url_for("createwatch"),
|
||||
data=json.dumps({"url": ""}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key},
|
||||
follow_redirects=True
|
||||
)
|
||||
assert res.status_code == 400, "Creating watch with empty string URL should fail"
|
||||
assert b'Invalid or unsupported URL' in res.data or b'required' in res.data.lower()
|
||||
|
||||
# Test 3: CREATE with whitespace-only URL should fail
|
||||
res = client.post(
|
||||
url_for("createwatch"),
|
||||
data=json.dumps({"url": " "}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key},
|
||||
follow_redirects=True
|
||||
)
|
||||
assert res.status_code == 400, "Creating watch with whitespace-only URL should fail"
|
||||
|
||||
# Test 4: CREATE with invalid protocol should fail
|
||||
res = client.post(
|
||||
url_for("createwatch"),
|
||||
data=json.dumps({"url": "javascript:alert(1)"}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key},
|
||||
follow_redirects=True
|
||||
)
|
||||
assert res.status_code == 400, "Creating watch with javascript: protocol should fail"
|
||||
|
||||
# Test 5: CREATE with missing protocol should fail
|
||||
res = client.post(
|
||||
url_for("createwatch"),
|
||||
data=json.dumps({"url": "example.com"}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key},
|
||||
follow_redirects=True
|
||||
)
|
||||
assert res.status_code == 400, "Creating watch without protocol should fail"
|
||||
|
||||
# Test 6: CREATE with valid URL should succeed (baseline)
|
||||
res = client.post(
|
||||
url_for("createwatch"),
|
||||
data=json.dumps({"url": test_url, "title": "Valid URL test"}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key},
|
||||
follow_redirects=True
|
||||
)
|
||||
assert res.status_code == 201, "Creating watch with valid URL should succeed"
|
||||
assert is_valid_uuid(res.json.get('uuid'))
|
||||
watch_uuid = res.json.get('uuid')
|
||||
wait_for_all_checks(client)
|
||||
|
||||
# Test 7: UPDATE to null URL should fail
|
||||
res = client.put(
|
||||
url_for("watch", uuid=watch_uuid),
|
||||
headers={'x-api-key': api_key, 'content-type': 'application/json'},
|
||||
data=json.dumps({"url": None}),
|
||||
)
|
||||
assert res.status_code == 400, "Updating watch URL to null should fail"
|
||||
# Accept either OpenAPI validation error or our custom validation error
|
||||
assert b'URL cannot be null' in res.data or b'OpenAPI validation failed' in res.data or b'validation error' in res.data.lower()
|
||||
|
||||
# Test 8: UPDATE to empty string URL should fail
|
||||
res = client.put(
|
||||
url_for("watch", uuid=watch_uuid),
|
||||
headers={'x-api-key': api_key, 'content-type': 'application/json'},
|
||||
data=json.dumps({"url": ""}),
|
||||
)
|
||||
assert res.status_code == 400, "Updating watch URL to empty string should fail"
|
||||
# Accept either our custom validation error or OpenAPI/schema validation error
|
||||
assert b'URL cannot be empty' in res.data or b'OpenAPI validation' in res.data or b'Invalid or unsupported URL' in res.data
|
||||
|
||||
# Test 9: UPDATE to whitespace-only URL should fail
|
||||
res = client.put(
|
||||
url_for("watch", uuid=watch_uuid),
|
||||
headers={'x-api-key': api_key, 'content-type': 'application/json'},
|
||||
data=json.dumps({"url": " \t\n "}),
|
||||
)
|
||||
assert res.status_code == 400, "Updating watch URL to whitespace should fail"
|
||||
# Accept either our custom validation error or generic validation error
|
||||
assert b'URL cannot be empty' in res.data or b'Invalid or unsupported URL' in res.data or b'validation' in res.data.lower()
|
||||
|
||||
# Test 10: UPDATE to invalid protocol should fail (javascript:)
|
||||
res = client.put(
|
||||
url_for("watch", uuid=watch_uuid),
|
||||
headers={'x-api-key': api_key, 'content-type': 'application/json'},
|
||||
data=json.dumps({"url": "javascript:alert(document.domain)"}),
|
||||
)
|
||||
assert res.status_code == 400, "Updating watch URL to XSS attempt should fail"
|
||||
assert b'Invalid or unsupported URL' in res.data or b'protocol' in res.data.lower()
|
||||
|
||||
# Test 11: UPDATE to file:// protocol should fail (unless ALLOW_FILE_URI is set)
|
||||
res = client.put(
|
||||
url_for("watch", uuid=watch_uuid),
|
||||
headers={'x-api-key': api_key, 'content-type': 'application/json'},
|
||||
data=json.dumps({"url": "file:///etc/passwd"}),
|
||||
)
|
||||
assert res.status_code == 400, "Updating watch URL to file:// should fail by default"
|
||||
|
||||
# Test 12: UPDATE other fields without URL should succeed
|
||||
res = client.put(
|
||||
url_for("watch", uuid=watch_uuid),
|
||||
headers={'x-api-key': api_key, 'content-type': 'application/json'},
|
||||
data=json.dumps({"title": "Updated title without URL change"}),
|
||||
)
|
||||
assert res.status_code == 200, "Updating other fields without URL should succeed"
|
||||
|
||||
# Test 13: Verify URL is still valid after non-URL update
|
||||
res = client.get(
|
||||
url_for("watch", uuid=watch_uuid),
|
||||
headers={'x-api-key': api_key}
|
||||
)
|
||||
assert res.json.get('url') == test_url, "URL should remain unchanged"
|
||||
assert res.json.get('title') == "Updated title without URL change"
|
||||
|
||||
# Test 14: UPDATE to valid different URL should succeed
|
||||
new_valid_url = test_url + "?new=param"
|
||||
res = client.put(
|
||||
url_for("watch", uuid=watch_uuid),
|
||||
headers={'x-api-key': api_key, 'content-type': 'application/json'},
|
||||
data=json.dumps({"url": new_valid_url}),
|
||||
)
|
||||
assert res.status_code == 200, "Updating to valid different URL should succeed"
|
||||
|
||||
# Test 15: Verify URL was actually updated
|
||||
res = client.get(
|
||||
url_for("watch", uuid=watch_uuid),
|
||||
headers={'x-api-key': api_key}
|
||||
)
|
||||
assert res.json.get('url') == new_valid_url, "URL should be updated to new valid URL"
|
||||
|
||||
# Test 16: CREATE with XSS in URL parameters should fail
|
||||
res = client.post(
|
||||
url_for("createwatch"),
|
||||
data=json.dumps({"url": "http://example.com?xss=<script>alert(1)</script>"}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key},
|
||||
follow_redirects=True
|
||||
)
|
||||
# This should fail because of suspicious characters check
|
||||
assert res.status_code == 400, "Creating watch with XSS in URL params should fail"
|
||||
|
||||
# Cleanup
|
||||
client.delete(
|
||||
url_for("watch", uuid=watch_uuid),
|
||||
headers={'x-api-key': api_key},
|
||||
)
|
||||
delete_all_watches(client)
|
||||
|
||||
@@ -0,0 +1,166 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
"""
|
||||
Test notification_urls validation in Watch and Tag API endpoints.
|
||||
Ensures that invalid AppRise URLs are rejected when setting notification_urls.
|
||||
|
||||
Valid AppRise notification URLs use specific protocols like:
|
||||
- posts://example.com - POST to HTTP endpoint
|
||||
- gets://example.com - GET to HTTP endpoint
|
||||
- mailto://user@example.com - Email
|
||||
- slack://token/channel - Slack
|
||||
- discord://webhook_id/webhook_token - Discord
|
||||
- etc.
|
||||
|
||||
Invalid notification URLs:
|
||||
- https://example.com - Plain HTTPS is NOT a valid AppRise notification protocol
|
||||
- ftp://example.com - FTP is NOT a valid AppRise notification protocol
|
||||
- Plain URLs without proper AppRise protocol prefix
|
||||
"""
|
||||
|
||||
from flask import url_for
|
||||
import json
|
||||
|
||||
|
||||
def test_watch_notification_urls_validation(client, live_server, measure_memory_usage, datastore_path):
|
||||
"""Test that Watch PUT/POST endpoints validate notification_urls."""
|
||||
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
|
||||
|
||||
# Test 1: Create a watch with valid notification URLs
|
||||
valid_urls = ["posts://example.com/notify1", "posts://example.com/notify2"]
|
||||
res = client.post(
|
||||
url_for("createwatch"),
|
||||
data=json.dumps({
|
||||
"url": "https://example.com",
|
||||
"notification_urls": valid_urls
|
||||
}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key}
|
||||
)
|
||||
assert res.status_code == 201, "Should accept valid notification URLs on watch creation"
|
||||
watch_uuid = res.json['uuid']
|
||||
|
||||
# Verify the notification URLs were saved
|
||||
res = client.get(
|
||||
url_for("watch", uuid=watch_uuid),
|
||||
headers={'x-api-key': api_key}
|
||||
)
|
||||
assert res.status_code == 200
|
||||
assert set(res.json['notification_urls']) == set(valid_urls), "Valid notification URLs should be saved"
|
||||
|
||||
# Test 2: Try to create a watch with invalid notification URLs (https:// is not valid)
|
||||
invalid_urls = ["https://example.com/webhook"]
|
||||
res = client.post(
|
||||
url_for("createwatch"),
|
||||
data=json.dumps({
|
||||
"url": "https://example.com",
|
||||
"notification_urls": invalid_urls
|
||||
}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key}
|
||||
)
|
||||
assert res.status_code == 400, "Should reject https:// notification URLs (not a valid AppRise protocol)"
|
||||
assert b"is not a valid AppRise URL" in res.data, "Should provide AppRise validation error message"
|
||||
|
||||
# Test 2b: Also test other invalid protocols
|
||||
invalid_urls_ftp = ["ftp://not-apprise-url"]
|
||||
res = client.post(
|
||||
url_for("createwatch"),
|
||||
data=json.dumps({
|
||||
"url": "https://example.com",
|
||||
"notification_urls": invalid_urls_ftp
|
||||
}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key}
|
||||
)
|
||||
assert res.status_code == 400, "Should reject ftp:// notification URLs"
|
||||
assert b"is not a valid AppRise URL" in res.data, "Should provide AppRise validation error message"
|
||||
|
||||
# Test 3: Update watch with valid notification URLs
|
||||
new_valid_urls = ["posts://newserver.com"]
|
||||
res = client.put(
|
||||
url_for("watch", uuid=watch_uuid),
|
||||
data=json.dumps({"notification_urls": new_valid_urls}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key}
|
||||
)
|
||||
assert res.status_code == 200, "Should accept valid notification URLs on watch update"
|
||||
|
||||
# Verify the notification URLs were updated
|
||||
res = client.get(
|
||||
url_for("watch", uuid=watch_uuid),
|
||||
headers={'x-api-key': api_key}
|
||||
)
|
||||
assert res.status_code == 200
|
||||
assert res.json['notification_urls'] == new_valid_urls, "Valid notification URLs should be updated"
|
||||
|
||||
# Test 4: Try to update watch with invalid notification URLs (plain https:// not valid)
|
||||
invalid_https_url = ["https://example.com/webhook"]
|
||||
res = client.put(
|
||||
url_for("watch", uuid=watch_uuid),
|
||||
data=json.dumps({"notification_urls": invalid_https_url}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key}
|
||||
)
|
||||
assert res.status_code == 400, "Should reject https:// notification URLs on watch update"
|
||||
assert b"is not a valid AppRise URL" in res.data, "Should provide AppRise validation error message"
|
||||
|
||||
# Test 5: Update watch with non-list notification_urls (caught by OpenAPI schema validation)
|
||||
res = client.put(
|
||||
url_for("watch", uuid=watch_uuid),
|
||||
data=json.dumps({"notification_urls": "not-a-list"}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key}
|
||||
)
|
||||
assert res.status_code == 400, "Should reject non-list notification_urls"
|
||||
assert b"OpenAPI validation failed" in res.data or b"Request body validation error" in res.data
|
||||
|
||||
# Test 6: Verify original URLs are preserved after failed update
|
||||
res = client.get(
|
||||
url_for("watch", uuid=watch_uuid),
|
||||
headers={'x-api-key': api_key}
|
||||
)
|
||||
assert res.status_code == 200
|
||||
assert res.json['notification_urls'] == new_valid_urls, "URLs should remain unchanged after validation failure"
|
||||
|
||||
|
||||
def test_tag_notification_urls_validation(client, live_server, measure_memory_usage, datastore_path):
|
||||
"""Test that Tag PUT endpoint validates notification_urls."""
|
||||
from changedetectionio.model import Tag
|
||||
|
||||
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
|
||||
datastore = live_server.app.config['DATASTORE']
|
||||
|
||||
# Create a tag
|
||||
tag_uuid = datastore.add_tag(title="Test Tag")
|
||||
assert tag_uuid is not None
|
||||
|
||||
# Test 1: Update tag with valid notification URLs
|
||||
valid_urls = ["posts://example.com/tag-notify"]
|
||||
res = client.put(
|
||||
url_for("tag", uuid=tag_uuid),
|
||||
data=json.dumps({"notification_urls": valid_urls}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key}
|
||||
)
|
||||
assert res.status_code == 200, "Should accept valid notification URLs on tag update"
|
||||
|
||||
# Verify the notification URLs were saved
|
||||
tag = datastore.data['settings']['application']['tags'][tag_uuid]
|
||||
assert tag['notification_urls'] == valid_urls, "Valid notification URLs should be saved to tag"
|
||||
|
||||
# Test 2: Try to update tag with invalid notification URLs (https:// not valid)
|
||||
invalid_urls = ["https://example.com/webhook"]
|
||||
res = client.put(
|
||||
url_for("tag", uuid=tag_uuid),
|
||||
data=json.dumps({"notification_urls": invalid_urls}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key}
|
||||
)
|
||||
assert res.status_code == 400, "Should reject https:// notification URLs on tag update"
|
||||
assert b"is not a valid AppRise URL" in res.data, "Should provide AppRise validation error message"
|
||||
|
||||
# Test 3: Update tag with non-list notification_urls (caught by OpenAPI schema validation)
|
||||
res = client.put(
|
||||
url_for("tag", uuid=tag_uuid),
|
||||
data=json.dumps({"notification_urls": "not-a-list"}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key}
|
||||
)
|
||||
assert res.status_code == 400, "Should reject non-list notification_urls"
|
||||
assert b"OpenAPI validation failed" in res.data or b"Request body validation error" in res.data
|
||||
|
||||
# Test 4: Verify original URLs are preserved after failed update
|
||||
tag = datastore.data['settings']['application']['tags'][tag_uuid]
|
||||
assert tag['notification_urls'] == valid_urls, "URLs should remain unchanged after validation failure"
|
||||
@@ -80,7 +80,10 @@ def test_openapi_validation_invalid_field_in_request_body(client, live_server, m
|
||||
# Should get 400 error due to invalid field (this will be caught by internal validation)
|
||||
# Note: This tests the flow where OpenAPI validation passes but internal validation catches it
|
||||
assert res.status_code == 400, f"Expected 400 but got {res.status_code}"
|
||||
assert b"Additional properties are not allowed" in res.data, "Should contain validation error about additional properties"
|
||||
# With patternProperties for processor_config_*, the error message format changed slightly
|
||||
assert (b"Additional properties are not allowed" in res.data or
|
||||
b"does not match any of the regexes" in res.data), \
|
||||
"Should contain validation error about additional/invalid properties"
|
||||
|
||||
|
||||
def test_openapi_validation_import_wrong_content_type(client, live_server, measure_memory_usage, datastore_path):
|
||||
|
||||
@@ -0,0 +1,805 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Comprehensive security and edge case tests for the API.
|
||||
Tests critical areas that were identified as gaps in the existing test suite.
|
||||
"""
|
||||
|
||||
import time
|
||||
import json
|
||||
import threading
|
||||
import uuid as uuid_module
|
||||
from flask import url_for
|
||||
from .util import live_server_setup, wait_for_all_checks, delete_all_watches
|
||||
import os
|
||||
|
||||
|
||||
def set_original_response(datastore_path):
|
||||
test_return_data = """<html>
|
||||
<body>
|
||||
Some initial text<br>
|
||||
<p>Which is across multiple lines</p>
|
||||
</body>
|
||||
</html>
|
||||
"""
|
||||
with open(os.path.join(datastore_path, "endpoint-content.txt"), "w") as f:
|
||||
f.write(test_return_data)
|
||||
return None
|
||||
|
||||
|
||||
def is_valid_uuid(val):
|
||||
try:
|
||||
uuid_module.UUID(str(val))
|
||||
return True
|
||||
except ValueError:
|
||||
return False
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# TIER 1: CRITICAL SECURITY TESTS
|
||||
# ============================================================================
|
||||
|
||||
def test_api_path_traversal_in_uuids(client, live_server, measure_memory_usage, datastore_path):
|
||||
"""
|
||||
Test that path traversal attacks via UUID parameter are blocked.
|
||||
Addresses CVE-like vulnerabilities where ../../../ in UUID could access arbitrary files.
|
||||
"""
|
||||
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
|
||||
set_original_response(datastore_path=datastore_path)
|
||||
test_url = url_for('test_endpoint', _external=True)
|
||||
|
||||
# Create a valid watch first
|
||||
res = client.post(
|
||||
url_for("createwatch"),
|
||||
data=json.dumps({"url": test_url, "title": "Valid watch"}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key},
|
||||
)
|
||||
assert res.status_code == 201
|
||||
valid_uuid = res.json.get('uuid')
|
||||
|
||||
# Test 1: Path traversal with ../../../
|
||||
res = client.get(
|
||||
f"/api/v1/watch/../../etc/passwd",
|
||||
headers={'x-api-key': api_key}
|
||||
)
|
||||
assert res.status_code in [400, 404], "Path traversal should be rejected"
|
||||
|
||||
# Test 2: Encoded path traversal
|
||||
res = client.get(
|
||||
"/api/v1/watch/..%2F..%2F..%2Fetc%2Fpasswd",
|
||||
headers={'x-api-key': api_key}
|
||||
)
|
||||
assert res.status_code in [400, 404], "Encoded path traversal should be rejected"
|
||||
|
||||
# Test 3: Double-encoded path traversal
|
||||
res = client.get(
|
||||
"/api/v1/watch/%2e%2e%2f%2e%2e%2f%2e%2e%2f",
|
||||
headers={'x-api-key': api_key}
|
||||
)
|
||||
assert res.status_code in [400, 404], "Double-encoded traversal should be rejected"
|
||||
|
||||
# Test 4: Try to access datastore file
|
||||
res = client.get(
|
||||
"/api/v1/watch/../url-watches.json",
|
||||
headers={'x-api-key': api_key}
|
||||
)
|
||||
assert res.status_code in [400, 404], "Access to datastore should be blocked"
|
||||
|
||||
# Test 5: Null byte injection
|
||||
res = client.get(
|
||||
f"/api/v1/watch/{valid_uuid}%00.json",
|
||||
headers={'x-api-key': api_key}
|
||||
)
|
||||
# Should either work (ignoring null byte) or reject - but not crash
|
||||
assert res.status_code in [200, 400, 404]
|
||||
|
||||
# Test 6: DELETE with path traversal
|
||||
res = client.delete(
|
||||
"/api/v1/watch/../../datastore/url-watches.json",
|
||||
headers={'x-api-key': api_key}
|
||||
)
|
||||
assert res.status_code in [400, 404, 405], "DELETE with traversal should be blocked (405=method not allowed is also acceptable)"
|
||||
|
||||
# Cleanup
|
||||
client.delete(url_for("watch", uuid=valid_uuid), headers={'x-api-key': api_key})
|
||||
delete_all_watches(client)
|
||||
|
||||
|
||||
def test_api_injection_via_headers_and_proxy(client, live_server, measure_memory_usage, datastore_path):
|
||||
"""
|
||||
Test that injection attacks via headers and proxy fields are properly sanitized.
|
||||
Addresses XSS and injection vulnerabilities.
|
||||
"""
|
||||
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
|
||||
set_original_response(datastore_path=datastore_path)
|
||||
test_url = url_for('test_endpoint', _external=True)
|
||||
|
||||
# Test 1: XSS in headers
|
||||
res = client.post(
|
||||
url_for("createwatch"),
|
||||
data=json.dumps({
|
||||
"url": test_url,
|
||||
"headers": {
|
||||
"User-Agent": "<script>alert(1)</script>",
|
||||
"X-Custom": "'; DROP TABLE watches; --"
|
||||
}
|
||||
}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key},
|
||||
)
|
||||
# Headers are metadata used for HTTP requests, not HTML rendering
|
||||
# Storing them as-is is expected behavior
|
||||
assert res.status_code in [201, 400]
|
||||
if res.status_code == 201:
|
||||
watch_uuid = res.json.get('uuid')
|
||||
# Verify headers are stored (API returns JSON, not HTML, so no XSS risk)
|
||||
res = client.get(url_for("watch", uuid=watch_uuid), headers={'x-api-key': api_key})
|
||||
assert res.status_code == 200
|
||||
client.delete(url_for("watch", uuid=watch_uuid), headers={'x-api-key': api_key})
|
||||
|
||||
# Test 2: Null bytes in headers
|
||||
res = client.post(
|
||||
url_for("createwatch"),
|
||||
data=json.dumps({
|
||||
"url": test_url,
|
||||
"headers": {"X-Test": "value\x00null"}
|
||||
}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key},
|
||||
)
|
||||
# Should handle null bytes gracefully (reject or sanitize)
|
||||
assert res.status_code in [201, 400]
|
||||
|
||||
# Test 3: Malformed proxy string
|
||||
res = client.post(
|
||||
url_for("createwatch"),
|
||||
data=json.dumps({
|
||||
"url": test_url,
|
||||
"proxy": "http://evil.com:8080@victim.com"
|
||||
}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key},
|
||||
)
|
||||
# Should reject invalid proxy format
|
||||
assert res.status_code == 400
|
||||
|
||||
# Test 4: Control characters in notification title
|
||||
res = client.post(
|
||||
url_for("createwatch"),
|
||||
data=json.dumps({
|
||||
"url": test_url,
|
||||
"notification_title": "Test\r\nInjected-Header: value"
|
||||
}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key},
|
||||
)
|
||||
# Should accept but sanitize control characters
|
||||
if res.status_code == 201:
|
||||
watch_uuid = res.json.get('uuid')
|
||||
client.delete(url_for("watch", uuid=watch_uuid), headers={'x-api-key': api_key})
|
||||
|
||||
delete_all_watches(client)
|
||||
|
||||
|
||||
def test_api_large_payload_dos(client, live_server, measure_memory_usage, datastore_path):
|
||||
"""
|
||||
Test that excessively large payloads are rejected to prevent DoS.
|
||||
Addresses memory leak issues found in changelog.
|
||||
"""
|
||||
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
|
||||
set_original_response(datastore_path=datastore_path)
|
||||
test_url = url_for('test_endpoint', _external=True)
|
||||
|
||||
# Test 1: Huge ignore_text array
|
||||
res = client.post(
|
||||
url_for("createwatch"),
|
||||
data=json.dumps({
|
||||
"url": test_url,
|
||||
"ignore_text": ["a" * 10000] * 100 # 1MB of data
|
||||
}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key},
|
||||
)
|
||||
# Should either accept (with limits) or reject
|
||||
if res.status_code == 201:
|
||||
watch_uuid = res.json.get('uuid')
|
||||
client.delete(url_for("watch", uuid=watch_uuid), headers={'x-api-key': api_key})
|
||||
|
||||
# Test 2: Massive headers object
|
||||
huge_headers = {f"X-Header-{i}": "x" * 1000 for i in range(100)}
|
||||
res = client.post(
|
||||
url_for("createwatch"),
|
||||
data=json.dumps({
|
||||
"url": test_url,
|
||||
"headers": huge_headers
|
||||
}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key},
|
||||
)
|
||||
# Should reject or truncate
|
||||
assert res.status_code in [201, 400, 413]
|
||||
if res.status_code == 201:
|
||||
watch_uuid = res.json.get('uuid')
|
||||
client.delete(url_for("watch", uuid=watch_uuid), headers={'x-api-key': api_key})
|
||||
|
||||
# Test 3: Huge browser_steps array
|
||||
res = client.post(
|
||||
url_for("createwatch"),
|
||||
data=json.dumps({
|
||||
"url": test_url,
|
||||
"browser_steps": [
|
||||
{"operation": "click", "selector": "#test" * 1000, "optional_value": ""}
|
||||
] * 100
|
||||
}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key},
|
||||
)
|
||||
# Should reject or limit
|
||||
assert res.status_code in [201, 400, 413]
|
||||
if res.status_code == 201:
|
||||
watch_uuid = res.json.get('uuid')
|
||||
client.delete(url_for("watch", uuid=watch_uuid), headers={'x-api-key': api_key})
|
||||
|
||||
# Test 4: Extremely long title
|
||||
res = client.post(
|
||||
url_for("createwatch"),
|
||||
data=json.dumps({
|
||||
"url": test_url,
|
||||
"title": "x" * 100000 # 100KB title
|
||||
}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key},
|
||||
)
|
||||
# Should reject (exceeds maxLength: 5000)
|
||||
assert res.status_code == 400
|
||||
|
||||
delete_all_watches(client)
|
||||
|
||||
|
||||
def test_api_utf8_encoding_edge_cases(client, live_server, measure_memory_usage, datastore_path):
|
||||
"""
|
||||
Test UTF-8 encoding edge cases that have caused bugs on Windows.
|
||||
Addresses 18+ encoding bugs from changelog.
|
||||
"""
|
||||
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
|
||||
set_original_response(datastore_path=datastore_path)
|
||||
test_url = url_for('test_endpoint', _external=True)
|
||||
|
||||
# Test 1: Unicode in title (should work)
|
||||
res = client.post(
|
||||
url_for("createwatch"),
|
||||
data=json.dumps({
|
||||
"url": test_url,
|
||||
"title": "Test 中文 Ελληνικά 日本語 🔥"
|
||||
}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key},
|
||||
)
|
||||
assert res.status_code == 201
|
||||
watch_uuid = res.json.get('uuid')
|
||||
|
||||
# Verify it round-trips correctly
|
||||
res = client.get(url_for("watch", uuid=watch_uuid), headers={'x-api-key': api_key})
|
||||
assert res.status_code == 200
|
||||
assert "中文" in res.json.get('title')
|
||||
|
||||
client.delete(url_for("watch", uuid=watch_uuid), headers={'x-api-key': api_key})
|
||||
|
||||
# Test 2: Unicode in URL query parameters
|
||||
res = client.post(
|
||||
url_for("createwatch"),
|
||||
data=json.dumps({
|
||||
"url": test_url + "?search=日本語"
|
||||
}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key},
|
||||
)
|
||||
# Should handle URL encoding properly
|
||||
assert res.status_code in [201, 400]
|
||||
if res.status_code == 201:
|
||||
watch_uuid = res.json.get('uuid')
|
||||
client.delete(url_for("watch", uuid=watch_uuid), headers={'x-api-key': api_key})
|
||||
|
||||
# Test 3: Null byte in title (should be rejected or sanitized)
|
||||
res = client.post(
|
||||
url_for("createwatch"),
|
||||
data=json.dumps({
|
||||
"url": test_url,
|
||||
"title": "Test\x00Title"
|
||||
}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key},
|
||||
)
|
||||
# Should handle gracefully
|
||||
assert res.status_code in [201, 400]
|
||||
if res.status_code == 201:
|
||||
watch_uuid = res.json.get('uuid')
|
||||
client.delete(url_for("watch", uuid=watch_uuid), headers={'x-api-key': api_key})
|
||||
|
||||
# Test 4: BOM (Byte Order Mark) in title
|
||||
res = client.post(
|
||||
url_for("createwatch"),
|
||||
data=json.dumps({
|
||||
"url": test_url,
|
||||
"title": "\ufeffTest with BOM"
|
||||
}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key},
|
||||
)
|
||||
assert res.status_code in [201, 400]
|
||||
if res.status_code == 201:
|
||||
watch_uuid = res.json.get('uuid')
|
||||
client.delete(url_for("watch", uuid=watch_uuid), headers={'x-api-key': api_key})
|
||||
|
||||
delete_all_watches(client)
|
||||
|
||||
|
||||
def test_api_concurrency_race_conditions(client, live_server, measure_memory_usage, datastore_path):
|
||||
"""
|
||||
Test concurrent API requests to detect race conditions.
|
||||
Addresses 20+ concurrency bugs from changelog.
|
||||
"""
|
||||
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
|
||||
set_original_response(datastore_path=datastore_path)
|
||||
test_url = url_for('test_endpoint', _external=True)
|
||||
|
||||
# Create a watch
|
||||
res = client.post(
|
||||
url_for("createwatch"),
|
||||
data=json.dumps({"url": test_url, "title": "Concurrency test"}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key},
|
||||
)
|
||||
assert res.status_code == 201
|
||||
watch_uuid = res.json.get('uuid')
|
||||
wait_for_all_checks(client)
|
||||
|
||||
# Test 1: Concurrent updates to same watch
|
||||
# Note: Flask test client is not thread-safe, so we test sequential updates instead
|
||||
# Real concurrency issues would be caught in integration tests with actual HTTP requests
|
||||
results = []
|
||||
for i in range(10):
|
||||
try:
|
||||
r = client.put(
|
||||
url_for("watch", uuid=watch_uuid),
|
||||
data=json.dumps({"title": f"Title {i}"}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key},
|
||||
)
|
||||
results.append(r.status_code)
|
||||
except Exception as e:
|
||||
results.append(str(e))
|
||||
|
||||
# All updates should succeed (200) without crashes
|
||||
assert all(r == 200 for r in results), f"Some updates failed: {results}"
|
||||
|
||||
# Test 2: Update while watch is being checked
|
||||
# Queue a recheck
|
||||
client.get(
|
||||
url_for("watch", uuid=watch_uuid, recheck=True),
|
||||
headers={'x-api-key': api_key}
|
||||
)
|
||||
|
||||
# Immediately update it
|
||||
res = client.put(
|
||||
url_for("watch", uuid=watch_uuid),
|
||||
data=json.dumps({"title": "Updated during check"}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key},
|
||||
)
|
||||
# Should succeed without error
|
||||
assert res.status_code == 200
|
||||
|
||||
# Test 3: Delete watch that's being processed
|
||||
# Create another watch
|
||||
res = client.post(
|
||||
url_for("createwatch"),
|
||||
data=json.dumps({"url": test_url}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key},
|
||||
)
|
||||
watch_uuid2 = res.json.get('uuid')
|
||||
|
||||
# Queue it for checking
|
||||
client.get(url_for("watch", uuid=watch_uuid2, recheck=True), headers={'x-api-key': api_key})
|
||||
|
||||
# Immediately delete it
|
||||
res = client.delete(url_for("watch", uuid=watch_uuid2), headers={'x-api-key': api_key})
|
||||
# Should succeed or return appropriate error
|
||||
assert res.status_code in [204, 404, 400]
|
||||
|
||||
# Cleanup
|
||||
client.delete(url_for("watch", uuid=watch_uuid), headers={'x-api-key': api_key})
|
||||
delete_all_watches(client)
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# TIER 2: IMPORTANT FUNCTIONALITY TESTS
|
||||
# ============================================================================
|
||||
|
||||
def test_api_time_validation_edge_cases(client, live_server, measure_memory_usage, datastore_path):
|
||||
"""
|
||||
Test time_between_check validation edge cases.
|
||||
"""
|
||||
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
|
||||
set_original_response(datastore_path=datastore_path)
|
||||
test_url = url_for('test_endpoint', _external=True)
|
||||
|
||||
# Test 1: Zero interval
|
||||
res = client.post(
|
||||
url_for("createwatch"),
|
||||
data=json.dumps({
|
||||
"url": test_url,
|
||||
"time_between_check_use_default": False,
|
||||
"time_between_check": {"seconds": 0}
|
||||
}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key},
|
||||
)
|
||||
assert res.status_code == 400, "Zero interval should be rejected"
|
||||
|
||||
# Test 2: Negative interval
|
||||
res = client.post(
|
||||
url_for("createwatch"),
|
||||
data=json.dumps({
|
||||
"url": test_url,
|
||||
"time_between_check_use_default": False,
|
||||
"time_between_check": {"seconds": -100}
|
||||
}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key},
|
||||
)
|
||||
assert res.status_code == 400, "Negative interval should be rejected"
|
||||
|
||||
# Test 3: All fields null with use_default=false
|
||||
res = client.post(
|
||||
url_for("createwatch"),
|
||||
data=json.dumps({
|
||||
"url": test_url,
|
||||
"time_between_check_use_default": False,
|
||||
"time_between_check": {"weeks": None, "days": None, "hours": None, "minutes": None, "seconds": None}
|
||||
}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key},
|
||||
)
|
||||
assert res.status_code == 400, "All null intervals should be rejected when not using default"
|
||||
|
||||
# Test 4: Extremely large interval (overflow risk)
|
||||
res = client.post(
|
||||
url_for("createwatch"),
|
||||
data=json.dumps({
|
||||
"url": test_url,
|
||||
"time_between_check_use_default": False,
|
||||
"time_between_check": {"weeks": 999999999}
|
||||
}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key},
|
||||
)
|
||||
# Should either accept (with limits) or reject
|
||||
assert res.status_code in [201, 400]
|
||||
if res.status_code == 201:
|
||||
watch_uuid = res.json.get('uuid')
|
||||
client.delete(url_for("watch", uuid=watch_uuid), headers={'x-api-key': api_key})
|
||||
|
||||
# Test 5: Valid minimal interval (should work)
|
||||
res = client.post(
|
||||
url_for("createwatch"),
|
||||
data=json.dumps({
|
||||
"url": test_url,
|
||||
"time_between_check_use_default": False,
|
||||
"time_between_check": {"seconds": 60}
|
||||
}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key},
|
||||
)
|
||||
assert res.status_code == 201
|
||||
watch_uuid = res.json.get('uuid')
|
||||
client.delete(url_for("watch", uuid=watch_uuid), headers={'x-api-key': api_key})
|
||||
|
||||
delete_all_watches(client)
|
||||
|
||||
|
||||
def test_api_browser_steps_validation(client, live_server, measure_memory_usage, datastore_path):
|
||||
"""
|
||||
Test browser_steps validation for invalid operations and structures.
|
||||
"""
|
||||
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
|
||||
set_original_response(datastore_path=datastore_path)
|
||||
test_url = url_for('test_endpoint', _external=True)
|
||||
|
||||
# Test 1: Empty browser step
|
||||
res = client.post(
|
||||
url_for("createwatch"),
|
||||
data=json.dumps({
|
||||
"url": test_url,
|
||||
"browser_steps": [
|
||||
{"operation": "", "selector": "", "optional_value": ""}
|
||||
]
|
||||
}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key},
|
||||
)
|
||||
# Should accept (empty is valid as null)
|
||||
assert res.status_code in [201, 400]
|
||||
if res.status_code == 201:
|
||||
watch_uuid = res.json.get('uuid')
|
||||
client.delete(url_for("watch", uuid=watch_uuid), headers={'x-api-key': api_key})
|
||||
|
||||
# Test 2: Invalid operation type
|
||||
res = client.post(
|
||||
url_for("createwatch"),
|
||||
data=json.dumps({
|
||||
"url": test_url,
|
||||
"browser_steps": [
|
||||
{"operation": "invalid_operation", "selector": "#test", "optional_value": ""}
|
||||
]
|
||||
}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key},
|
||||
)
|
||||
# Should accept (validation happens at runtime) or reject
|
||||
assert res.status_code in [201, 400]
|
||||
if res.status_code == 201:
|
||||
watch_uuid = res.json.get('uuid')
|
||||
client.delete(url_for("watch", uuid=watch_uuid), headers={'x-api-key': api_key})
|
||||
|
||||
# Test 3: Missing required fields in browser step
|
||||
res = client.post(
|
||||
url_for("createwatch"),
|
||||
data=json.dumps({
|
||||
"url": test_url,
|
||||
"browser_steps": [
|
||||
{"operation": "click"} # Missing selector and optional_value
|
||||
]
|
||||
}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key},
|
||||
)
|
||||
# Should be rejected due to schema validation
|
||||
assert res.status_code == 400
|
||||
|
||||
# Test 4: Extra fields in browser step
|
||||
res = client.post(
|
||||
url_for("createwatch"),
|
||||
data=json.dumps({
|
||||
"url": test_url,
|
||||
"browser_steps": [
|
||||
{"operation": "click", "selector": "#test", "optional_value": "", "extra_field": "value"}
|
||||
]
|
||||
}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key},
|
||||
)
|
||||
# Should be rejected due to additionalProperties: false
|
||||
assert res.status_code == 400
|
||||
|
||||
delete_all_watches(client)
|
||||
|
||||
|
||||
def test_api_queue_manipulation(client, live_server, measure_memory_usage, datastore_path):
|
||||
"""
|
||||
Test queue behavior under stress and edge cases.
|
||||
"""
|
||||
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
|
||||
set_original_response(datastore_path=datastore_path)
|
||||
test_url = url_for('test_endpoint', _external=True)
|
||||
|
||||
# Test 1: Create many watches rapidly
|
||||
watch_uuids = []
|
||||
for i in range(20):
|
||||
res = client.post(
|
||||
url_for("createwatch"),
|
||||
data=json.dumps({"url": test_url, "title": f"Watch {i}"}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key},
|
||||
)
|
||||
if res.status_code == 201:
|
||||
watch_uuids.append(res.json.get('uuid'))
|
||||
|
||||
assert len(watch_uuids) == 20, "Should be able to create 20 watches"
|
||||
|
||||
# Test 2: Recheck all when watches exist
|
||||
res = client.get(
|
||||
url_for("createwatch", recheck_all='1'),
|
||||
headers={'x-api-key': api_key},
|
||||
)
|
||||
# Should return success (200 or 202 for background processing)
|
||||
assert res.status_code in [200, 202]
|
||||
|
||||
# Test 3: Verify queue doesn't overflow with moderate load
|
||||
# The app has MAX_QUEUE_SIZE = 5000, we're well below that
|
||||
wait_for_all_checks(client)
|
||||
|
||||
# Cleanup
|
||||
for uuid in watch_uuids:
|
||||
client.delete(url_for("watch", uuid=uuid), headers={'x-api-key': api_key})
|
||||
|
||||
delete_all_watches(client)
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# TIER 3: EDGE CASES & POLISH
|
||||
# ============================================================================
|
||||
|
||||
def test_api_history_edge_cases(client, live_server, measure_memory_usage, datastore_path):
|
||||
"""
|
||||
Test history API with invalid timestamps and edge cases.
|
||||
"""
|
||||
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
|
||||
set_original_response(datastore_path=datastore_path)
|
||||
test_url = url_for('test_endpoint', _external=True)
|
||||
|
||||
# Create watch and generate history
|
||||
res = client.post(
|
||||
url_for("createwatch"),
|
||||
data=json.dumps({"url": test_url}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key},
|
||||
)
|
||||
watch_uuid = res.json.get('uuid')
|
||||
wait_for_all_checks(client)
|
||||
|
||||
# Test 1: Get history with invalid timestamp
|
||||
res = client.get(
|
||||
url_for("watchsinglehistory", uuid=watch_uuid, timestamp="invalid"),
|
||||
headers={'x-api-key': api_key}
|
||||
)
|
||||
assert res.status_code == 404, "Invalid timestamp should return 404"
|
||||
|
||||
# Test 2: Future timestamp
|
||||
res = client.get(
|
||||
url_for("watchsinglehistory", uuid=watch_uuid, timestamp="9999999999"),
|
||||
headers={'x-api-key': api_key}
|
||||
)
|
||||
assert res.status_code == 404, "Future timestamp should return 404"
|
||||
|
||||
# Test 3: Negative timestamp
|
||||
res = client.get(
|
||||
url_for("watchsinglehistory", uuid=watch_uuid, timestamp="-1"),
|
||||
headers={'x-api-key': api_key}
|
||||
)
|
||||
assert res.status_code == 404, "Negative timestamp should return 404"
|
||||
|
||||
# Test 4: Diff with reversed timestamps (from > to)
|
||||
# First get actual timestamps
|
||||
res = client.get(
|
||||
url_for("watchhistory", uuid=watch_uuid),
|
||||
headers={'x-api-key': api_key}
|
||||
)
|
||||
if len(res.json) >= 2:
|
||||
timestamps = sorted(res.json.keys())
|
||||
# Try reversed order
|
||||
res = client.get(
|
||||
url_for("watchhistorydiff", uuid=watch_uuid, from_timestamp=timestamps[-1], to_timestamp=timestamps[0]),
|
||||
headers={'x-api-key': api_key}
|
||||
)
|
||||
# Should either work (show reverse diff) or return error
|
||||
assert res.status_code in [200, 400]
|
||||
|
||||
# Cleanup
|
||||
client.delete(url_for("watch", uuid=watch_uuid), headers={'x-api-key': api_key})
|
||||
delete_all_watches(client)
|
||||
|
||||
|
||||
def test_api_notification_edge_cases(client, live_server, measure_memory_usage, datastore_path):
|
||||
"""
|
||||
Test notification configuration edge cases.
|
||||
"""
|
||||
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
|
||||
set_original_response(datastore_path=datastore_path)
|
||||
test_url = url_for('test_endpoint', _external=True)
|
||||
|
||||
# Test 1: Invalid notification URL
|
||||
res = client.post(
|
||||
url_for("createwatch"),
|
||||
data=json.dumps({
|
||||
"url": test_url,
|
||||
"notification_urls": ["invalid://url", "ftp://test.com"]
|
||||
}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key},
|
||||
)
|
||||
# Should accept (apprise validates at runtime) or reject
|
||||
assert res.status_code in [201, 400]
|
||||
if res.status_code == 201:
|
||||
watch_uuid = res.json.get('uuid')
|
||||
client.delete(url_for("watch", uuid=watch_uuid), headers={'x-api-key': api_key})
|
||||
|
||||
# Test 2: Invalid notification format
|
||||
res = client.post(
|
||||
url_for("createwatch"),
|
||||
data=json.dumps({
|
||||
"url": test_url,
|
||||
"notification_format": "invalid_format"
|
||||
}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key},
|
||||
)
|
||||
# Should be rejected by schema
|
||||
assert res.status_code == 400
|
||||
|
||||
# Test 3: Empty notification arrays
|
||||
res = client.post(
|
||||
url_for("createwatch"),
|
||||
data=json.dumps({
|
||||
"url": test_url,
|
||||
"notification_urls": []
|
||||
}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key},
|
||||
)
|
||||
# Should accept (empty is valid)
|
||||
assert res.status_code == 201
|
||||
watch_uuid = res.json.get('uuid')
|
||||
client.delete(url_for("watch", uuid=watch_uuid), headers={'x-api-key': api_key})
|
||||
|
||||
delete_all_watches(client)
|
||||
|
||||
|
||||
def test_api_tag_edge_cases(client, live_server, measure_memory_usage, datastore_path):
|
||||
"""
|
||||
Test tag/group API edge cases including XSS and path traversal.
|
||||
"""
|
||||
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
|
||||
|
||||
# Test 1: Empty tag title
|
||||
res = client.post(
|
||||
url_for("tag"),
|
||||
data=json.dumps({"title": ""}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key},
|
||||
)
|
||||
# Should be rejected (empty title)
|
||||
assert res.status_code == 400
|
||||
|
||||
# Test 2: XSS in tag title
|
||||
res = client.post(
|
||||
url_for("tag"),
|
||||
data=json.dumps({"title": "<script>alert(1)</script>"}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key},
|
||||
)
|
||||
# Should accept but sanitize
|
||||
if res.status_code == 201:
|
||||
tag_uuid = res.json.get('uuid')
|
||||
# Verify title is stored safely
|
||||
res = client.get(url_for("tag", uuid=tag_uuid), headers={'x-api-key': api_key})
|
||||
# Should be escaped or sanitized
|
||||
client.delete(url_for("tag", uuid=tag_uuid), headers={'x-api-key': api_key})
|
||||
|
||||
# Test 3: Path traversal in tag title
|
||||
res = client.post(
|
||||
url_for("tag"),
|
||||
data=json.dumps({"title": "../../etc/passwd"}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key},
|
||||
)
|
||||
# Should accept (it's just a string, not a path)
|
||||
if res.status_code == 201:
|
||||
tag_uuid = res.json.get('uuid')
|
||||
client.delete(url_for("tag", uuid=tag_uuid), headers={'x-api-key': api_key})
|
||||
|
||||
# Test 4: Very long tag title
|
||||
res = client.post(
|
||||
url_for("tag"),
|
||||
data=json.dumps({"title": "x" * 10000}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key},
|
||||
)
|
||||
# Should be rejected (exceeds maxLength)
|
||||
assert res.status_code == 400
|
||||
|
||||
|
||||
def test_api_authentication_edge_cases(client, live_server, measure_memory_usage, datastore_path):
|
||||
"""
|
||||
Test API authentication edge cases.
|
||||
"""
|
||||
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
|
||||
set_original_response(datastore_path=datastore_path)
|
||||
test_url = url_for('test_endpoint', _external=True)
|
||||
|
||||
# Test 1: Missing API key
|
||||
res = client.get(url_for("createwatch"))
|
||||
assert res.status_code == 403, "Missing API key should be forbidden"
|
||||
|
||||
# Test 2: Invalid API key
|
||||
res = client.get(
|
||||
url_for("createwatch"),
|
||||
headers={'x-api-key': "invalid_key_12345"}
|
||||
)
|
||||
assert res.status_code == 403, "Invalid API key should be forbidden"
|
||||
|
||||
# Test 3: API key with special characters
|
||||
res = client.get(
|
||||
url_for("createwatch"),
|
||||
headers={'x-api-key': "key<script>alert(1)</script>"}
|
||||
)
|
||||
assert res.status_code == 403, "Invalid API key should be forbidden"
|
||||
|
||||
# Test 4: Very long API key
|
||||
res = client.get(
|
||||
url_for("createwatch"),
|
||||
headers={'x-api-key': "x" * 10000}
|
||||
)
|
||||
assert res.status_code == 403, "Invalid API key should be forbidden"
|
||||
|
||||
# Test 5: Case sensitivity of API key
|
||||
wrong_case_key = api_key.upper() if api_key.islower() else api_key.lower()
|
||||
res = client.get(
|
||||
url_for("createwatch"),
|
||||
headers={'x-api-key': wrong_case_key}
|
||||
)
|
||||
# Should be forbidden (keys are case-sensitive)
|
||||
assert res.status_code == 403, "Wrong case API key should be forbidden"
|
||||
|
||||
# Test 6: Valid API key should work
|
||||
res = client.get(
|
||||
url_for("createwatch"),
|
||||
headers={'x-api-key': api_key}
|
||||
)
|
||||
assert res.status_code == 200, "Valid API key should work"
|
||||
@@ -125,10 +125,12 @@ def test_api_tags_listing(client, live_server, measure_memory_usage, datastore_p
|
||||
url_for("tag", uuid=new_tag_uuid) + "?recheck=true",
|
||||
headers={'x-api-key': api_key}
|
||||
)
|
||||
wait_for_all_checks()
|
||||
assert res.status_code == 200
|
||||
assert b'OK, 1 watches' in res.data
|
||||
|
||||
assert res.status_code == 200
|
||||
assert b'OK, queued 1 watches for rechecking' in res.data
|
||||
|
||||
|
||||
wait_for_all_checks()
|
||||
after_check_time = live_server.app.config['DATASTORE'].data['watching'][watch_uuid].get('last_checked')
|
||||
|
||||
assert before_check_time != after_check_time
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
import time
|
||||
from flask import url_for
|
||||
from .util import live_server_setup, extract_UUID_from_client, wait_for_all_checks
|
||||
from .util import live_server_setup, extract_UUID_from_client, wait_for_all_checks, delete_all_watches
|
||||
import os
|
||||
|
||||
|
||||
@@ -116,7 +116,7 @@ def test_check_ldjson_price_autodetect(client, live_server, measure_memory_usage
|
||||
# And not this cause its not the ld-json
|
||||
assert b"So let's see what happens" not in res.data
|
||||
|
||||
client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
|
||||
delete_all_watches(client)
|
||||
|
||||
##########################################################################################
|
||||
# And we shouldnt see the offer
|
||||
@@ -131,7 +131,7 @@ def test_check_ldjson_price_autodetect(client, live_server, measure_memory_usage
|
||||
assert b'ldjson-price-track-offer' not in res.data
|
||||
|
||||
##########################################################################################
|
||||
client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
|
||||
delete_all_watches(client)
|
||||
|
||||
|
||||
def _test_runner_check_bad_format_ignored(live_server, client, has_ldjson_price_data):
|
||||
@@ -147,7 +147,7 @@ def _test_runner_check_bad_format_ignored(live_server, client, has_ldjson_price_
|
||||
|
||||
|
||||
##########################################################################################
|
||||
client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
|
||||
delete_all_watches(client)
|
||||
|
||||
|
||||
def test_bad_ldjson_is_correctly_ignored(client, live_server, measure_memory_usage, datastore_path):
|
||||
|
||||
@@ -38,7 +38,6 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
|
||||
# Default no password set, this stuff should be always available.
|
||||
|
||||
assert b"SETTINGS" in res.data
|
||||
assert b"BACKUP" in res.data
|
||||
assert b"IMPORT" in res.data
|
||||
|
||||
#####################
|
||||
@@ -415,4 +414,4 @@ def test_plaintext_even_if_xml_content_and_can_apply_filters(client, live_server
|
||||
assert b'Abonnementen bijwerken' in res.data
|
||||
assert b'<foobar' not in res.data
|
||||
|
||||
res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
|
||||
res = delete_all_watches(client)
|
||||
|
||||
@@ -53,11 +53,21 @@ def test_backup(client, live_server, measure_memory_usage, datastore_path):
|
||||
|
||||
backup = ZipFile(io.BytesIO(res.data))
|
||||
l = backup.namelist()
|
||||
uuid4hex = re.compile('^[a-f0-9]{8}-?[a-f0-9]{4}-?4[a-f0-9]{3}-?[89ab][a-f0-9]{3}-?[a-f0-9]{12}.*txt', re.I)
|
||||
newlist = list(filter(uuid4hex.match, l)) # Read Note below
|
||||
|
||||
# Check for UUID-based txt files (history and snapshot)
|
||||
uuid4hex_txt = re.compile('^[a-f0-9]{8}-?[a-f0-9]{4}-?4[a-f0-9]{3}-?[89ab][a-f0-9]{3}-?[a-f0-9]{12}.*txt', re.I)
|
||||
txt_files = list(filter(uuid4hex_txt.match, l))
|
||||
# Should be two txt files in the archive (history and the snapshot)
|
||||
assert len(newlist) == 2
|
||||
assert len(txt_files) == 2
|
||||
|
||||
# Check for watch.json files (new format)
|
||||
uuid4hex_json = re.compile('^[a-f0-9]{8}-?[a-f0-9]{4}-?4[a-f0-9]{3}-?[89ab][a-f0-9]{3}-?[a-f0-9]{12}/watch\.json$', re.I)
|
||||
json_files = list(filter(uuid4hex_json.match, l))
|
||||
# Should be one watch.json file in the archive (the imported watch)
|
||||
assert len(json_files) == 1, f"Expected 1 watch.json file, found {len(json_files)}: {json_files}"
|
||||
|
||||
# Check for changedetection.json (settings file)
|
||||
assert 'changedetection.json' in l, "changedetection.json should be in backup"
|
||||
|
||||
# Get the latest one
|
||||
res = client.get(
|
||||
|
||||
@@ -6,7 +6,7 @@ from .util import (
|
||||
set_original_response,
|
||||
set_modified_response,
|
||||
live_server_setup,
|
||||
wait_for_all_checks
|
||||
wait_for_all_checks, delete_all_watches
|
||||
)
|
||||
from loguru import logger
|
||||
|
||||
@@ -104,7 +104,7 @@ def run_socketio_watch_update_test(client, live_server, password_mode="", datast
|
||||
assert watch.has_unviewed, "The watch was not marked as unviewed after content change"
|
||||
|
||||
# Clean up
|
||||
client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
|
||||
delete_all_watches(client)
|
||||
|
||||
def test_everything(live_server, client, measure_memory_usage, datastore_path):
|
||||
|
||||
|
||||
@@ -0,0 +1,661 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Tests for immediate commit-based persistence system.
|
||||
|
||||
Tests cover:
|
||||
- Watch.commit() persistence to disk
|
||||
- Concurrent commit safety (race conditions)
|
||||
- Processor config separation
|
||||
- Data loss prevention (settings, tags, watch modifications)
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import threading
|
||||
import time
|
||||
from flask import url_for
|
||||
from .util import wait_for_all_checks
|
||||
|
||||
|
||||
# ==============================================================================
|
||||
# 2. Commit() Persistence Tests
|
||||
# ==============================================================================
|
||||
|
||||
def test_watch_commit_persists_to_disk(client, live_server):
|
||||
"""Test that watch.commit() actually writes to watch.json immediately"""
|
||||
datastore = client.application.config.get('DATASTORE')
|
||||
|
||||
# Create a watch
|
||||
uuid = datastore.add_watch(url='http://example.com', extras={'title': 'Original Title'})
|
||||
watch = datastore.data['watching'][uuid]
|
||||
|
||||
# Modify and commit
|
||||
watch['title'] = 'Modified Title'
|
||||
watch['paused'] = True
|
||||
watch.commit()
|
||||
|
||||
# Read directly from disk (bypass datastore cache)
|
||||
watch_json_path = os.path.join(watch.watch_data_dir, 'watch.json')
|
||||
assert os.path.exists(watch_json_path), "watch.json should exist on disk"
|
||||
|
||||
with open(watch_json_path, 'r') as f:
|
||||
disk_data = json.load(f)
|
||||
|
||||
assert disk_data['title'] == 'Modified Title', "Title should be persisted to disk"
|
||||
assert disk_data['paused'] == True, "Paused state should be persisted to disk"
|
||||
assert disk_data['uuid'] == uuid, "UUID should match"
|
||||
|
||||
|
||||
def test_watch_commit_survives_reload(client, live_server):
|
||||
"""Test that committed changes survive datastore reload"""
|
||||
from changedetectionio.store import ChangeDetectionStore
|
||||
|
||||
datastore = client.application.config.get('DATASTORE')
|
||||
datastore_path = datastore.datastore_path
|
||||
|
||||
# Create and modify a watch
|
||||
uuid = datastore.add_watch(url='http://example.com', extras={'title': 'Test Watch'})
|
||||
watch = datastore.data['watching'][uuid]
|
||||
watch['title'] = 'Persisted Title'
|
||||
watch['paused'] = True
|
||||
watch['tags'] = ['tag-1', 'tag-2']
|
||||
watch.commit()
|
||||
|
||||
# Simulate app restart - create new datastore instance
|
||||
datastore2 = ChangeDetectionStore(datastore_path=datastore_path)
|
||||
datastore2.reload_state(
|
||||
datastore_path=datastore_path,
|
||||
include_default_watches=False,
|
||||
version_tag='test'
|
||||
)
|
||||
|
||||
# Check data survived
|
||||
assert uuid in datastore2.data['watching'], "Watch should exist after reload"
|
||||
reloaded_watch = datastore2.data['watching'][uuid]
|
||||
assert reloaded_watch['title'] == 'Persisted Title', "Title should survive reload"
|
||||
assert reloaded_watch['paused'] == True, "Paused state should survive reload"
|
||||
assert reloaded_watch['tags'] == ['tag-1', 'tag-2'], "Tags should survive reload"
|
||||
|
||||
|
||||
def test_watch_commit_atomic_on_crash(client, live_server):
|
||||
"""Test that atomic writes prevent corruption (temp file pattern)"""
|
||||
datastore = client.application.config.get('DATASTORE')
|
||||
|
||||
uuid = datastore.add_watch(url='http://example.com', extras={'title': 'Original'})
|
||||
watch = datastore.data['watching'][uuid]
|
||||
|
||||
# First successful commit
|
||||
watch['title'] = 'First Save'
|
||||
watch.commit()
|
||||
|
||||
# Verify watch.json exists and is valid
|
||||
watch_json_path = os.path.join(watch.watch_data_dir, 'watch.json')
|
||||
with open(watch_json_path, 'r') as f:
|
||||
data = json.load(f) # Should not raise JSONDecodeError
|
||||
assert data['title'] == 'First Save'
|
||||
|
||||
# Second commit - even if interrupted, original file should be intact
|
||||
# (atomic write uses temp file + rename, so original is never corrupted)
|
||||
watch['title'] = 'Second Save'
|
||||
watch.commit()
|
||||
|
||||
with open(watch_json_path, 'r') as f:
|
||||
data = json.load(f)
|
||||
assert data['title'] == 'Second Save'
|
||||
|
||||
|
||||
def test_multiple_watches_commit_independently(client, live_server):
|
||||
"""Test that committing one watch doesn't affect others"""
|
||||
datastore = client.application.config.get('DATASTORE')
|
||||
|
||||
# Create multiple watches
|
||||
uuid1 = datastore.add_watch(url='http://example1.com', extras={'title': 'Watch 1'})
|
||||
uuid2 = datastore.add_watch(url='http://example2.com', extras={'title': 'Watch 2'})
|
||||
uuid3 = datastore.add_watch(url='http://example3.com', extras={'title': 'Watch 3'})
|
||||
|
||||
watch1 = datastore.data['watching'][uuid1]
|
||||
watch2 = datastore.data['watching'][uuid2]
|
||||
watch3 = datastore.data['watching'][uuid3]
|
||||
|
||||
# Modify and commit only watch2
|
||||
watch2['title'] = 'Modified Watch 2'
|
||||
watch2['paused'] = True
|
||||
watch2.commit()
|
||||
|
||||
# Read all from disk
|
||||
def read_watch_json(uuid):
|
||||
watch = datastore.data['watching'][uuid]
|
||||
path = os.path.join(watch.watch_data_dir, 'watch.json')
|
||||
with open(path, 'r') as f:
|
||||
return json.load(f)
|
||||
|
||||
data1 = read_watch_json(uuid1)
|
||||
data2 = read_watch_json(uuid2)
|
||||
data3 = read_watch_json(uuid3)
|
||||
|
||||
# Only watch2 should have changes
|
||||
assert data1['title'] == 'Watch 1', "Watch 1 should be unchanged"
|
||||
assert data1['paused'] == False, "Watch 1 should not be paused"
|
||||
|
||||
assert data2['title'] == 'Modified Watch 2', "Watch 2 should be modified"
|
||||
assert data2['paused'] == True, "Watch 2 should be paused"
|
||||
|
||||
assert data3['title'] == 'Watch 3', "Watch 3 should be unchanged"
|
||||
assert data3['paused'] == False, "Watch 3 should not be paused"
|
||||
|
||||
|
||||
# ==============================================================================
|
||||
# 3. Concurrency/Race Condition Tests
|
||||
# ==============================================================================
|
||||
|
||||
def test_concurrent_watch_commits_dont_corrupt(client, live_server):
|
||||
"""Test that simultaneous commits to same watch don't corrupt JSON"""
|
||||
datastore = client.application.config.get('DATASTORE')
|
||||
|
||||
uuid = datastore.add_watch(url='http://example.com', extras={'title': 'Test'})
|
||||
watch = datastore.data['watching'][uuid]
|
||||
|
||||
errors = []
|
||||
|
||||
def modify_and_commit(field, value):
|
||||
try:
|
||||
watch[field] = value
|
||||
watch.commit()
|
||||
except Exception as e:
|
||||
errors.append(e)
|
||||
|
||||
# Run 10 concurrent commits
|
||||
threads = []
|
||||
for i in range(10):
|
||||
t = threading.Thread(target=modify_and_commit, args=('title', f'Title {i}'))
|
||||
threads.append(t)
|
||||
t.start()
|
||||
|
||||
for t in threads:
|
||||
t.join()
|
||||
|
||||
# Should not have any errors
|
||||
assert len(errors) == 0, f"Expected no errors, got: {errors}"
|
||||
|
||||
# JSON file should still be valid (not corrupted)
|
||||
watch_json_path = os.path.join(watch.watch_data_dir, 'watch.json')
|
||||
with open(watch_json_path, 'r') as f:
|
||||
data = json.load(f) # Should not raise JSONDecodeError
|
||||
assert data['uuid'] == uuid, "UUID should still be correct"
|
||||
assert 'Title' in data['title'], "Title should contain 'Title'"
|
||||
|
||||
|
||||
def test_concurrent_modifications_during_commit(client, live_server):
|
||||
"""Test that modifying watch during commit doesn't cause RuntimeError"""
|
||||
datastore = client.application.config.get('DATASTORE')
|
||||
|
||||
uuid = datastore.add_watch(url='http://example.com', extras={'title': 'Test'})
|
||||
watch = datastore.data['watching'][uuid]
|
||||
|
||||
errors = []
|
||||
stop_flag = threading.Event()
|
||||
|
||||
def keep_modifying():
|
||||
"""Continuously modify watch"""
|
||||
try:
|
||||
i = 0
|
||||
while not stop_flag.is_set():
|
||||
watch['title'] = f'Title {i}'
|
||||
watch['paused'] = i % 2 == 0
|
||||
i += 1
|
||||
time.sleep(0.001)
|
||||
except Exception as e:
|
||||
errors.append(('modifier', e))
|
||||
|
||||
def keep_committing():
|
||||
"""Continuously commit watch"""
|
||||
try:
|
||||
for _ in range(20):
|
||||
watch.commit()
|
||||
time.sleep(0.005)
|
||||
except Exception as e:
|
||||
errors.append(('committer', e))
|
||||
|
||||
# Start concurrent modification and commits
|
||||
modifier = threading.Thread(target=keep_modifying)
|
||||
committer = threading.Thread(target=keep_committing)
|
||||
|
||||
modifier.start()
|
||||
committer.start()
|
||||
|
||||
committer.join()
|
||||
stop_flag.set()
|
||||
modifier.join()
|
||||
|
||||
# Should not have RuntimeError from dict changing during iteration
|
||||
runtime_errors = [e for source, e in errors if isinstance(e, RuntimeError)]
|
||||
assert len(runtime_errors) == 0, f"Should not have RuntimeError, got: {runtime_errors}"
|
||||
|
||||
|
||||
def test_datastore_lock_protects_commit_snapshot(client, live_server):
|
||||
"""Test that datastore.lock prevents race conditions during deepcopy"""
|
||||
datastore = client.application.config.get('DATASTORE')
|
||||
|
||||
uuid = datastore.add_watch(url='http://example.com', extras={'title': 'Test'})
|
||||
watch = datastore.data['watching'][uuid]
|
||||
|
||||
# Add some complex nested data
|
||||
watch['browser_steps'] = [
|
||||
{'operation': 'click', 'selector': '#foo'},
|
||||
{'operation': 'wait', 'seconds': 5}
|
||||
]
|
||||
|
||||
errors = []
|
||||
commits_succeeded = [0]
|
||||
|
||||
def rapid_commits():
|
||||
try:
|
||||
for i in range(50):
|
||||
watch['title'] = f'Title {i}'
|
||||
watch.commit()
|
||||
commits_succeeded[0] += 1
|
||||
time.sleep(0.001)
|
||||
except Exception as e:
|
||||
errors.append(e)
|
||||
|
||||
# Multiple threads doing rapid commits
|
||||
threads = [threading.Thread(target=rapid_commits) for _ in range(3)]
|
||||
|
||||
for t in threads:
|
||||
t.start()
|
||||
for t in threads:
|
||||
t.join()
|
||||
|
||||
assert len(errors) == 0, f"Expected no errors, got: {errors}"
|
||||
assert commits_succeeded[0] == 150, f"Expected 150 commits, got {commits_succeeded[0]}"
|
||||
|
||||
# Final JSON should be valid
|
||||
watch_json_path = os.path.join(watch.watch_data_dir, 'watch.json')
|
||||
with open(watch_json_path, 'r') as f:
|
||||
data = json.load(f)
|
||||
assert data['uuid'] == uuid
|
||||
|
||||
|
||||
# ==============================================================================
|
||||
# 4. Processor Config Separation Tests
|
||||
# ==============================================================================
|
||||
|
||||
def test_processor_config_never_in_watch_json(client, live_server):
|
||||
"""Test that processor_config_* fields are filtered out of watch.json"""
|
||||
datastore = client.application.config.get('DATASTORE')
|
||||
|
||||
uuid = datastore.add_watch(
|
||||
url='http://example.com',
|
||||
extras={
|
||||
'title': 'Test Watch',
|
||||
'processor': 'restock_diff'
|
||||
}
|
||||
)
|
||||
|
||||
watch = datastore.data['watching'][uuid]
|
||||
|
||||
# Try to set processor config fields (these should be filtered during commit)
|
||||
watch['processor_config_price_threshold'] = 10.0
|
||||
watch['processor_config_some_setting'] = 'value'
|
||||
watch['processor_config_another'] = {'nested': 'data'}
|
||||
watch.commit()
|
||||
|
||||
# Read watch.json from disk
|
||||
watch_json_path = os.path.join(watch.watch_data_dir, 'watch.json')
|
||||
with open(watch_json_path, 'r') as f:
|
||||
data = json.load(f)
|
||||
|
||||
# Verify processor_config_* fields are NOT in watch.json
|
||||
for key in data.keys():
|
||||
assert not key.startswith('processor_config_'), \
|
||||
f"Found {key} in watch.json - processor configs should be in separate file!"
|
||||
|
||||
# Normal fields should still be there
|
||||
assert data['title'] == 'Test Watch'
|
||||
assert data['processor'] == 'restock_diff'
|
||||
|
||||
|
||||
def test_api_post_saves_processor_config_separately(client, live_server):
|
||||
"""Test that API POST saves processor configs to {processor}.json"""
|
||||
import json
|
||||
from changedetectionio.processors import extract_processor_config_from_form_data
|
||||
|
||||
# Get API key
|
||||
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
|
||||
|
||||
# Create watch via API with processor config
|
||||
response = client.post(
|
||||
url_for("createwatch"),
|
||||
data=json.dumps({
|
||||
'url': 'http://example.com',
|
||||
'processor': 'restock_diff',
|
||||
'processor_config_price_threshold': 10.0,
|
||||
'processor_config_in_stock_only': True
|
||||
}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key}
|
||||
)
|
||||
|
||||
assert response.status_code in (200, 201), f"Expected 200/201, got {response.status_code}"
|
||||
uuid = response.json.get('uuid')
|
||||
assert uuid, "Should return UUID"
|
||||
|
||||
datastore = client.application.config.get('DATASTORE')
|
||||
watch = datastore.data['watching'][uuid]
|
||||
|
||||
# Check that processor config file exists
|
||||
processor_config_path = os.path.join(watch.watch_data_dir, 'restock_diff.json')
|
||||
assert os.path.exists(processor_config_path), "Processor config file should exist"
|
||||
|
||||
with open(processor_config_path, 'r') as f:
|
||||
config = json.load(f)
|
||||
|
||||
# Verify fields are saved WITHOUT processor_config_ prefix
|
||||
assert config.get('price_threshold') == 10.0, "Should have price_threshold (no prefix)"
|
||||
assert config.get('in_stock_only') == True, "Should have in_stock_only (no prefix)"
|
||||
assert 'processor_config_price_threshold' not in config, "Should NOT have prefixed keys"
|
||||
|
||||
|
||||
def test_api_put_saves_processor_config_separately(client, live_server):
|
||||
"""Test that API PUT updates processor configs in {processor}.json"""
|
||||
import json
|
||||
datastore = client.application.config.get('DATASTORE')
|
||||
|
||||
# Get API key
|
||||
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
|
||||
|
||||
# Create watch
|
||||
uuid = datastore.add_watch(
|
||||
url='http://example.com',
|
||||
extras={'processor': 'restock_diff'}
|
||||
)
|
||||
|
||||
# Update via API with processor config
|
||||
response = client.put(
|
||||
url_for("watch", uuid=uuid),
|
||||
data=json.dumps({
|
||||
'processor_config_price_threshold': 15.0,
|
||||
'processor_config_min_stock': 5
|
||||
}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key}
|
||||
)
|
||||
|
||||
# PUT might return different status codes, 200 or 204 are both OK
|
||||
assert response.status_code in (200, 204), f"Expected 200/204, got {response.status_code}: {response.data}"
|
||||
|
||||
watch = datastore.data['watching'][uuid]
|
||||
|
||||
# Check processor config file
|
||||
processor_config_path = os.path.join(watch.watch_data_dir, 'restock_diff.json')
|
||||
assert os.path.exists(processor_config_path), "Processor config file should exist"
|
||||
|
||||
with open(processor_config_path, 'r') as f:
|
||||
config = json.load(f)
|
||||
|
||||
assert config.get('price_threshold') == 15.0, "Should have updated price_threshold"
|
||||
assert config.get('min_stock') == 5, "Should have min_stock"
|
||||
|
||||
|
||||
def test_ui_edit_saves_processor_config_separately(client, live_server):
|
||||
"""Test that processor_config_* fields never appear in watch.json (even from UI)"""
|
||||
datastore = client.application.config.get('DATASTORE')
|
||||
|
||||
# Create watch
|
||||
uuid = datastore.add_watch(
|
||||
url='http://example.com',
|
||||
extras={'processor': 'text_json_diff', 'title': 'Test'}
|
||||
)
|
||||
|
||||
watch = datastore.data['watching'][uuid]
|
||||
|
||||
# Simulate someone accidentally trying to set processor_config fields directly
|
||||
watch['processor_config_should_not_save'] = 'test_value'
|
||||
watch['processor_config_another_field'] = 123
|
||||
watch['normal_field'] = 'this_should_save'
|
||||
watch.commit()
|
||||
|
||||
# Check watch.json has NO processor_config_* fields (main point of this test)
|
||||
watch_json_path = os.path.join(watch.watch_data_dir, 'watch.json')
|
||||
with open(watch_json_path, 'r') as f:
|
||||
watch_data = json.load(f)
|
||||
|
||||
for key in watch_data.keys():
|
||||
assert not key.startswith('processor_config_'), \
|
||||
f"Found {key} in watch.json - processor configs should be filtered during commit"
|
||||
|
||||
# Verify normal fields still save
|
||||
assert watch_data['normal_field'] == 'this_should_save', "Normal fields should save"
|
||||
assert watch_data['title'] == 'Test', "Original fields should still be there"
|
||||
|
||||
|
||||
def test_browser_steps_normalized_to_empty_list(client, live_server):
|
||||
"""Test that meaningless browser_steps are normalized to [] during commit"""
|
||||
datastore = client.application.config.get('DATASTORE')
|
||||
|
||||
uuid = datastore.add_watch(url='http://example.com')
|
||||
watch = datastore.data['watching'][uuid]
|
||||
|
||||
# Set browser_steps to meaningless values
|
||||
watch['browser_steps'] = [
|
||||
{'operation': 'Choose one', 'selector': ''},
|
||||
{'operation': 'Goto site', 'selector': ''},
|
||||
{'operation': '', 'selector': '#foo'}
|
||||
]
|
||||
watch.commit()
|
||||
|
||||
# Read from disk
|
||||
watch_json_path = os.path.join(watch.watch_data_dir, 'watch.json')
|
||||
with open(watch_json_path, 'r') as f:
|
||||
data = json.load(f)
|
||||
|
||||
# Should be normalized to empty list
|
||||
assert data['browser_steps'] == [], "Meaningless browser_steps should be normalized to []"
|
||||
|
||||
|
||||
# ==============================================================================
|
||||
# 5. Data Loss Prevention Tests
|
||||
# ==============================================================================
|
||||
|
||||
def test_settings_persist_after_update(client, live_server):
|
||||
"""Test that settings updates are committed and survive restart"""
|
||||
from changedetectionio.store import ChangeDetectionStore
|
||||
|
||||
datastore = client.application.config.get('DATASTORE')
|
||||
datastore_path = datastore.datastore_path
|
||||
|
||||
# Update settings directly (bypass form validation issues)
|
||||
datastore.data['settings']['application']['empty_pages_are_a_change'] = True
|
||||
datastore.data['settings']['application']['fetch_backend'] = 'html_requests'
|
||||
datastore.data['settings']['requests']['time_between_check']['minutes'] = 120
|
||||
datastore.commit()
|
||||
|
||||
# Simulate restart
|
||||
datastore2 = ChangeDetectionStore(datastore_path=datastore_path)
|
||||
datastore2.reload_state(
|
||||
datastore_path=datastore_path,
|
||||
include_default_watches=False,
|
||||
version_tag='test'
|
||||
)
|
||||
|
||||
# Verify settings survived
|
||||
assert datastore2.data['settings']['application']['empty_pages_are_a_change'] == True, "empty_pages_are_a_change should persist"
|
||||
assert datastore2.data['settings']['application']['fetch_backend'] == 'html_requests', "fetch_backend should persist"
|
||||
assert datastore2.data['settings']['requests']['time_between_check']['minutes'] == 120, "time_between_check should persist"
|
||||
|
||||
|
||||
def test_tag_mute_persists(client, live_server):
|
||||
"""Test that tag mute/unmute operations persist"""
|
||||
from changedetectionio.store import ChangeDetectionStore
|
||||
|
||||
datastore = client.application.config.get('DATASTORE')
|
||||
datastore_path = datastore.datastore_path
|
||||
|
||||
# Add a tag
|
||||
tag_uuid = datastore.add_tag('Test Tag')
|
||||
|
||||
# Mute the tag
|
||||
response = client.get(url_for("tags.mute", uuid=tag_uuid))
|
||||
assert response.status_code == 302 # Redirect
|
||||
|
||||
# Verify muted in memory
|
||||
assert datastore.data['settings']['application']['tags'][tag_uuid]['notification_muted'] == True
|
||||
|
||||
# Simulate restart
|
||||
datastore2 = ChangeDetectionStore(datastore_path=datastore_path)
|
||||
datastore2.reload_state(
|
||||
datastore_path=datastore_path,
|
||||
include_default_watches=False,
|
||||
version_tag='test'
|
||||
)
|
||||
|
||||
# Verify mute state survived
|
||||
assert tag_uuid in datastore2.data['settings']['application']['tags']
|
||||
assert datastore2.data['settings']['application']['tags'][tag_uuid]['notification_muted'] == True
|
||||
|
||||
|
||||
def test_tag_delete_removes_from_watches(client, live_server):
|
||||
"""Test that deleting a tag removes it from all watches"""
|
||||
datastore = client.application.config.get('DATASTORE')
|
||||
|
||||
# Create a tag
|
||||
tag_uuid = datastore.add_tag('Test Tag')
|
||||
|
||||
# Create watches with this tag
|
||||
uuid1 = datastore.add_watch(url='http://example1.com')
|
||||
uuid2 = datastore.add_watch(url='http://example2.com')
|
||||
uuid3 = datastore.add_watch(url='http://example3.com')
|
||||
|
||||
watch1 = datastore.data['watching'][uuid1]
|
||||
watch2 = datastore.data['watching'][uuid2]
|
||||
watch3 = datastore.data['watching'][uuid3]
|
||||
|
||||
watch1['tags'] = [tag_uuid]
|
||||
watch1.commit()
|
||||
watch2['tags'] = [tag_uuid, 'other-tag']
|
||||
watch2.commit()
|
||||
# watch3 has no tags
|
||||
|
||||
# Delete the tag
|
||||
response = client.get(url_for("tags.delete", uuid=tag_uuid))
|
||||
assert response.status_code == 302
|
||||
|
||||
# Wait for background thread to complete
|
||||
time.sleep(1)
|
||||
|
||||
# Tag should be removed from settings
|
||||
assert tag_uuid not in datastore.data['settings']['application']['tags']
|
||||
|
||||
# Tag should be removed from watches and persisted
|
||||
def check_watch_tags(uuid):
|
||||
watch = datastore.data['watching'][uuid]
|
||||
watch_json_path = os.path.join(watch.watch_data_dir, 'watch.json')
|
||||
with open(watch_json_path, 'r') as f:
|
||||
return json.load(f)['tags']
|
||||
|
||||
assert tag_uuid not in check_watch_tags(uuid1), "Tag should be removed from watch1"
|
||||
assert tag_uuid not in check_watch_tags(uuid2), "Tag should be removed from watch2"
|
||||
assert 'other-tag' in check_watch_tags(uuid2), "Other tags should remain in watch2"
|
||||
assert check_watch_tags(uuid3) == [], "Watch3 should still have empty tags"
|
||||
|
||||
|
||||
def test_watch_pause_unpause_persists(client, live_server):
|
||||
"""Test that pause/unpause operations commit and persist"""
|
||||
datastore = client.application.config.get('DATASTORE')
|
||||
|
||||
# Get API key
|
||||
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
|
||||
|
||||
uuid = datastore.add_watch(url='http://example.com')
|
||||
watch = datastore.data['watching'][uuid]
|
||||
|
||||
# Pause via API
|
||||
response = client.get(url_for("watch", uuid=uuid, paused='paused'), headers={'x-api-key': api_key})
|
||||
assert response.status_code == 200
|
||||
|
||||
# Check persisted to disk
|
||||
watch_json_path = os.path.join(watch.watch_data_dir, 'watch.json')
|
||||
with open(watch_json_path, 'r') as f:
|
||||
data = json.load(f)
|
||||
assert data['paused'] == True, "Pause should be persisted"
|
||||
|
||||
# Unpause
|
||||
response = client.get(url_for("watch", uuid=uuid, paused='unpaused'), headers={'x-api-key': api_key})
|
||||
assert response.status_code == 200
|
||||
|
||||
with open(watch_json_path, 'r') as f:
|
||||
data = json.load(f)
|
||||
assert data['paused'] == False, "Unpause should be persisted"
|
||||
|
||||
|
||||
def test_watch_mute_unmute_persists(client, live_server):
|
||||
"""Test that mute/unmute operations commit and persist"""
|
||||
datastore = client.application.config.get('DATASTORE')
|
||||
|
||||
# Get API key
|
||||
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
|
||||
|
||||
uuid = datastore.add_watch(url='http://example.com')
|
||||
watch = datastore.data['watching'][uuid]
|
||||
|
||||
# Mute via API
|
||||
response = client.get(url_for("watch", uuid=uuid, muted='muted'), headers={'x-api-key': api_key})
|
||||
assert response.status_code == 200
|
||||
|
||||
# Check persisted to disk
|
||||
watch_json_path = os.path.join(watch.watch_data_dir, 'watch.json')
|
||||
with open(watch_json_path, 'r') as f:
|
||||
data = json.load(f)
|
||||
assert data['notification_muted'] == True, "Mute should be persisted"
|
||||
|
||||
# Unmute
|
||||
response = client.get(url_for("watch", uuid=uuid, muted='unmuted'), headers={'x-api-key': api_key})
|
||||
assert response.status_code == 200
|
||||
|
||||
with open(watch_json_path, 'r') as f:
|
||||
data = json.load(f)
|
||||
assert data['notification_muted'] == False, "Unmute should be persisted"
|
||||
|
||||
|
||||
def test_ui_watch_edit_persists_all_fields(client, live_server):
|
||||
"""Test that UI watch edit form persists all modified fields"""
|
||||
from changedetectionio.store import ChangeDetectionStore
|
||||
|
||||
datastore = client.application.config.get('DATASTORE')
|
||||
datastore_path = datastore.datastore_path
|
||||
|
||||
# Create watch
|
||||
uuid = datastore.add_watch(url='http://example.com')
|
||||
|
||||
# Edit via UI with multiple field changes
|
||||
response = client.post(
|
||||
url_for("ui.ui_edit.edit_page", uuid=uuid),
|
||||
data={
|
||||
'url': 'http://updated-example.com',
|
||||
'title': 'Updated Watch Title',
|
||||
'time_between_check-hours': '2',
|
||||
'time_between_check-minutes': '30',
|
||||
'include_filters': '#content',
|
||||
'fetch_backend': 'html_requests',
|
||||
'method': 'POST',
|
||||
'ignore_text': 'Advertisement\nTracking'
|
||||
},
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
assert b"Updated watch" in response.data or b"Saved" in response.data
|
||||
|
||||
# Simulate restart
|
||||
datastore2 = ChangeDetectionStore(datastore_path=datastore_path)
|
||||
datastore2.reload_state(
|
||||
datastore_path=datastore_path,
|
||||
include_default_watches=False,
|
||||
version_tag='test'
|
||||
)
|
||||
|
||||
# Verify all fields survived
|
||||
watch = datastore2.data['watching'][uuid]
|
||||
assert watch['url'] == 'http://updated-example.com'
|
||||
assert watch['title'] == 'Updated Watch Title'
|
||||
assert watch['time_between_check']['hours'] == 2
|
||||
assert watch['time_between_check']['minutes'] == 30
|
||||
assert watch['fetch_backend'] == 'html_requests'
|
||||
assert watch['method'] == 'POST'
|
||||
@@ -69,7 +69,7 @@ def test_conditions_with_text_and_number(client, live_server, measure_memory_usa
|
||||
# 1. The page filtered text must contain "5" (first digit of value)
|
||||
# 2. The extracted number should be >= 20 and <= 100
|
||||
res = client.post(
|
||||
url_for("ui.ui_edit.edit_page", uuid="first"),
|
||||
url_for("ui.ui_edit.edit_page", uuid=uuid),
|
||||
data={
|
||||
"url": test_url,
|
||||
"fetch_backend": "html_requests",
|
||||
@@ -110,25 +110,20 @@ def test_conditions_with_text_and_number(client, live_server, measure_memory_usa
|
||||
|
||||
wait_for_all_checks(client)
|
||||
client.get(url_for("ui.mark_all_viewed"), follow_redirects=True)
|
||||
time.sleep(0.2)
|
||||
|
||||
wait_for_all_checks(client)
|
||||
time.sleep(1)
|
||||
|
||||
# Case 1
|
||||
set_number_in_range_response(datastore_path=datastore_path, number="70.5")
|
||||
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
||||
wait_for_all_checks(client)
|
||||
|
||||
time.sleep(2)
|
||||
# 75 is > 20 and < 100 and contains "5"
|
||||
res = client.get(url_for("watchlist.index"))
|
||||
assert b'has-unread-changes' in res.data
|
||||
|
||||
|
||||
# Case 2: Change with one condition violated
|
||||
# Number out of range (150) but contains '5'
|
||||
client.get(url_for("ui.mark_all_viewed"), follow_redirects=True)
|
||||
time.sleep(0.2)
|
||||
|
||||
set_number_out_of_range_response(datastore_path=datastore_path, number="150.5")
|
||||
|
||||
@@ -154,7 +149,6 @@ def test_condition_validate_rule_row(client, live_server, measure_memory_usage,
|
||||
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
||||
wait_for_all_checks(client)
|
||||
|
||||
uuid = next(iter(live_server.app.config['DATASTORE'].data['watching']))
|
||||
|
||||
# the front end submits the current form state which should override the watch in a temporary copy
|
||||
res = client.post(
|
||||
@@ -195,12 +189,8 @@ def test_condition_validate_rule_row(client, live_server, measure_memory_usage,
|
||||
)
|
||||
assert res.status_code == 200
|
||||
assert b'false' in res.data
|
||||
# cleanup for the next
|
||||
client.get(
|
||||
url_for("ui.form_delete", uuid="all"),
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
delete_all_watches(client)
|
||||
|
||||
|
||||
# If there was only a change in the whitespacing, then we shouldnt have a change detected
|
||||
@@ -230,17 +220,12 @@ def test_wordcount_conditions_plugin(client, live_server, measure_memory_usage,
|
||||
|
||||
# Check it saved
|
||||
res = client.get(
|
||||
url_for("ui.ui_edit.edit_page", uuid="first"),
|
||||
url_for("ui.ui_edit.edit_page", uuid=uuid),
|
||||
)
|
||||
|
||||
# Assert the word count is counted correctly
|
||||
assert b'<td>13</td>' in res.data
|
||||
|
||||
# cleanup for the next
|
||||
client.get(
|
||||
url_for("ui.form_delete", uuid="all"),
|
||||
follow_redirects=True
|
||||
)
|
||||
delete_all_watches(client)
|
||||
|
||||
# If there was only a change in the whitespacing, then we shouldnt have a change detected
|
||||
def test_lev_conditions_plugin(client, live_server, measure_memory_usage, datastore_path):
|
||||
|
||||
@@ -64,6 +64,7 @@ def test_DNS_errors(client, live_server, measure_memory_usage, datastore_path):
|
||||
follow_redirects=True
|
||||
)
|
||||
assert b"1 Imported" in res.data
|
||||
res = client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
||||
|
||||
# Give the thread time to pick it up
|
||||
wait_for_all_checks(client)
|
||||
@@ -79,7 +80,7 @@ def test_DNS_errors(client, live_server, measure_memory_usage, datastore_path):
|
||||
)
|
||||
assert found_name_resolution_error
|
||||
# Should always record that we tried
|
||||
assert bytes("just now".encode('utf-8')) in res.data
|
||||
assert "just now".encode('utf-8') in res.data or 'seconds ago'.encode('utf-8') in res.data
|
||||
delete_all_watches(client)
|
||||
|
||||
# Re 1513
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import os
|
||||
import time
|
||||
from flask import url_for
|
||||
from .util import set_original_response, wait_for_all_checks, wait_for_notification_endpoint_output
|
||||
from .util import set_original_response, wait_for_all_checks, wait_for_notification_endpoint_output, delete_all_watches
|
||||
from ..notification import valid_notification_formats
|
||||
|
||||
|
||||
@@ -45,7 +45,7 @@ def run_filter_test(client, live_server, content_filter, app_notification_format
|
||||
uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
|
||||
res = client.get(url_for("watchlist.index"))
|
||||
|
||||
assert b'No website watches configured' not in res.data
|
||||
assert b'No web page change detection watches configured' not in res.data
|
||||
|
||||
|
||||
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
||||
@@ -118,8 +118,10 @@ def run_filter_test(client, live_server, content_filter, app_notification_format
|
||||
res = client.get(url_for("watchlist.index"))
|
||||
assert b'Warning, no filters were found' in res.data
|
||||
assert not os.path.isfile(notification_file)
|
||||
time.sleep(1)
|
||||
time.sleep(2)
|
||||
wait_for_all_checks(client)
|
||||
|
||||
wait_for_all_checks(client)
|
||||
assert live_server.app.config['DATASTORE'].data['watching'][uuid]['consecutive_filter_failures'] == 5
|
||||
|
||||
time.sleep(2)
|
||||
@@ -178,6 +180,7 @@ def run_filter_test(client, live_server, content_filter, app_notification_format
|
||||
follow_redirects=True
|
||||
)
|
||||
os.unlink(notification_file)
|
||||
delete_all_watches(client)
|
||||
|
||||
|
||||
def test_check_include_filters_failure_notification(client, live_server, measure_memory_usage, datastore_path):
|
||||
@@ -185,10 +188,12 @@ def test_check_include_filters_failure_notification(client, live_server, measure
|
||||
run_filter_test(client=client, live_server=live_server, content_filter='#nope-doesnt-exist', app_notification_format=valid_notification_formats.get('htmlcolor'), datastore_path=datastore_path)
|
||||
# Check markup send conversion didnt affect plaintext preference
|
||||
run_filter_test(client=client, live_server=live_server, content_filter='#nope-doesnt-exist', app_notification_format=valid_notification_formats.get('text'), datastore_path=datastore_path)
|
||||
delete_all_watches(client)
|
||||
|
||||
def test_check_xpath_filter_failure_notification(client, live_server, measure_memory_usage, datastore_path):
|
||||
# # live_server_setup(live_server) # Setup on conftest per function
|
||||
run_filter_test(client=client, live_server=live_server, content_filter='//*[@id="nope-doesnt-exist"]', app_notification_format=valid_notification_formats.get('htmlcolor'), datastore_path=datastore_path)
|
||||
delete_all_watches(client)
|
||||
|
||||
# Test that notification is never sent
|
||||
|
||||
@@ -197,3 +202,4 @@ def test_basic_markup_from_text(client, live_server, measure_memory_usage, datas
|
||||
from ..notification.handler import markup_text_links_to_html
|
||||
x = markup_text_links_to_html("hello https://google.com")
|
||||
assert 'a href' in x
|
||||
delete_all_watches(client)
|
||||
|
||||
@@ -166,7 +166,8 @@ def test_tag_add_in_ui(client, live_server, measure_memory_usage, datastore_path
|
||||
delete_all_watches(client)
|
||||
|
||||
def test_group_tag_notification(client, live_server, measure_memory_usage, datastore_path):
|
||||
|
||||
delete_all_watches(client)
|
||||
|
||||
set_original_response(datastore_path=datastore_path)
|
||||
|
||||
test_url = url_for('test_endpoint', _external=True)
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user