mirror of
https://github.com/dgtlmoon/changedetection.io.git
synced 2026-04-30 23:00:30 +00:00
Compare commits
56 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 9548f5bd8f | |||
| 5718280518 | |||
| b24ae45860 | |||
| 0e4e1cf65e | |||
| d810dc38f4 | |||
| c1e9e012e3 | |||
| 5c29f1cee8 | |||
| a0b8d8e3ca | |||
| 1942d42b06 | |||
| 5726c5a0ac | |||
| 80f7decf4f | |||
| c66a29b011 | |||
| a1a2e5c5bf | |||
| 6e90a0bbd1 | |||
| 987789425d | |||
| 892b645147 | |||
| 278da3fa9b | |||
| c577bd700c | |||
| d4d6bb2872 | |||
| 45fb262386 | |||
| 1058debc12 | |||
| 61b41b0b16 | |||
| efe3afd383 | |||
| 84d26640cc | |||
| 2349344d9e | |||
| bdc2916c07 | |||
| 4fd477a60c | |||
| dc8b387f40 | |||
| 2149a6fe3b | |||
| f77d2bac6d | |||
| 75ecd1b793 | |||
| 4fe2a67839 | |||
| 5bbbe37436 | |||
| 83d7ce0fcf | |||
| 6bea9909ec | |||
| 1aabf967ef | |||
| 30dc4ac23b | |||
| 2658f81f02 | |||
| 674d863a21 | |||
| 0b9cfcdf09 | |||
| fd820c9330 | |||
| e02a1824c5 | |||
| 5911b7fe7a | |||
| a239480272 | |||
| fceb3cf39f | |||
| 7f631268dd | |||
| 8cc04ca7c5 | |||
| 4dec1e017b | |||
| 9d1743adbe | |||
| f34d806b09 | |||
| c22335ed01 | |||
| 0042f0c36a | |||
| 55e14cf394 | |||
| 308ccb5841 | |||
| 978e17acf6 | |||
| 73c29d1fa0 |
@@ -61,8 +61,8 @@ jobs:
|
||||
|
||||
# --- API test ---
|
||||
# This also means that the docs/api-spec.yml was shipped and could be read
|
||||
test -f /tmp/url-watches.json
|
||||
API_KEY=$(jq -r '.. | .api_access_token? // empty' /tmp/url-watches.json)
|
||||
test -f /tmp/changedetection.json
|
||||
API_KEY=$(jq -r '.. | .api_access_token? // empty' /tmp/changedetection.json)
|
||||
echo Test API KEY is $API_KEY
|
||||
curl -X POST "http://127.0.0.1:10000/api/v1/watch" \
|
||||
-H "x-api-key: ${API_KEY}" \
|
||||
|
||||
@@ -37,10 +37,29 @@ jobs:
|
||||
${{ runner.os }}-pip-py${{ env.PYTHON_VERSION }}-
|
||||
${{ runner.os }}-pip-
|
||||
|
||||
- name: Get current date for cache key
|
||||
id: date
|
||||
run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
|
||||
- name: Build changedetection.io container for testing under Python ${{ env.PYTHON_VERSION }}
|
||||
uses: docker/build-push-action@v6
|
||||
with:
|
||||
context: ./
|
||||
file: ./Dockerfile
|
||||
build-args: |
|
||||
PYTHON_VERSION=${{ env.PYTHON_VERSION }}
|
||||
LOGGER_LEVEL=TRACE
|
||||
tags: test-changedetectionio
|
||||
load: true
|
||||
cache-from: type=gha,scope=build-${{ github.ref_name }}-py${{ env.PYTHON_VERSION }}-${{ hashFiles('requirements.txt', 'Dockerfile') }}-${{ steps.date.outputs.date }}
|
||||
cache-to: type=gha,mode=max,scope=build-${{ github.ref_name }}-py${{ env.PYTHON_VERSION }}-${{ hashFiles('requirements.txt', 'Dockerfile') }}-${{ steps.date.outputs.date }}
|
||||
|
||||
- name: Verify build
|
||||
run: |
|
||||
echo "---- Building for Python ${{ env.PYTHON_VERSION }} -----"
|
||||
docker build --build-arg PYTHON_VERSION=${{ env.PYTHON_VERSION }} --build-arg LOGGER_LEVEL=TRACE -t test-changedetectionio .
|
||||
echo "---- Built for Python ${{ env.PYTHON_VERSION }} -----"
|
||||
docker run test-changedetectionio bash -c 'pip list'
|
||||
|
||||
- name: We should be Python ${{ env.PYTHON_VERSION }} ...
|
||||
@@ -84,7 +103,7 @@ jobs:
|
||||
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_watch_model'
|
||||
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_jinja2_security'
|
||||
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_semver'
|
||||
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_html_to_text'
|
||||
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_html_to_text'
|
||||
|
||||
# Basic pytest tests with ancillary services
|
||||
basic-tests:
|
||||
@@ -111,6 +130,32 @@ jobs:
|
||||
docker network inspect changedet-network >/dev/null 2>&1 || docker network create changedet-network
|
||||
docker run --name test-cdio-basic-tests --network changedet-network test-changedetectionio bash -c 'cd changedetectionio && ./run_basic_tests.sh'
|
||||
|
||||
- name: Test CLI options
|
||||
run: |
|
||||
docker network inspect changedet-network >/dev/null 2>&1 || docker network create changedet-network
|
||||
docker run --name test-cdio-cli-opts --network changedet-network test-changedetectionio bash -c 'changedetectionio/test_cli_opts.sh' &> cli-opts-output.txt
|
||||
echo "=== CLI Options Test Output ==="
|
||||
cat cli-opts-output.txt
|
||||
|
||||
- name: CLI Memory Test
|
||||
run: |
|
||||
echo "=== Checking CLI batch mode memory usage ==="
|
||||
# Extract RSS memory value from output
|
||||
RSS_MB=$(grep -oP "Memory consumption before worker shutdown: RSS=\K[\d.]+" cli-opts-output.txt | head -1 || echo "0")
|
||||
echo "RSS Memory: ${RSS_MB} MB"
|
||||
|
||||
# Check if RSS is less than 100MB
|
||||
if [ -n "$RSS_MB" ]; then
|
||||
if (( $(echo "$RSS_MB < 100" | bc -l) )); then
|
||||
echo "✓ Memory usage is acceptable: ${RSS_MB} MB < 100 MB"
|
||||
else
|
||||
echo "✗ Memory usage too high: ${RSS_MB} MB >= 100 MB"
|
||||
exit 1
|
||||
fi
|
||||
else
|
||||
echo "⚠ Could not extract memory usage, skipping check"
|
||||
fi
|
||||
|
||||
- name: Extract memory report and logs
|
||||
if: always()
|
||||
uses: ./.github/actions/extract-memory-report
|
||||
@@ -125,6 +170,13 @@ jobs:
|
||||
name: test-cdio-basic-tests-output-py${{ env.PYTHON_VERSION }}
|
||||
path: output-logs
|
||||
|
||||
- name: Store CLI test output
|
||||
if: always()
|
||||
uses: actions/upload-artifact@v6
|
||||
with:
|
||||
name: test-cdio-cli-opts-output-py${{ env.PYTHON_VERSION }}
|
||||
path: cli-opts-output.txt
|
||||
|
||||
# Playwright tests
|
||||
playwright-tests:
|
||||
runs-on: ubuntu-latest
|
||||
@@ -343,6 +395,29 @@ jobs:
|
||||
cd changedetectionio
|
||||
./run_custom_browser_url_tests.sh
|
||||
|
||||
processor-plugin-tests:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
timeout-minutes: 20
|
||||
env:
|
||||
PYTHON_VERSION: ${{ inputs.python-version }}
|
||||
steps:
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
|
||||
- name: Load Docker image
|
||||
run: |
|
||||
docker load -i /tmp/test-changedetectionio.tar
|
||||
|
||||
- name: Basic processor plugin registration and checks
|
||||
run: |
|
||||
docker run -e EXTRA_PACKAGES=changedetection.io-osint-processor test-changedetectionio bash -c 'cd changedetectionio;pytest -vvv -s tests/plugins/test_processor.py::test_check_plugin_processor'
|
||||
|
||||
# Container startup tests
|
||||
container-tests:
|
||||
runs-on: ubuntu-latest
|
||||
@@ -441,3 +516,142 @@ jobs:
|
||||
exit 1
|
||||
fi
|
||||
docker rm sig-test
|
||||
|
||||
# Upgrade path test
|
||||
upgrade-path-test:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
timeout-minutes: 25
|
||||
env:
|
||||
PYTHON_VERSION: ${{ inputs.python-version }}
|
||||
steps:
|
||||
- uses: actions/checkout@v6
|
||||
with:
|
||||
fetch-depth: 0 # Fetch all history and tags for upgrade testing
|
||||
|
||||
- name: Set up Python ${{ env.PYTHON_VERSION }}
|
||||
uses: actions/setup-python@v6
|
||||
with:
|
||||
python-version: ${{ env.PYTHON_VERSION }}
|
||||
|
||||
- name: Check upgrade works without error
|
||||
run: |
|
||||
echo "=== Testing upgrade path from 0.49.1 to ${{ github.ref_name }} (${{ github.sha }}) ==="
|
||||
|
||||
# Checkout old version and create datastore
|
||||
git checkout 0.49.1
|
||||
python3 -m venv .venv
|
||||
source .venv/bin/activate
|
||||
pip install -r requirements.txt
|
||||
pip install 'pyOpenSSL>=23.2.0'
|
||||
|
||||
echo "=== Running version 0.49.1 to create datastore ==="
|
||||
python3 ./changedetection.py -C -d /tmp/data &
|
||||
APP_PID=$!
|
||||
|
||||
# Wait for app to be ready
|
||||
echo "Waiting for 0.49.1 to be ready..."
|
||||
sleep 6
|
||||
|
||||
# Extract API key from datastore (0.49.1 uses url-watches.json)
|
||||
API_KEY=$(jq -r '.settings.application.api_access_token // empty' /tmp/data/url-watches.json)
|
||||
echo "API Key: ${API_KEY:0:8}..."
|
||||
|
||||
# Create a watch with tag "github-group-test" via API
|
||||
echo "Creating test watch with tag via API..."
|
||||
curl -X POST "http://127.0.0.1:5000/api/v1/watch" \
|
||||
-H "x-api-key: ${API_KEY}" \
|
||||
-H "Content-Type: application/json" \
|
||||
--show-error --fail \
|
||||
--retry 6 --retry-delay 1 --retry-connrefused \
|
||||
-d '{
|
||||
"url": "https://example.com/upgrade-test",
|
||||
"tag": "github-group-test"
|
||||
}'
|
||||
|
||||
echo "✓ Created watch with tag 'github-group-test'"
|
||||
|
||||
# Create a specific test URL watch
|
||||
echo "Creating test URL watch via API..."
|
||||
curl -X POST "http://127.0.0.1:5000/api/v1/watch" \
|
||||
-H "x-api-key: ${API_KEY}" \
|
||||
-H "Content-Type: application/json" \
|
||||
--show-error --fail \
|
||||
-d '{
|
||||
"url": "http://localhost/test.txt"
|
||||
}'
|
||||
|
||||
echo "✓ Created watch for 'http://localhost/test.txt' in version 0.49.1"
|
||||
|
||||
# Stop the old version gracefully
|
||||
kill $APP_PID
|
||||
wait $APP_PID || true
|
||||
echo "✓ Version 0.49.1 stopped"
|
||||
|
||||
# Upgrade to current version (use commit SHA since we're in detached HEAD)
|
||||
echo "Upgrading to commit ${{ github.sha }}"
|
||||
git checkout ${{ github.sha }}
|
||||
pip install -r requirements.txt
|
||||
|
||||
echo "=== Running current version (commit ${{ github.sha }}) with old datastore (testing mode) ==="
|
||||
TESTING_SHUTDOWN_AFTER_DATASTORE_LOAD=1 python3 ./changedetection.py -d /tmp/data > /tmp/upgrade-test.log 2>&1
|
||||
|
||||
echo "=== Upgrade test output ==="
|
||||
cat /tmp/upgrade-test.log
|
||||
echo "✓ Datastore upgraded successfully"
|
||||
|
||||
# Now start the current version normally to verify the tag survived
|
||||
echo "=== Starting current version to verify tag exists after upgrade ==="
|
||||
timeout 20 python3 ./changedetection.py -d /tmp/data > /tmp/ui-test.log 2>&1 &
|
||||
APP_PID=$!
|
||||
|
||||
# Wait for app to be ready and fetch UI
|
||||
echo "Waiting for current version to be ready..."
|
||||
sleep 5
|
||||
curl --retry 6 --retry-delay 1 --retry-connrefused --silent http://127.0.0.1:5000 > /tmp/ui-output.html
|
||||
|
||||
# Verify tag exists in UI
|
||||
if grep -q "github-group-test" /tmp/ui-output.html; then
|
||||
echo "✓ Tag 'github-group-test' found in UI after upgrade"
|
||||
else
|
||||
echo "ERROR: Tag 'github-group-test' not found in UI after upgrade"
|
||||
echo "=== UI Output ==="
|
||||
cat /tmp/ui-output.html
|
||||
echo "=== App Log ==="
|
||||
cat /tmp/ui-test.log
|
||||
kill $APP_PID || true
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Verify test URL exists in UI
|
||||
if grep -q "http://localhost/test.txt" /tmp/ui-output.html; then
|
||||
echo "✓ Watch URL 'http://localhost/test.txt' found in UI after upgrade"
|
||||
else
|
||||
echo "ERROR: Watch URL 'http://localhost/test.txt' not found in UI after upgrade"
|
||||
echo "=== UI Output ==="
|
||||
cat /tmp/ui-output.html
|
||||
echo "=== App Log ==="
|
||||
cat /tmp/ui-test.log
|
||||
kill $APP_PID || true
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Cleanup
|
||||
kill $APP_PID || true
|
||||
wait $APP_PID || true
|
||||
|
||||
echo ""
|
||||
echo "✓✓✓ Upgrade test passed: 0.49.1 → ${{ github.ref_name }} ✓✓✓"
|
||||
echo " - Commit: ${{ github.sha }}"
|
||||
echo " - Datastore migrated successfully"
|
||||
echo " - Tag 'github-group-test' survived upgrade"
|
||||
echo " - Watch URL 'http://localhost/test.txt' survived upgrade"
|
||||
|
||||
echo "✓ Upgrade test passed: 0.49.1 → ${{ github.ref_name }}"
|
||||
|
||||
- name: Upload upgrade test logs
|
||||
if: always()
|
||||
uses: actions/upload-artifact@v6
|
||||
with:
|
||||
name: upgrade-test-logs-py${{ env.PYTHON_VERSION }}
|
||||
path: /tmp/upgrade-test.log
|
||||
|
||||
@@ -29,3 +29,4 @@ test-datastore/
|
||||
|
||||
# Memory consumption log
|
||||
test-memory.log
|
||||
tests/logs/
|
||||
|
||||
@@ -138,6 +138,15 @@ ENV LOGGER_LEVEL="$LOGGER_LEVEL"
|
||||
ENV LC_ALL=en_US.UTF-8
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Copy and set up entrypoint script for installing extra packages
|
||||
COPY docker-entrypoint.sh /docker-entrypoint.sh
|
||||
RUN chmod +x /docker-entrypoint.sh
|
||||
|
||||
# Set entrypoint to handle EXTRA_PACKAGES env var
|
||||
ENTRYPOINT ["/docker-entrypoint.sh"]
|
||||
|
||||
# Default command (can be overridden in docker-compose.yml)
|
||||
CMD ["python", "./changedetection.py", "-d", "/datastore"]
|
||||
|
||||
|
||||
|
||||
@@ -9,6 +9,7 @@ recursive-include changedetectionio/notification *
|
||||
recursive-include changedetectionio/processors *
|
||||
recursive-include changedetectionio/realtime *
|
||||
recursive-include changedetectionio/static *
|
||||
recursive-include changedetectionio/store *
|
||||
recursive-include changedetectionio/templates *
|
||||
recursive-include changedetectionio/tests *
|
||||
recursive-include changedetectionio/translations *
|
||||
|
||||
+406
-60
@@ -2,23 +2,24 @@
|
||||
|
||||
# Read more https://github.com/dgtlmoon/changedetection.io/wiki
|
||||
# Semver means never use .01, or 00. Should be .1.
|
||||
__version__ = '0.52.8'
|
||||
__version__ = '0.52.9'
|
||||
|
||||
from changedetectionio.strtobool import strtobool
|
||||
from json.decoder import JSONDecodeError
|
||||
|
||||
from loguru import logger
|
||||
import getopt
|
||||
import logging
|
||||
import os
|
||||
import getopt
|
||||
import platform
|
||||
import signal
|
||||
|
||||
import sys
|
||||
import threading
|
||||
import time
|
||||
|
||||
# Eventlet completely removed - using threading mode for SocketIO
|
||||
# This provides better Python 3.12+ compatibility and eliminates eventlet/asyncio conflicts
|
||||
from changedetectionio import store
|
||||
from changedetectionio.flask_app import changedetection_app
|
||||
from loguru import logger
|
||||
# Note: store and changedetection_app are imported inside main() to avoid
|
||||
# initialization before argument parsing (allows --help to work without loading everything)
|
||||
|
||||
# ==============================================================================
|
||||
# Multiprocessing Configuration - CRITICAL for Thread Safety
|
||||
@@ -83,15 +84,26 @@ def get_version():
|
||||
def sigshutdown_handler(_signo, _stack_frame):
|
||||
name = signal.Signals(_signo).name
|
||||
logger.critical(f'Shutdown: Got Signal - {name} ({_signo}), Fast shutdown initiated')
|
||||
|
||||
|
||||
# Set exit flag immediately to stop all loops
|
||||
app.config.exit.set()
|
||||
datastore.stop_thread = True
|
||||
|
||||
|
||||
# Log memory consumption before shutting down workers (cross-platform)
|
||||
try:
|
||||
import psutil
|
||||
process = psutil.Process()
|
||||
mem_info = process.memory_info()
|
||||
rss_mb = mem_info.rss / 1024 / 1024
|
||||
vms_mb = mem_info.vms / 1024 / 1024
|
||||
logger.info(f"Memory consumption before worker shutdown: RSS={rss_mb:,.2f} MB, VMS={vms_mb:,.2f} MB")
|
||||
except Exception as e:
|
||||
logger.warning(f"Could not retrieve memory stats: {str(e)}")
|
||||
|
||||
# Shutdown workers and queues immediately
|
||||
try:
|
||||
from changedetectionio import worker_handler
|
||||
worker_handler.shutdown_workers()
|
||||
from changedetectionio import worker_pool
|
||||
worker_pool.shutdown_workers()
|
||||
except Exception as e:
|
||||
logger.error(f"Error shutting down workers: {str(e)}")
|
||||
|
||||
@@ -100,9 +112,9 @@ def sigshutdown_handler(_signo, _stack_frame):
|
||||
from changedetectionio.flask_app import update_q, notification_q
|
||||
update_q.close()
|
||||
notification_q.close()
|
||||
logger.debug("Janus queues closed successfully")
|
||||
logger.debug("Queues closed successfully")
|
||||
except Exception as e:
|
||||
logger.critical(f"CRITICAL: Failed to close janus queues: {e}")
|
||||
logger.critical(f"CRITICAL: Failed to close queues: {e}")
|
||||
|
||||
# Shutdown socketio server fast
|
||||
from changedetectionio.flask_app import socketio_server
|
||||
@@ -112,31 +124,80 @@ def sigshutdown_handler(_signo, _stack_frame):
|
||||
except Exception as e:
|
||||
logger.error(f"Error shutting down Socket.IO server: {str(e)}")
|
||||
|
||||
# Save data quickly
|
||||
try:
|
||||
datastore.sync_to_json()
|
||||
logger.success('Fast sync to disk complete.')
|
||||
except Exception as e:
|
||||
logger.error(f"Error syncing to disk: {str(e)}")
|
||||
|
||||
# With immediate persistence, all data is already saved
|
||||
logger.success('All data already persisted (immediate commits enabled).')
|
||||
|
||||
sys.exit()
|
||||
|
||||
def print_help():
|
||||
"""Print help text for command line options"""
|
||||
print('Usage: changedetection.py [options]')
|
||||
print('')
|
||||
print('Standard options:')
|
||||
print(' -s SSL enable')
|
||||
print(' -h HOST Listen host (default: 0.0.0.0)')
|
||||
print(' -p PORT Listen port (default: 5000)')
|
||||
print(' -d PATH Datastore path')
|
||||
print(' -l LEVEL Log level (TRACE, DEBUG, INFO, SUCCESS, WARNING, ERROR, CRITICAL)')
|
||||
print(' -c Cleanup unused snapshots')
|
||||
print(' -C Create datastore directory if it doesn\'t exist')
|
||||
print(' -P true/false Set all watches paused (true) or active (false)')
|
||||
print('')
|
||||
print('Add URLs on startup:')
|
||||
print(' -u URL Add URL to watch (can be used multiple times)')
|
||||
print(' -u0 \'JSON\' Set options for first -u URL (e.g. \'{"processor":"text_json_diff"}\')')
|
||||
print(' -u1 \'JSON\' Set options for second -u URL (0-indexed)')
|
||||
print(' -u2 \'JSON\' Set options for third -u URL, etc.')
|
||||
print(' Available options: processor, fetch_backend, headers, method, etc.')
|
||||
print(' See model/Watch.py for all available options')
|
||||
print('')
|
||||
print('Recheck on startup:')
|
||||
print(' -r all Queue all watches for recheck on startup')
|
||||
print(' -r UUID,... Queue specific watches (comma-separated UUIDs)')
|
||||
print(' -r all N Queue all watches, wait for completion, repeat N times')
|
||||
print(' -r UUID,... N Queue specific watches, wait for completion, repeat N times')
|
||||
print('')
|
||||
print('Batch mode:')
|
||||
print(' -b Run in batch mode (process queue then exit)')
|
||||
print(' Useful for CI/CD, cron jobs, or one-time checks')
|
||||
print(' NOTE: Batch mode checks if Flask is running and aborts if port is in use')
|
||||
print(' Use -p PORT to specify a different port if needed')
|
||||
print('')
|
||||
|
||||
def main():
|
||||
global datastore
|
||||
global app
|
||||
|
||||
# Early help/version check before any initialization
|
||||
if '--help' in sys.argv or '-help' in sys.argv:
|
||||
print_help()
|
||||
sys.exit(0)
|
||||
|
||||
if '--version' in sys.argv or '-v' in sys.argv:
|
||||
print(f'changedetection.io {__version__}')
|
||||
sys.exit(0)
|
||||
|
||||
# Import heavy modules after help/version checks to keep startup fast for those flags
|
||||
from changedetectionio import store
|
||||
from changedetectionio.flask_app import changedetection_app
|
||||
|
||||
datastore_path = None
|
||||
do_cleanup = False
|
||||
# Optional URL to watch since start
|
||||
default_url = None
|
||||
# Set a default logger level
|
||||
logger_level = 'DEBUG'
|
||||
include_default_watches = True
|
||||
all_paused = None # None means don't change, True/False to set
|
||||
|
||||
host = os.environ.get("LISTEN_HOST", "0.0.0.0").strip()
|
||||
port = int(os.environ.get('PORT', 5000))
|
||||
ssl_mode = False
|
||||
|
||||
# Lists for multiple URLs and their options
|
||||
urls_to_add = []
|
||||
url_options = {} # Key: index (0-based), Value: dict of options
|
||||
recheck_watches = None # None, 'all', or list of UUIDs
|
||||
recheck_repeat_count = 1 # Number of times to repeat recheck cycle
|
||||
batch_mode = False # Run once then exit when queue is empty
|
||||
|
||||
# On Windows, create and use a default path.
|
||||
if os.name == 'nt':
|
||||
datastore_path = os.path.expandvars(r'%APPDATA%\changedetection.io')
|
||||
@@ -145,10 +206,68 @@ def main():
|
||||
# Must be absolute so that send_from_directory doesnt try to make it relative to backend/
|
||||
datastore_path = os.path.join(os.getcwd(), "../datastore")
|
||||
|
||||
# Pre-process arguments to extract -u, -u<N>, and -r options before getopt
|
||||
# This allows unlimited -u0, -u1, -u2, ... options without predefining them
|
||||
cleaned_argv = ['changedetection.py'] # Start with program name
|
||||
i = 1
|
||||
while i < len(sys.argv):
|
||||
arg = sys.argv[i]
|
||||
|
||||
# Handle -u (add URL)
|
||||
if arg == '-u' and i + 1 < len(sys.argv):
|
||||
urls_to_add.append(sys.argv[i + 1])
|
||||
i += 2
|
||||
continue
|
||||
|
||||
# Handle -u<N> (set options for URL at index N)
|
||||
if arg.startswith('-u') and len(arg) > 2 and arg[2:].isdigit():
|
||||
idx = int(arg[2:])
|
||||
if i + 1 < len(sys.argv):
|
||||
try:
|
||||
import json
|
||||
url_options[idx] = json.loads(sys.argv[i + 1])
|
||||
except json.JSONDecodeError as e:
|
||||
print(f'Error: Invalid JSON for {arg}: {sys.argv[i + 1]}')
|
||||
print(f'JSON decode error: {e}')
|
||||
sys.exit(2)
|
||||
i += 2
|
||||
continue
|
||||
|
||||
# Handle -r (recheck watches)
|
||||
if arg == '-r' and i + 1 < len(sys.argv):
|
||||
recheck_arg = sys.argv[i + 1]
|
||||
if recheck_arg.lower() == 'all':
|
||||
recheck_watches = 'all'
|
||||
else:
|
||||
# Parse comma-separated list of UUIDs
|
||||
recheck_watches = [uuid.strip() for uuid in recheck_arg.split(',') if uuid.strip()]
|
||||
|
||||
# Check for optional repeat count as third argument
|
||||
if i + 2 < len(sys.argv) and sys.argv[i + 2].isdigit():
|
||||
recheck_repeat_count = int(sys.argv[i + 2])
|
||||
if recheck_repeat_count < 1:
|
||||
print(f'Error: Repeat count must be at least 1, got {recheck_repeat_count}')
|
||||
sys.exit(2)
|
||||
i += 3
|
||||
else:
|
||||
i += 2
|
||||
continue
|
||||
|
||||
# Handle -b (batch mode - run once and exit)
|
||||
if arg == '-b':
|
||||
batch_mode = True
|
||||
i += 1
|
||||
continue
|
||||
|
||||
# Keep other arguments for getopt
|
||||
cleaned_argv.append(arg)
|
||||
i += 1
|
||||
|
||||
try:
|
||||
opts, args = getopt.getopt(sys.argv[1:], "6Ccsd:h:p:l:u:", "port")
|
||||
except getopt.GetoptError:
|
||||
print('backend.py -s SSL enable -h [host] -p [port] -d [datastore path] -u [default URL to watch] -l [debug level - TRACE, DEBUG(default), INFO, SUCCESS, WARNING, ERROR, CRITICAL]')
|
||||
opts, args = getopt.getopt(cleaned_argv[1:], "6Csd:h:p:l:P:", "port")
|
||||
except getopt.GetoptError as e:
|
||||
print_help()
|
||||
print(f'Error: {e}')
|
||||
sys.exit(2)
|
||||
|
||||
create_datastore_dir = False
|
||||
@@ -173,14 +292,6 @@ def main():
|
||||
if opt == '-d':
|
||||
datastore_path = arg
|
||||
|
||||
if opt == '-u':
|
||||
default_url = arg
|
||||
include_default_watches = False
|
||||
|
||||
# Cleanup (remove text files that arent in the index)
|
||||
if opt == '-c':
|
||||
do_cleanup = True
|
||||
|
||||
# Create the datadir if it doesnt exist
|
||||
if opt == '-C':
|
||||
create_datastore_dir = True
|
||||
@@ -188,6 +299,18 @@ def main():
|
||||
if opt == '-l':
|
||||
logger_level = int(arg) if arg.isdigit() else arg.upper()
|
||||
|
||||
if opt == '-P':
|
||||
try:
|
||||
all_paused = bool(strtobool(arg))
|
||||
except ValueError:
|
||||
print(f'Error: Invalid value for -P option: {arg}')
|
||||
print('Expected: true, false, yes, no, 1, or 0')
|
||||
sys.exit(2)
|
||||
|
||||
# If URLs are provided, don't include default watches
|
||||
if urls_to_add:
|
||||
include_default_watches = False
|
||||
|
||||
|
||||
logger.success(f"changedetection.io version {get_version()} starting.")
|
||||
# Launch using SocketIO run method for proper integration (if enabled)
|
||||
@@ -224,11 +347,16 @@ def main():
|
||||
logging.getLogger('pyppeteer.connection.Connection').setLevel(logging.WARNING)
|
||||
|
||||
# isnt there some @thingy to attach to each route to tell it, that this route needs a datastore
|
||||
app_config = {'datastore_path': datastore_path}
|
||||
app_config = {
|
||||
'datastore_path': datastore_path,
|
||||
'batch_mode': batch_mode,
|
||||
'recheck_watches': recheck_watches,
|
||||
'recheck_repeat_count': recheck_repeat_count
|
||||
}
|
||||
|
||||
if not os.path.isdir(app_config['datastore_path']):
|
||||
if create_datastore_dir:
|
||||
os.mkdir(app_config['datastore_path'])
|
||||
os.makedirs(app_config['datastore_path'], exist_ok=True)
|
||||
else:
|
||||
logger.critical(
|
||||
f"ERROR: Directory path for the datastore '{app_config['datastore_path']}'"
|
||||
@@ -243,17 +371,219 @@ def main():
|
||||
# Dont' start if the JSON DB looks corrupt
|
||||
logger.critical(f"ERROR: JSON DB or Proxy List JSON at '{app_config['datastore_path']}' appears to be corrupt, aborting.")
|
||||
logger.critical(str(e))
|
||||
return
|
||||
sys.exit(1)
|
||||
|
||||
# Testing mode: Exit cleanly after datastore initialization (for CI/CD upgrade tests)
|
||||
if os.environ.get('TESTING_SHUTDOWN_AFTER_DATASTORE_LOAD'):
|
||||
logger.success(f"TESTING MODE: Datastore loaded successfully from {app_config['datastore_path']}")
|
||||
logger.success(f"TESTING MODE: Schema version: {datastore.data['settings']['application'].get('schema_version', 'unknown')}")
|
||||
logger.success(f"TESTING MODE: Loaded {len(datastore.data['watching'])} watches")
|
||||
logger.success("TESTING MODE: Exiting cleanly (TESTING_SHUTDOWN_AFTER_DATASTORE_LOAD is set)")
|
||||
sys.exit(0)
|
||||
|
||||
# Apply all_paused setting if specified via CLI
|
||||
if all_paused is not None:
|
||||
datastore.data['settings']['application']['all_paused'] = all_paused
|
||||
logger.info(f"Setting all watches paused: {all_paused}")
|
||||
|
||||
# Inject datastore into plugins that need access to settings
|
||||
from changedetectionio.pluggy_interface import inject_datastore_into_plugins
|
||||
inject_datastore_into_plugins(datastore)
|
||||
|
||||
if default_url:
|
||||
datastore.add_watch(url = default_url)
|
||||
# Step 1: Add URLs with their options (if provided via -u flags)
|
||||
added_watch_uuids = []
|
||||
if urls_to_add:
|
||||
logger.info(f"Adding {len(urls_to_add)} URL(s) from command line")
|
||||
for idx, url in enumerate(urls_to_add):
|
||||
extras = url_options.get(idx, {})
|
||||
if extras:
|
||||
logger.debug(f"Adding watch {idx}: {url} with options: {extras}")
|
||||
else:
|
||||
logger.debug(f"Adding watch {idx}: {url}")
|
||||
|
||||
new_uuid = datastore.add_watch(url=url, extras=extras)
|
||||
if new_uuid:
|
||||
added_watch_uuids.append(new_uuid)
|
||||
logger.success(f"Added watch: {url} (UUID: {new_uuid})")
|
||||
else:
|
||||
logger.error(f"Failed to add watch: {url}")
|
||||
|
||||
app = changedetection_app(app_config, datastore)
|
||||
|
||||
# Step 2: Queue newly added watches (if -u was provided in batch mode)
|
||||
# This must happen AFTER app initialization so update_q is available
|
||||
if batch_mode and added_watch_uuids:
|
||||
from changedetectionio.flask_app import update_q
|
||||
from changedetectionio import queuedWatchMetaData, worker_pool
|
||||
|
||||
logger.info(f"Batch mode: Queuing {len(added_watch_uuids)} newly added watches")
|
||||
for watch_uuid in added_watch_uuids:
|
||||
try:
|
||||
worker_pool.queue_item_async_safe(
|
||||
update_q,
|
||||
queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid})
|
||||
)
|
||||
logger.debug(f"Queued newly added watch: {watch_uuid}")
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to queue watch {watch_uuid}: {e}")
|
||||
|
||||
# Step 3: Queue watches for recheck (if -r was provided)
|
||||
# This must happen AFTER app initialization so update_q is available
|
||||
if recheck_watches is not None:
|
||||
from changedetectionio.flask_app import update_q
|
||||
from changedetectionio import queuedWatchMetaData, worker_pool
|
||||
|
||||
watches_to_queue = []
|
||||
if recheck_watches == 'all':
|
||||
# Queue all watches, excluding those already queued in batch mode
|
||||
all_watches = list(datastore.data['watching'].keys())
|
||||
if batch_mode and added_watch_uuids:
|
||||
# Exclude newly added watches that were already queued in batch mode
|
||||
watches_to_queue = [uuid for uuid in all_watches if uuid not in added_watch_uuids]
|
||||
logger.info(f"Queuing {len(watches_to_queue)} existing watches for recheck ({len(added_watch_uuids)} newly added watches already queued)")
|
||||
else:
|
||||
watches_to_queue = all_watches
|
||||
logger.info(f"Queuing all {len(watches_to_queue)} watches for recheck")
|
||||
else:
|
||||
# Queue specific UUIDs
|
||||
watches_to_queue = recheck_watches
|
||||
logger.info(f"Queuing {len(watches_to_queue)} specific watches for recheck")
|
||||
|
||||
queued_count = 0
|
||||
for watch_uuid in watches_to_queue:
|
||||
if watch_uuid in datastore.data['watching']:
|
||||
try:
|
||||
worker_pool.queue_item_async_safe(
|
||||
update_q,
|
||||
queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid})
|
||||
)
|
||||
queued_count += 1
|
||||
logger.debug(f"Queued watch for recheck: {watch_uuid}")
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to queue watch {watch_uuid}: {e}")
|
||||
else:
|
||||
logger.warning(f"Watch UUID not found in datastore: {watch_uuid}")
|
||||
|
||||
logger.success(f"Successfully queued {queued_count} watches for recheck")
|
||||
|
||||
# Step 4: Setup batch mode monitor (if -b was provided)
|
||||
if batch_mode:
|
||||
from changedetectionio.flask_app import update_q
|
||||
|
||||
# Safety check: Ensure Flask app is not already running on this port
|
||||
# Batch mode should never run alongside the web server
|
||||
import socket
|
||||
test_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
|
||||
|
||||
try:
|
||||
# Try to bind to the configured host:port (no SO_REUSEADDR - strict check)
|
||||
test_socket.bind((host, port))
|
||||
test_socket.close()
|
||||
logger.debug(f"Batch mode: Port {port} is available (Flask app not running)")
|
||||
except OSError as e:
|
||||
test_socket.close()
|
||||
# errno 98 = EADDRINUSE (Linux)
|
||||
# errno 48 = EADDRINUSE (macOS)
|
||||
# errno 10048 = WSAEADDRINUSE (Windows)
|
||||
if e.errno in (48, 98, 10048) or "Address already in use" in str(e) or "already in use" in str(e).lower():
|
||||
logger.critical(f"ERROR: Batch mode cannot run - port {port} is already in use")
|
||||
logger.critical(f"The Flask web server appears to be running on {host}:{port}")
|
||||
logger.critical(f"Batch mode is designed for standalone operation (CI/CD, cron jobs, etc.)")
|
||||
logger.critical(f"Please either stop the Flask web server, or use a different port with -p PORT")
|
||||
sys.exit(1)
|
||||
else:
|
||||
# Some other socket error - log but continue (might be network configuration issue)
|
||||
logger.warning(f"Port availability check failed with unexpected error: {e}")
|
||||
logger.warning(f"Continuing with batch mode anyway - be aware of potential conflicts")
|
||||
|
||||
def queue_watches_for_recheck(datastore, iteration):
|
||||
"""Helper function to queue watches for recheck"""
|
||||
watches_to_queue = []
|
||||
if recheck_watches == 'all':
|
||||
all_watches = list(datastore.data['watching'].keys())
|
||||
if batch_mode and added_watch_uuids and iteration == 1:
|
||||
# Only exclude newly added watches on first iteration
|
||||
watches_to_queue = [uuid for uuid in all_watches if uuid not in added_watch_uuids]
|
||||
else:
|
||||
watches_to_queue = all_watches
|
||||
logger.info(f"Batch mode (iteration {iteration}): Queuing all {len(watches_to_queue)} watches")
|
||||
elif recheck_watches:
|
||||
watches_to_queue = recheck_watches
|
||||
logger.info(f"Batch mode (iteration {iteration}): Queuing {len(watches_to_queue)} specific watches")
|
||||
|
||||
queued_count = 0
|
||||
for watch_uuid in watches_to_queue:
|
||||
if watch_uuid in datastore.data['watching']:
|
||||
try:
|
||||
worker_pool.queue_item_async_safe(
|
||||
update_q,
|
||||
queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid})
|
||||
)
|
||||
queued_count += 1
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to queue watch {watch_uuid}: {e}")
|
||||
else:
|
||||
logger.warning(f"Watch UUID not found in datastore: {watch_uuid}")
|
||||
logger.success(f"Batch mode (iteration {iteration}): Successfully queued {queued_count} watches")
|
||||
return queued_count
|
||||
|
||||
def batch_mode_monitor():
|
||||
"""Monitor queue and workers, shutdown or repeat when work is complete"""
|
||||
import time
|
||||
|
||||
# Track iterations if repeat mode is enabled
|
||||
current_iteration = 1
|
||||
total_iterations = recheck_repeat_count if recheck_watches and recheck_repeat_count > 1 else 1
|
||||
|
||||
if total_iterations > 1:
|
||||
logger.info(f"Batch mode: Will repeat recheck {total_iterations} times")
|
||||
else:
|
||||
logger.info("Batch mode: Waiting for all queued items to complete...")
|
||||
|
||||
# Wait a bit for workers to start processing
|
||||
time.sleep(3)
|
||||
|
||||
try:
|
||||
while current_iteration <= total_iterations:
|
||||
logger.info(f"Batch mode: Waiting for iteration {current_iteration}/{total_iterations} to complete...")
|
||||
|
||||
# Use the shared wait_for_all_checks function
|
||||
completed = worker_pool.wait_for_all_checks(update_q, timeout=300)
|
||||
|
||||
if not completed:
|
||||
logger.warning(f"Batch mode: Iteration {current_iteration} timed out after 300 seconds")
|
||||
|
||||
logger.success(f"Batch mode: Iteration {current_iteration}/{total_iterations} completed")
|
||||
|
||||
# Check if we need to repeat
|
||||
if current_iteration < total_iterations:
|
||||
logger.info(f"Batch mode: Starting iteration {current_iteration + 1}...")
|
||||
current_iteration += 1
|
||||
|
||||
# Re-queue watches for next iteration
|
||||
queue_watches_for_recheck(datastore, current_iteration)
|
||||
|
||||
# Brief pause before continuing
|
||||
time.sleep(2)
|
||||
else:
|
||||
# All iterations complete
|
||||
logger.success(f"Batch mode: All {total_iterations} iterations completed, initiating shutdown")
|
||||
# Trigger shutdown
|
||||
import os, signal
|
||||
os.kill(os.getpid(), signal.SIGTERM)
|
||||
return
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Batch mode monitor error: {e}")
|
||||
logger.error(f"Initiating emergency shutdown")
|
||||
import os, signal
|
||||
os.kill(os.getpid(), signal.SIGTERM)
|
||||
|
||||
# Start monitor in background thread
|
||||
monitor_thread = threading.Thread(target=batch_mode_monitor, daemon=True, name="BatchModeMonitor")
|
||||
monitor_thread.start()
|
||||
logger.info("Batch mode enabled: Will exit after all queued items are processed")
|
||||
|
||||
# Get the SocketIO instance from the Flask app (created in flask_app.py)
|
||||
from changedetectionio.flask_app import socketio_server
|
||||
global socketio
|
||||
@@ -275,10 +605,6 @@ def main():
|
||||
else:
|
||||
logger.info("SIGUSR1 handler only registered on Linux, skipped.")
|
||||
|
||||
# Go into cleanup mode
|
||||
if do_cleanup:
|
||||
datastore.remove_unused_snapshots()
|
||||
|
||||
app.config['datastore_path'] = datastore_path
|
||||
|
||||
|
||||
@@ -287,7 +613,7 @@ def main():
|
||||
return dict(right_sticky="v{}".format(datastore.data['version_tag']),
|
||||
new_version_available=app.config['NEW_VERSION_AVAILABLE'],
|
||||
has_password=datastore.data['settings']['application']['password'] != False,
|
||||
socket_io_enabled=datastore.data['settings']['application']['ui'].get('socket_io_enabled', True),
|
||||
socket_io_enabled=datastore.data['settings']['application'].get('ui', {}).get('socket_io_enabled', True),
|
||||
all_paused=datastore.data['settings']['application'].get('all_paused', False),
|
||||
all_muted=datastore.data['settings']['application'].get('all_muted', False)
|
||||
)
|
||||
@@ -311,23 +637,43 @@ def main():
|
||||
if os.getenv('USE_X_SETTINGS'):
|
||||
logger.info("USE_X_SETTINGS is ENABLED")
|
||||
from werkzeug.middleware.proxy_fix import ProxyFix
|
||||
app.wsgi_app = ProxyFix(app.wsgi_app, x_prefix=1, x_host=1)
|
||||
app.wsgi_app = ProxyFix(
|
||||
app.wsgi_app,
|
||||
x_for=1, # X-Forwarded-For (client IP)
|
||||
x_proto=1, # X-Forwarded-Proto (http/https)
|
||||
x_host=1, # X-Forwarded-Host (original host)
|
||||
x_port=1, # X-Forwarded-Port (original port)
|
||||
x_prefix=1 # X-Forwarded-Prefix (URL prefix)
|
||||
)
|
||||
|
||||
|
||||
# SocketIO instance is already initialized in flask_app.py
|
||||
if socketio_server:
|
||||
if ssl_mode:
|
||||
logger.success(f"SSL mode enabled, attempting to start with '{ssl_cert_file}' and '{ssl_privkey_file}' in {os.getcwd()}")
|
||||
socketio.run(app, host=host, port=int(port), debug=False,
|
||||
ssl_context=(ssl_cert_file, ssl_privkey_file), allow_unsafe_werkzeug=True)
|
||||
else:
|
||||
socketio.run(app, host=host, port=int(port), debug=False, allow_unsafe_werkzeug=True)
|
||||
# In batch mode, skip starting the HTTP server - just keep workers running
|
||||
if batch_mode:
|
||||
logger.info("Batch mode: Skipping HTTP server startup, workers will process queue")
|
||||
logger.info("Batch mode: Main thread will wait for shutdown signal")
|
||||
# Keep main thread alive until batch monitor triggers shutdown
|
||||
try:
|
||||
while True:
|
||||
time.sleep(1)
|
||||
except KeyboardInterrupt:
|
||||
logger.info("Batch mode: Keyboard interrupt received")
|
||||
pass
|
||||
else:
|
||||
# Run Flask app without Socket.IO if disabled
|
||||
logger.info("Starting Flask app without Socket.IO server")
|
||||
if ssl_mode:
|
||||
logger.success(f"SSL mode enabled, attempting to start with '{ssl_cert_file}' and '{ssl_privkey_file}' in {os.getcwd()}")
|
||||
app.run(host=host, port=int(port), debug=False,
|
||||
ssl_context=(ssl_cert_file, ssl_privkey_file))
|
||||
# Normal mode: Start HTTP server
|
||||
# SocketIO instance is already initialized in flask_app.py
|
||||
if socketio_server:
|
||||
if ssl_mode:
|
||||
logger.success(f"SSL mode enabled, attempting to start with '{ssl_cert_file}' and '{ssl_privkey_file}' in {os.getcwd()}")
|
||||
socketio.run(app, host=host, port=int(port), debug=False,
|
||||
ssl_context=(ssl_cert_file, ssl_privkey_file), allow_unsafe_werkzeug=True)
|
||||
else:
|
||||
socketio.run(app, host=host, port=int(port), debug=False, allow_unsafe_werkzeug=True)
|
||||
else:
|
||||
app.run(host=host, port=int(port), debug=False)
|
||||
# Run Flask app without Socket.IO if disabled
|
||||
logger.info("Starting Flask app without Socket.IO server")
|
||||
if ssl_mode:
|
||||
logger.success(f"SSL mode enabled, attempting to start with '{ssl_cert_file}' and '{ssl_privkey_file}' in {os.getcwd()}")
|
||||
app.run(host=host, port=int(port), debug=False,
|
||||
ssl_context=(ssl_cert_file, ssl_privkey_file))
|
||||
else:
|
||||
app.run(host=host, port=int(port), debug=False)
|
||||
|
||||
@@ -2,8 +2,12 @@ from changedetectionio.strtobool import strtobool
|
||||
from flask_restful import abort, Resource
|
||||
from flask import request
|
||||
from functools import wraps
|
||||
from . import auth, validate_openapi_request
|
||||
from . import auth, validate_openapi_request, schema_create_watch
|
||||
from ..validate_url import is_safe_valid_url
|
||||
import json
|
||||
|
||||
# Number of URLs above which import switches to background processing
|
||||
IMPORT_SWITCH_TO_BACKGROUND_THRESHOLD = 20
|
||||
|
||||
|
||||
def default_content_type(content_type='text/plain'):
|
||||
@@ -19,6 +23,62 @@ def default_content_type(content_type='text/plain'):
|
||||
return decorator
|
||||
|
||||
|
||||
def convert_query_param_to_type(value, schema_property):
|
||||
"""
|
||||
Convert a query parameter string to the appropriate type based on schema definition.
|
||||
|
||||
Args:
|
||||
value: String value from query parameter
|
||||
schema_property: Schema property definition with 'type' or 'anyOf' field
|
||||
|
||||
Returns:
|
||||
Converted value in the appropriate type
|
||||
"""
|
||||
# Handle anyOf schemas (extract the first type)
|
||||
if 'anyOf' in schema_property:
|
||||
# Use the first non-null type from anyOf
|
||||
for option in schema_property['anyOf']:
|
||||
if option.get('type') and option.get('type') != 'null':
|
||||
prop_type = option.get('type')
|
||||
break
|
||||
else:
|
||||
prop_type = None
|
||||
else:
|
||||
prop_type = schema_property.get('type')
|
||||
|
||||
# Handle array type (e.g., notification_urls)
|
||||
if prop_type == 'array':
|
||||
# Support both comma-separated and JSON array format
|
||||
if value.startswith('['):
|
||||
try:
|
||||
return json.loads(value)
|
||||
except json.JSONDecodeError:
|
||||
return [v.strip() for v in value.split(',')]
|
||||
return [v.strip() for v in value.split(',')]
|
||||
|
||||
# Handle object type (e.g., time_between_check, headers)
|
||||
elif prop_type == 'object':
|
||||
try:
|
||||
return json.loads(value)
|
||||
except json.JSONDecodeError:
|
||||
raise ValueError(f"Invalid JSON object for field: {value}")
|
||||
|
||||
# Handle boolean type
|
||||
elif prop_type == 'boolean':
|
||||
return strtobool(value)
|
||||
|
||||
# Handle integer type
|
||||
elif prop_type == 'integer':
|
||||
return int(value)
|
||||
|
||||
# Handle number type (float)
|
||||
elif prop_type == 'number':
|
||||
return float(value)
|
||||
|
||||
# Default: return as string
|
||||
return value
|
||||
|
||||
|
||||
class Import(Resource):
|
||||
def __init__(self, **kwargs):
|
||||
# datastore is a black box dependency
|
||||
@@ -28,40 +88,127 @@ class Import(Resource):
|
||||
@default_content_type('text/plain') #3547 #3542
|
||||
@validate_openapi_request('importWatches')
|
||||
def post(self):
|
||||
"""Import a list of watched URLs."""
|
||||
"""Import a list of watched URLs with optional watch configuration."""
|
||||
|
||||
# Special parameters that are NOT watch configuration
|
||||
special_params = {'tag', 'tag_uuids', 'dedupe', 'proxy'}
|
||||
|
||||
extras = {}
|
||||
|
||||
# Handle special 'proxy' parameter
|
||||
if request.args.get('proxy'):
|
||||
plist = self.datastore.proxy_list
|
||||
if not request.args.get('proxy') in plist:
|
||||
return "Invalid proxy choice, currently supported proxies are '{}'".format(', '.join(plist)), 400
|
||||
proxy_list_str = ', '.join(plist) if plist else 'none configured'
|
||||
return f"Invalid proxy choice, currently supported proxies are '{proxy_list_str}'", 400
|
||||
else:
|
||||
extras['proxy'] = request.args.get('proxy')
|
||||
|
||||
# Handle special 'dedupe' parameter
|
||||
dedupe = strtobool(request.args.get('dedupe', 'true'))
|
||||
|
||||
# Handle special 'tag' and 'tag_uuids' parameters
|
||||
tags = request.args.get('tag')
|
||||
tag_uuids = request.args.get('tag_uuids')
|
||||
|
||||
if tag_uuids:
|
||||
tag_uuids = tag_uuids.split(',')
|
||||
|
||||
# Extract ALL other query parameters as watch configuration
|
||||
schema_properties = schema_create_watch.get('properties', {})
|
||||
for param_name, param_value in request.args.items():
|
||||
# Skip special parameters
|
||||
if param_name in special_params:
|
||||
continue
|
||||
|
||||
# Skip if not in schema (unknown parameter)
|
||||
if param_name not in schema_properties:
|
||||
return f"Unknown watch configuration parameter: {param_name}", 400
|
||||
|
||||
# Convert to appropriate type based on schema
|
||||
try:
|
||||
converted_value = convert_query_param_to_type(param_value, schema_properties[param_name])
|
||||
extras[param_name] = converted_value
|
||||
except (ValueError, json.JSONDecodeError) as e:
|
||||
return f"Invalid value for parameter '{param_name}': {str(e)}", 400
|
||||
|
||||
# Validate processor if provided
|
||||
if 'processor' in extras:
|
||||
from changedetectionio.processors import available_processors
|
||||
available = [p[0] for p in available_processors()]
|
||||
if extras['processor'] not in available:
|
||||
return f"Invalid processor '{extras['processor']}'. Available processors: {', '.join(available)}", 400
|
||||
|
||||
# Validate fetch_backend if provided
|
||||
if 'fetch_backend' in extras:
|
||||
from changedetectionio.content_fetchers import available_fetchers
|
||||
available = [f[0] for f in available_fetchers()]
|
||||
# Also allow 'system' and extra_browser_* patterns
|
||||
is_valid = (
|
||||
extras['fetch_backend'] == 'system' or
|
||||
extras['fetch_backend'] in available or
|
||||
extras['fetch_backend'].startswith('extra_browser_')
|
||||
)
|
||||
if not is_valid:
|
||||
return f"Invalid fetch_backend '{extras['fetch_backend']}'. Available: system, {', '.join(available)}", 400
|
||||
|
||||
# Validate notification_urls if provided
|
||||
if 'notification_urls' in extras:
|
||||
from wtforms import ValidationError
|
||||
from changedetectionio.api.Notifications import validate_notification_urls
|
||||
try:
|
||||
validate_notification_urls(extras['notification_urls'])
|
||||
except ValidationError as e:
|
||||
return f"Invalid notification_urls: {str(e)}", 400
|
||||
|
||||
urls = request.get_data().decode('utf8').splitlines()
|
||||
added = []
|
||||
# Clean and validate URLs upfront
|
||||
urls_to_import = []
|
||||
for url in urls:
|
||||
url = url.strip()
|
||||
if not len(url):
|
||||
continue
|
||||
|
||||
# If hosts that only contain alphanumerics are allowed ("localhost" for example)
|
||||
# Validate URL
|
||||
if not is_safe_valid_url(url):
|
||||
return f"Invalid or unsupported URL - {url}", 400
|
||||
|
||||
# Check for duplicates if dedupe is enabled
|
||||
if dedupe and self.datastore.url_exists(url):
|
||||
continue
|
||||
|
||||
new_uuid = self.datastore.add_watch(url=url, extras=extras, tag=tags, tag_uuids=tag_uuids)
|
||||
added.append(new_uuid)
|
||||
urls_to_import.append(url)
|
||||
|
||||
return added
|
||||
# For small imports, process synchronously for immediate feedback
|
||||
if len(urls_to_import) < IMPORT_SWITCH_TO_BACKGROUND_THRESHOLD:
|
||||
added = []
|
||||
for url in urls_to_import:
|
||||
new_uuid = self.datastore.add_watch(url=url, extras=extras, tag=tags, tag_uuids=tag_uuids)
|
||||
added.append(new_uuid)
|
||||
return added, 200
|
||||
|
||||
# For large imports (>= 20), process in background thread
|
||||
else:
|
||||
import threading
|
||||
from loguru import logger
|
||||
|
||||
def import_watches_background():
|
||||
"""Background thread to import watches - discarded after completion."""
|
||||
try:
|
||||
added_count = 0
|
||||
for url in urls_to_import:
|
||||
try:
|
||||
self.datastore.add_watch(url=url, extras=extras, tag=tags, tag_uuids=tag_uuids)
|
||||
added_count += 1
|
||||
except Exception as e:
|
||||
logger.error(f"Error importing URL {url}: {e}")
|
||||
|
||||
logger.info(f"Background import complete: {added_count} watches created")
|
||||
except Exception as e:
|
||||
logger.error(f"Error in background import: {e}")
|
||||
|
||||
# Start background thread and return immediately
|
||||
thread = threading.Thread(target=import_watches_background, daemon=True, name="ImportWatches-Background")
|
||||
thread.start()
|
||||
|
||||
return {'status': f'Importing {len(urls_to_import)} URLs in background', 'count': len(urls_to_import)}, 202
|
||||
@@ -67,7 +67,7 @@ class Notifications(Resource):
|
||||
|
||||
clean_urls = [url.strip() for url in notification_urls if isinstance(url, str)]
|
||||
self.datastore.data['settings']['application']['notification_urls'] = clean_urls
|
||||
self.datastore.needs_write = True
|
||||
self.datastore.commit()
|
||||
|
||||
return {'notification_urls': clean_urls}, 200
|
||||
|
||||
@@ -95,7 +95,7 @@ class Notifications(Resource):
|
||||
abort(400, message="No matching notification URLs found.")
|
||||
|
||||
self.datastore.data['settings']['application']['notification_urls'] = notification_urls
|
||||
self.datastore.needs_write = True
|
||||
self.datastore.commit()
|
||||
|
||||
return 'OK', 204
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
from changedetectionio import queuedWatchMetaData
|
||||
from changedetectionio import worker_handler
|
||||
from changedetectionio import worker_pool
|
||||
from flask_expects_json import expects_json
|
||||
from flask_restful import abort, Resource
|
||||
from loguru import logger
|
||||
@@ -24,8 +24,7 @@ class Tag(Resource):
|
||||
@validate_openapi_request('getTag')
|
||||
def get(self, uuid):
|
||||
"""Get data for a single tag/group, toggle notification muting, or recheck all."""
|
||||
from copy import deepcopy
|
||||
tag = deepcopy(self.datastore.data['settings']['application']['tags'].get(uuid))
|
||||
tag = self.datastore.data['settings']['application']['tags'].get(uuid)
|
||||
if not tag:
|
||||
abort(404, message=f'No tag exists with the UUID of {uuid}')
|
||||
|
||||
@@ -42,7 +41,7 @@ class Tag(Resource):
|
||||
# If less than 20 watches, queue synchronously for immediate feedback
|
||||
if len(watches_to_queue) < 20:
|
||||
for watch_uuid in watches_to_queue:
|
||||
worker_handler.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid}))
|
||||
worker_pool.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid}))
|
||||
return {'status': f'OK, queued {len(watches_to_queue)} watches for rechecking'}, 200
|
||||
else:
|
||||
# 20+ watches - queue in background thread to avoid blocking API response
|
||||
@@ -50,7 +49,7 @@ class Tag(Resource):
|
||||
"""Background thread to queue watches - discarded after completion."""
|
||||
try:
|
||||
for watch_uuid in watches_to_queue:
|
||||
worker_handler.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid}))
|
||||
worker_pool.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid}))
|
||||
logger.info(f"Background queueing complete for tag {tag['uuid']}: {len(watches_to_queue)} watches queued")
|
||||
except Exception as e:
|
||||
logger.error(f"Error in background queueing for tag {tag['uuid']}: {e}")
|
||||
@@ -62,10 +61,12 @@ class Tag(Resource):
|
||||
return {'status': f'OK, queueing {len(watches_to_queue)} watches in background'}, 202
|
||||
|
||||
if request.args.get('muted', '') == 'muted':
|
||||
self.datastore.data['settings']['application']['tags'][uuid]['notification_muted'] = True
|
||||
tag['notification_muted'] = True
|
||||
tag.commit()
|
||||
return "OK", 200
|
||||
elif request.args.get('muted', '') == 'unmuted':
|
||||
self.datastore.data['settings']['application']['tags'][uuid]['notification_muted'] = False
|
||||
tag['notification_muted'] = False
|
||||
tag.commit()
|
||||
return "OK", 200
|
||||
|
||||
return tag
|
||||
@@ -79,11 +80,23 @@ class Tag(Resource):
|
||||
|
||||
# Delete the tag, and any tag reference
|
||||
del self.datastore.data['settings']['application']['tags'][uuid]
|
||||
|
||||
|
||||
# Delete tag.json file if it exists
|
||||
import os
|
||||
tag_dir = os.path.join(self.datastore.datastore_path, uuid)
|
||||
tag_json = os.path.join(tag_dir, "tag.json")
|
||||
if os.path.exists(tag_json):
|
||||
try:
|
||||
os.unlink(tag_json)
|
||||
logger.info(f"Deleted tag.json for tag {uuid}")
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to delete tag.json for tag {uuid}: {e}")
|
||||
|
||||
# Remove tag from all watches
|
||||
for watch_uuid, watch in self.datastore.data['watching'].items():
|
||||
if watch.get('tags') and uuid in watch['tags']:
|
||||
watch['tags'].remove(uuid)
|
||||
watch.commit()
|
||||
|
||||
return 'OK', 204
|
||||
|
||||
@@ -96,8 +109,18 @@ class Tag(Resource):
|
||||
if not tag:
|
||||
abort(404, message='No tag exists with the UUID of {}'.format(uuid))
|
||||
|
||||
# Validate notification_urls if provided
|
||||
if 'notification_urls' in request.json:
|
||||
from wtforms import ValidationError
|
||||
from changedetectionio.api.Notifications import validate_notification_urls
|
||||
try:
|
||||
notification_urls = request.json.get('notification_urls', [])
|
||||
validate_notification_urls(notification_urls)
|
||||
except ValidationError as e:
|
||||
return str(e), 400
|
||||
|
||||
tag.update(request.json)
|
||||
self.datastore.needs_write_urgent = True
|
||||
tag.commit()
|
||||
|
||||
return "OK", 200
|
||||
|
||||
|
||||
@@ -6,7 +6,7 @@ from changedetectionio.favicon_utils import get_favicon_mime_type
|
||||
|
||||
from . import auth
|
||||
from changedetectionio import queuedWatchMetaData, strtobool
|
||||
from changedetectionio import worker_handler
|
||||
from changedetectionio import worker_pool
|
||||
from flask import request, make_response, send_from_directory
|
||||
from flask_expects_json import expects_json
|
||||
from flask_restful import abort, Resource
|
||||
@@ -66,47 +66,46 @@ class Watch(Resource):
|
||||
@validate_openapi_request('getWatch')
|
||||
def get(self, uuid):
|
||||
"""Get information about a single watch, recheck, pause, or mute."""
|
||||
import time
|
||||
from copy import deepcopy
|
||||
watch = None
|
||||
# Retry up to 20 times if dict is being modified
|
||||
# With sleep(0), this is fast: ~200µs best case, ~20ms worst case under heavy load
|
||||
for attempt in range(20):
|
||||
try:
|
||||
watch = deepcopy(self.datastore.data['watching'].get(uuid))
|
||||
break
|
||||
except RuntimeError:
|
||||
# Dict changed during deepcopy, retry after yielding to scheduler
|
||||
# sleep(0) releases GIL and yields - no fixed delay, just lets other threads run
|
||||
if attempt < 19: # Don't yield on last attempt
|
||||
time.sleep(0) # Yield to scheduler (microseconds, not milliseconds)
|
||||
|
||||
if not watch:
|
||||
# Get watch reference first (for pause/mute operations)
|
||||
watch_obj = self.datastore.data['watching'].get(uuid)
|
||||
if not watch_obj:
|
||||
abort(404, message='No watch exists with the UUID of {}'.format(uuid))
|
||||
|
||||
# Create a dict copy for JSON response (with lock for thread safety)
|
||||
# This is much faster than deepcopy and doesn't copy the datastore reference
|
||||
# WARNING: dict() is a SHALLOW copy - nested dicts are shared with original!
|
||||
# Only safe because we only ADD scalar properties (line 97-101), never modify nested dicts
|
||||
# If you need to modify nested dicts, use: from copy import deepcopy; watch = deepcopy(dict(watch_obj))
|
||||
with self.datastore.lock:
|
||||
watch = dict(watch_obj)
|
||||
|
||||
if request.args.get('recheck'):
|
||||
worker_handler.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
worker_pool.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
return "OK", 200
|
||||
if request.args.get('paused', '') == 'paused':
|
||||
self.datastore.data['watching'].get(uuid).pause()
|
||||
watch_obj.pause()
|
||||
watch_obj.commit()
|
||||
return "OK", 200
|
||||
elif request.args.get('paused', '') == 'unpaused':
|
||||
self.datastore.data['watching'].get(uuid).unpause()
|
||||
watch_obj.unpause()
|
||||
watch_obj.commit()
|
||||
return "OK", 200
|
||||
if request.args.get('muted', '') == 'muted':
|
||||
self.datastore.data['watching'].get(uuid).mute()
|
||||
watch_obj.mute()
|
||||
watch_obj.commit()
|
||||
return "OK", 200
|
||||
elif request.args.get('muted', '') == 'unmuted':
|
||||
self.datastore.data['watching'].get(uuid).unmute()
|
||||
watch_obj.unmute()
|
||||
watch_obj.commit()
|
||||
return "OK", 200
|
||||
|
||||
# Return without history, get that via another API call
|
||||
# Properties are not returned as a JSON, so add the required props manually
|
||||
watch['history_n'] = watch.history_n
|
||||
watch['history_n'] = watch_obj.history_n
|
||||
# attr .last_changed will check for the last written text snapshot on change
|
||||
watch['last_changed'] = watch.last_changed
|
||||
watch['viewed'] = watch.viewed
|
||||
watch['link'] = watch.link,
|
||||
watch['last_changed'] = watch_obj.last_changed
|
||||
watch['viewed'] = watch_obj.viewed
|
||||
watch['link'] = watch_obj.link,
|
||||
|
||||
return watch
|
||||
|
||||
@@ -140,6 +139,16 @@ class Watch(Resource):
|
||||
if validation_error:
|
||||
return validation_error, 400
|
||||
|
||||
# Validate notification_urls if provided
|
||||
if 'notification_urls' in request.json:
|
||||
from wtforms import ValidationError
|
||||
from changedetectionio.api.Notifications import validate_notification_urls
|
||||
try:
|
||||
notification_urls = request.json.get('notification_urls', [])
|
||||
validate_notification_urls(notification_urls)
|
||||
except ValidationError as e:
|
||||
return str(e), 400
|
||||
|
||||
# XSS etc protection - validate URL if it's being updated
|
||||
if 'url' in request.json:
|
||||
new_url = request.json.get('url')
|
||||
@@ -159,58 +168,19 @@ class Watch(Resource):
|
||||
|
||||
# Handle processor-config-* fields separately (save to JSON, not datastore)
|
||||
from changedetectionio import processors
|
||||
processor_config_data = {}
|
||||
regular_data = {}
|
||||
|
||||
for key, value in request.json.items():
|
||||
if key.startswith('processor_config_'):
|
||||
config_key = key.replace('processor_config_', '')
|
||||
if value: # Only save non-empty values
|
||||
processor_config_data[config_key] = value
|
||||
else:
|
||||
regular_data[key] = value
|
||||
# Make a mutable copy of request.json for modification
|
||||
json_data = dict(request.json)
|
||||
|
||||
# Extract and remove processor config fields from json_data
|
||||
processor_config_data = processors.extract_processor_config_from_form_data(json_data)
|
||||
|
||||
# Update watch with regular (non-processor-config) fields
|
||||
watch.update(regular_data)
|
||||
watch.update(json_data)
|
||||
watch.commit()
|
||||
|
||||
# Save processor config to JSON file if any config data exists
|
||||
if processor_config_data:
|
||||
try:
|
||||
processor_name = request.json.get('processor', watch.get('processor'))
|
||||
if processor_name:
|
||||
# Create a processor instance to access config methods
|
||||
from changedetectionio.processors import difference_detection_processor
|
||||
processor_instance = difference_detection_processor(self.datastore, uuid)
|
||||
# Use processor name as filename so each processor keeps its own config
|
||||
config_filename = f'{processor_name}.json'
|
||||
processor_instance.update_extra_watch_config(config_filename, processor_config_data)
|
||||
logger.debug(f"API: Saved processor config to {config_filename}: {processor_config_data}")
|
||||
|
||||
# Call optional edit_hook if processor has one
|
||||
try:
|
||||
import importlib
|
||||
edit_hook_module_name = f'changedetectionio.processors.{processor_name}.edit_hook'
|
||||
|
||||
try:
|
||||
edit_hook = importlib.import_module(edit_hook_module_name)
|
||||
logger.debug(f"API: Found edit_hook module for {processor_name}")
|
||||
|
||||
if hasattr(edit_hook, 'on_config_save'):
|
||||
logger.info(f"API: Calling edit_hook.on_config_save for {processor_name}")
|
||||
# Call hook and get updated config
|
||||
updated_config = edit_hook.on_config_save(watch, processor_config_data, self.datastore)
|
||||
# Save updated config back to file
|
||||
processor_instance.update_extra_watch_config(config_filename, updated_config)
|
||||
logger.info(f"API: Edit hook updated config: {updated_config}")
|
||||
else:
|
||||
logger.debug(f"API: Edit hook module found but no on_config_save function")
|
||||
except ModuleNotFoundError:
|
||||
logger.debug(f"API: No edit_hook module for processor {processor_name} (this is normal)")
|
||||
except Exception as hook_error:
|
||||
logger.error(f"API: Edit hook error (non-fatal): {hook_error}", exc_info=True)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"API: Failed to save processor config: {e}")
|
||||
# Save processor config to JSON file
|
||||
processors.save_processor_config(self.datastore, uuid, processor_config_data)
|
||||
|
||||
return "OK", 200
|
||||
|
||||
@@ -404,10 +374,10 @@ class WatchFavicon(Resource):
|
||||
favicon_filename = watch.get_favicon_filename()
|
||||
if favicon_filename:
|
||||
# Use cached MIME type detection
|
||||
filepath = os.path.join(watch.watch_data_dir, favicon_filename)
|
||||
filepath = os.path.join(watch.data_dir, favicon_filename)
|
||||
mime = get_favicon_mime_type(filepath)
|
||||
|
||||
response = make_response(send_from_directory(watch.watch_data_dir, favicon_filename))
|
||||
response = make_response(send_from_directory(watch.data_dir, favicon_filename))
|
||||
response.headers['Content-type'] = mime
|
||||
response.headers['Cache-Control'] = 'max-age=300, must-revalidate' # Cache for 5 minutes, then revalidate
|
||||
return response
|
||||
@@ -444,8 +414,24 @@ class CreateWatch(Resource):
|
||||
if validation_error:
|
||||
return validation_error, 400
|
||||
|
||||
# Validate notification_urls if provided
|
||||
if 'notification_urls' in json_data:
|
||||
from wtforms import ValidationError
|
||||
from changedetectionio.api.Notifications import validate_notification_urls
|
||||
try:
|
||||
notification_urls = json_data.get('notification_urls', [])
|
||||
validate_notification_urls(notification_urls)
|
||||
except ValidationError as e:
|
||||
return str(e), 400
|
||||
|
||||
# Handle processor-config-* fields separately (save to JSON, not watch)
|
||||
from changedetectionio import processors
|
||||
|
||||
extras = copy.deepcopy(json_data)
|
||||
|
||||
# Extract and remove processor config fields from extras
|
||||
processor_config_data = processors.extract_processor_config_from_form_data(extras)
|
||||
|
||||
# Because we renamed 'tag' to 'tags' but don't want to change the API (can do this in v2 of the API)
|
||||
tags = None
|
||||
if extras.get('tag'):
|
||||
@@ -455,11 +441,25 @@ class CreateWatch(Resource):
|
||||
del extras['url']
|
||||
|
||||
new_uuid = self.datastore.add_watch(url=url, extras=extras, tag=tags)
|
||||
|
||||
# Save processor config to separate JSON file
|
||||
if new_uuid and processor_config_data:
|
||||
processors.save_processor_config(self.datastore, new_uuid, processor_config_data)
|
||||
if new_uuid:
|
||||
# Dont queue because the scheduler will check that it hasnt been checked before anyway
|
||||
# worker_handler.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid}))
|
||||
# worker_pool.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid}))
|
||||
return {'uuid': new_uuid}, 201
|
||||
else:
|
||||
# Check if it was a limit issue
|
||||
page_watch_limit = os.getenv('PAGE_WATCH_LIMIT')
|
||||
if page_watch_limit:
|
||||
try:
|
||||
page_watch_limit = int(page_watch_limit)
|
||||
current_watch_count = len(self.datastore.data['watching'])
|
||||
if current_watch_count >= page_watch_limit:
|
||||
return f"Watch limit reached ({current_watch_count}/{page_watch_limit} watches). Cannot add more watches.", 429
|
||||
except ValueError:
|
||||
pass
|
||||
return "Invalid or unsupported URL", 400
|
||||
|
||||
@auth.check_token
|
||||
@@ -481,6 +481,7 @@ class CreateWatch(Resource):
|
||||
'last_error': watch['last_error'],
|
||||
'link': watch.link,
|
||||
'page_title': watch['page_title'],
|
||||
'tags': [*tags], # Unpack dict keys to list (can't use list() since variable named 'list')
|
||||
'title': watch['title'],
|
||||
'url': watch['url'],
|
||||
'viewed': watch.viewed
|
||||
@@ -494,7 +495,7 @@ class CreateWatch(Resource):
|
||||
if len(watches_to_queue) < 20:
|
||||
# Get already queued/running UUIDs once (efficient)
|
||||
queued_uuids = set(self.update_q.get_queued_uuids())
|
||||
running_uuids = set(worker_handler.get_running_uuids())
|
||||
running_uuids = set(worker_pool.get_running_uuids())
|
||||
|
||||
# Filter out watches that are already queued or running
|
||||
watches_to_queue_filtered = [
|
||||
@@ -504,7 +505,7 @@ class CreateWatch(Resource):
|
||||
|
||||
# Queue only the filtered watches
|
||||
for uuid in watches_to_queue_filtered:
|
||||
worker_handler.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
worker_pool.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
|
||||
# Provide feedback about skipped watches
|
||||
skipped_count = len(watches_to_queue) - len(watches_to_queue_filtered)
|
||||
@@ -516,7 +517,7 @@ class CreateWatch(Resource):
|
||||
# 20+ watches - queue in background thread to avoid blocking API response
|
||||
# Capture queued/running state before background thread
|
||||
queued_uuids = set(self.update_q.get_queued_uuids())
|
||||
running_uuids = set(worker_handler.get_running_uuids())
|
||||
running_uuids = set(worker_pool.get_running_uuids())
|
||||
|
||||
def queue_all_watches_background():
|
||||
"""Background thread to queue all watches - discarded after completion."""
|
||||
@@ -526,7 +527,7 @@ class CreateWatch(Resource):
|
||||
for uuid in watches_to_queue:
|
||||
# Check if already queued or running (state captured at start)
|
||||
if uuid not in queued_uuids and uuid not in running_uuids:
|
||||
worker_handler.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
worker_pool.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
queued_count += 1
|
||||
else:
|
||||
skipped_count += 1
|
||||
|
||||
@@ -12,9 +12,17 @@ schema = api_schema.build_watch_json_schema(watch_base_config)
|
||||
schema_create_watch = copy.deepcopy(schema)
|
||||
schema_create_watch['required'] = ['url']
|
||||
del schema_create_watch['properties']['last_viewed']
|
||||
# Allow processor_config_* fields (handled separately in endpoint)
|
||||
schema_create_watch['patternProperties'] = {
|
||||
'^processor_config_': {'type': ['string', 'number', 'boolean', 'object', 'array', 'null']}
|
||||
}
|
||||
|
||||
schema_update_watch = copy.deepcopy(schema)
|
||||
schema_update_watch['additionalProperties'] = False
|
||||
# Allow processor_config_* fields (handled separately in endpoint)
|
||||
schema_update_watch['patternProperties'] = {
|
||||
'^processor_config_': {'type': ['string', 'number', 'boolean', 'object', 'array', 'null']}
|
||||
}
|
||||
|
||||
# Tag schema is also based on watch_base since Tag inherits from it
|
||||
schema_tag = copy.deepcopy(schema)
|
||||
|
||||
@@ -27,15 +27,27 @@ def create_backup(datastore_path, watches: dict):
|
||||
compression=zipfile.ZIP_DEFLATED,
|
||||
compresslevel=8) as zipObj:
|
||||
|
||||
# Add the index
|
||||
zipObj.write(os.path.join(datastore_path, "url-watches.json"), arcname="url-watches.json")
|
||||
# Add the settings file (supports both formats)
|
||||
# New format: changedetection.json
|
||||
changedetection_json = os.path.join(datastore_path, "changedetection.json")
|
||||
if os.path.isfile(changedetection_json):
|
||||
zipObj.write(changedetection_json, arcname="changedetection.json")
|
||||
logger.debug("Added changedetection.json to backup")
|
||||
|
||||
# Add the flask app secret
|
||||
zipObj.write(os.path.join(datastore_path, "secret.txt"), arcname="secret.txt")
|
||||
# Legacy format: url-watches.json (for backward compatibility)
|
||||
url_watches_json = os.path.join(datastore_path, "url-watches.json")
|
||||
if os.path.isfile(url_watches_json):
|
||||
zipObj.write(url_watches_json, arcname="url-watches.json")
|
||||
logger.debug("Added url-watches.json to backup")
|
||||
|
||||
# Add the flask app secret (if it exists)
|
||||
secret_file = os.path.join(datastore_path, "secret.txt")
|
||||
if os.path.isfile(secret_file):
|
||||
zipObj.write(secret_file, arcname="secret.txt")
|
||||
|
||||
# Add any data in the watch data directory.
|
||||
for uuid, w in watches.items():
|
||||
for f in Path(w.watch_data_dir).glob('*'):
|
||||
for f in Path(w.data_dir).glob('*'):
|
||||
zipObj.write(f,
|
||||
# Use the full path to access the file, but make the file 'relative' in the Zip.
|
||||
arcname=os.path.join(f.parts[-2], f.parts[-1]),
|
||||
@@ -90,8 +102,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
flash(gettext("Maximum number of backups reached, please remove some"), "error")
|
||||
return redirect(url_for('backups.index'))
|
||||
|
||||
# Be sure we're written fresh
|
||||
datastore.sync_to_json()
|
||||
# With immediate persistence, all data is already saved
|
||||
zip_thread = threading.Thread(
|
||||
target=create_backup,
|
||||
args=(datastore.datastore_path, datastore.data.get("watching")),
|
||||
|
||||
@@ -3,10 +3,10 @@
|
||||
{% from '_helpers.html' import render_simple_field, render_field %}
|
||||
<div class="edit-form">
|
||||
<div class="box-wrap inner">
|
||||
<h4>{{ _('Backups') }}</h4>
|
||||
<h2>{{ _('Backups') }}</h2>
|
||||
{% if backup_running %}
|
||||
<p>
|
||||
<strong>{{ _('A backup is running!') }}</strong>
|
||||
<span class="spinner"></span> <strong>{{ _('A backup is running!') }}</strong>
|
||||
</p>
|
||||
{% endif %}
|
||||
<p>
|
||||
|
||||
@@ -285,8 +285,8 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
watch = datastore.data['watching'].get(uuid)
|
||||
filename = f"step_before-{step_n}.jpeg" if request.args.get('type', '') == 'before' else f"step_{step_n}.jpeg"
|
||||
|
||||
if step_n and watch and os.path.isfile(os.path.join(watch.watch_data_dir, filename)):
|
||||
response = make_response(send_from_directory(directory=watch.watch_data_dir, path=filename))
|
||||
if step_n and watch and os.path.isfile(os.path.join(watch.data_dir, filename)):
|
||||
response = make_response(send_from_directory(directory=watch.data_dir, path=filename))
|
||||
response.headers['Content-type'] = 'image/jpeg'
|
||||
response.headers['Cache-Control'] = 'no-cache, no-store, must-revalidate'
|
||||
response.headers['Pragma'] = 'no-cache'
|
||||
|
||||
@@ -14,7 +14,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
from changedetectionio import forms
|
||||
#
|
||||
if request.method == 'POST':
|
||||
# from changedetectionio import worker_handler
|
||||
# from changedetectionio import worker_pool
|
||||
|
||||
from changedetectionio.blueprint.imports.importer import (
|
||||
import_url_list,
|
||||
@@ -26,12 +26,13 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
# URL List import
|
||||
if request.values.get('urls') and len(request.values.get('urls').strip()):
|
||||
# Import and push into the queue for immediate update check
|
||||
from changedetectionio import processors
|
||||
importer_handler = import_url_list()
|
||||
importer_handler.run(data=request.values.get('urls'), flash=flash, datastore=datastore, processor=request.values.get('processor', 'text_json_diff'))
|
||||
importer_handler.run(data=request.values.get('urls'), flash=flash, datastore=datastore, processor=request.values.get('processor', processors.get_default_processor()))
|
||||
logger.debug(f"Imported {len(importer_handler.new_uuids)} new UUIDs")
|
||||
# Dont' add to queue because scheduler can see that they haven't been checked and will add them to the queue
|
||||
# for uuid in importer_handler.new_uuids:
|
||||
# worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
# worker_pool.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
|
||||
if len(importer_handler.remaining_data) == 0:
|
||||
return redirect(url_for('watchlist.index'))
|
||||
@@ -45,7 +46,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
d_importer.run(data=request.values.get('distill-io'), flash=flash, datastore=datastore)
|
||||
# Dont' add to queue because scheduler can see that they haven't been checked and will add them to the queue
|
||||
# for uuid in importer_handler.new_uuids:
|
||||
# worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
# worker_pool.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
|
||||
|
||||
# XLSX importer
|
||||
@@ -70,7 +71,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
|
||||
# Dont' add to queue because scheduler can see that they haven't been checked and will add them to the queue
|
||||
# for uuid in importer_handler.new_uuids:
|
||||
# worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
# worker_pool.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
|
||||
|
||||
# Could be some remaining, or we could be on GET
|
||||
|
||||
@@ -62,7 +62,7 @@ class import_url_list(Importer):
|
||||
extras = None
|
||||
if processor:
|
||||
extras = {'processor': processor}
|
||||
new_uuid = datastore.add_watch(url=url.strip(), tag=tags, write_to_disk_now=False, extras=extras)
|
||||
new_uuid = datastore.add_watch(url=url.strip(), tag=tags, save_immediately=False, extras=extras)
|
||||
|
||||
if new_uuid:
|
||||
# Straight into the queue.
|
||||
@@ -129,7 +129,7 @@ class import_distill_io_json(Importer):
|
||||
new_uuid = datastore.add_watch(url=d['uri'].strip(),
|
||||
tag=",".join(d.get('tags', [])),
|
||||
extras=extras,
|
||||
write_to_disk_now=False)
|
||||
save_immediately=False)
|
||||
|
||||
if new_uuid:
|
||||
# Straight into the queue.
|
||||
@@ -204,7 +204,7 @@ class import_xlsx_wachete(Importer):
|
||||
new_uuid = datastore.add_watch(url=data['url'].strip(),
|
||||
extras=extras,
|
||||
tag=data.get('folder'),
|
||||
write_to_disk_now=False)
|
||||
save_immediately=False)
|
||||
if new_uuid:
|
||||
# Straight into the queue.
|
||||
self.new_uuids.append(new_uuid)
|
||||
@@ -287,7 +287,7 @@ class import_xlsx_custom(Importer):
|
||||
new_uuid = datastore.add_watch(url=url,
|
||||
extras=extras,
|
||||
tag=tags,
|
||||
write_to_disk_now=False)
|
||||
save_immediately=False)
|
||||
if new_uuid:
|
||||
# Straight into the queue.
|
||||
self.new_uuids.append(new_uuid)
|
||||
|
||||
@@ -4,7 +4,7 @@ from flask import Blueprint, flash, redirect, url_for
|
||||
from flask_login import login_required
|
||||
from changedetectionio.store import ChangeDetectionStore
|
||||
from changedetectionio import queuedWatchMetaData
|
||||
from changedetectionio import worker_handler
|
||||
from changedetectionio import worker_pool
|
||||
from queue import PriorityQueue
|
||||
|
||||
PRICE_DATA_TRACK_ACCEPT = 'accepted'
|
||||
@@ -20,13 +20,15 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q: PriorityQueue
|
||||
datastore.data['watching'][uuid]['track_ldjson_price_data'] = PRICE_DATA_TRACK_ACCEPT
|
||||
datastore.data['watching'][uuid]['processor'] = 'restock_diff'
|
||||
datastore.data['watching'][uuid].clear_watch()
|
||||
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
datastore.data['watching'][uuid].commit()
|
||||
worker_pool.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
return redirect(url_for("watchlist.index"))
|
||||
|
||||
@login_required
|
||||
@price_data_follower_blueprint.route("/<string:uuid>/reject", methods=['GET'])
|
||||
def reject(uuid):
|
||||
datastore.data['watching'][uuid]['track_ldjson_price_data'] = PRICE_DATA_TRACK_REJECT
|
||||
datastore.data['watching'][uuid].commit()
|
||||
return redirect(url_for("watchlist.index"))
|
||||
|
||||
|
||||
|
||||
@@ -37,6 +37,8 @@ def construct_single_watch_routes(rss_blueprint, datastore):
|
||||
|
||||
rss_content_format = datastore.data['settings']['application'].get('rss_content_format')
|
||||
|
||||
if uuid == 'first':
|
||||
uuid = list(datastore.data['watching'].keys()).pop()
|
||||
# Get the watch by UUID
|
||||
watch = datastore.data['watching'].get(uuid)
|
||||
if not watch:
|
||||
|
||||
@@ -1,8 +1,9 @@
|
||||
import os
|
||||
from copy import deepcopy
|
||||
from datetime import datetime
|
||||
from datetime import datetime, timedelta
|
||||
from zoneinfo import ZoneInfo, available_timezones
|
||||
import secrets
|
||||
import time
|
||||
import flask_login
|
||||
from flask import Blueprint, render_template, request, redirect, url_for, flash
|
||||
from flask_babel import gettext
|
||||
@@ -74,16 +75,17 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
del (app_update['password'])
|
||||
|
||||
datastore.data['settings']['application'].update(app_update)
|
||||
|
||||
|
||||
# Handle dynamic worker count adjustment
|
||||
old_worker_count = datastore.data['settings']['requests'].get('workers', 1)
|
||||
new_worker_count = form.data['requests'].get('workers', 1)
|
||||
|
||||
datastore.data['settings']['requests'].update(form.data['requests'])
|
||||
datastore.commit()
|
||||
|
||||
# Adjust worker count if it changed
|
||||
if new_worker_count != old_worker_count:
|
||||
from changedetectionio import worker_handler
|
||||
from changedetectionio import worker_pool
|
||||
from changedetectionio.flask_app import update_q, notification_q, app, datastore as ds
|
||||
|
||||
# Check CPU core availability and warn if worker count is high
|
||||
@@ -92,7 +94,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
flash(gettext("Warning: Worker count ({}) is close to or exceeds available CPU cores ({})").format(
|
||||
new_worker_count, cpu_count), 'warning')
|
||||
|
||||
result = worker_handler.adjust_async_worker_count(
|
||||
result = worker_pool.adjust_async_worker_count(
|
||||
new_count=new_worker_count,
|
||||
update_q=update_q,
|
||||
notification_q=notification_q,
|
||||
@@ -109,13 +111,11 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
|
||||
if not os.getenv("SALTED_PASS", False) and len(form.application.form.password.encrypted_password):
|
||||
datastore.data['settings']['application']['password'] = form.application.form.password.encrypted_password
|
||||
datastore.needs_write_urgent = True
|
||||
datastore.commit()
|
||||
flash(gettext("Password protection enabled."), 'notice')
|
||||
flask_login.logout_user()
|
||||
return redirect(url_for('watchlist.index'))
|
||||
|
||||
datastore.needs_write_urgent = True
|
||||
|
||||
# Also save plugin settings from the same form submission
|
||||
plugin_tabs_list = get_plugin_settings_tabs()
|
||||
for tab in plugin_tabs_list:
|
||||
@@ -143,6 +143,9 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
active_plugins = get_active_plugins()
|
||||
python_version = f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}"
|
||||
|
||||
# Calculate uptime in seconds
|
||||
uptime_seconds = time.time() - datastore.start_time
|
||||
|
||||
# Get plugin settings tabs and instantiate forms
|
||||
plugin_tabs = get_plugin_settings_tabs()
|
||||
plugin_forms = {}
|
||||
@@ -161,6 +164,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
active_plugins=active_plugins,
|
||||
api_key=datastore.data['settings']['application'].get('api_access_token'),
|
||||
python_version=python_version,
|
||||
uptime_seconds=uptime_seconds,
|
||||
available_timezones=sorted(available_timezones()),
|
||||
emailprefix=os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False),
|
||||
extra_notification_token_placeholder_info=datastore.get_unique_notification_token_placeholders_available(),
|
||||
@@ -181,7 +185,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
def settings_reset_api_key():
|
||||
secret = secrets.token_hex(16)
|
||||
datastore.data['settings']['application']['api_access_token'] = secret
|
||||
datastore.needs_write_urgent = True
|
||||
datastore.commit()
|
||||
flash(gettext("API Key was regenerated."))
|
||||
return redirect(url_for('settings.settings_page')+'#api')
|
||||
|
||||
@@ -198,7 +202,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
def toggle_all_paused():
|
||||
current_state = datastore.data['settings']['application'].get('all_paused', False)
|
||||
datastore.data['settings']['application']['all_paused'] = not current_state
|
||||
datastore.needs_write_urgent = True
|
||||
datastore.commit()
|
||||
|
||||
if datastore.data['settings']['application']['all_paused']:
|
||||
flash(gettext("Automatic scheduling paused - checks will not be queued."), 'notice')
|
||||
@@ -212,7 +216,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
def toggle_all_muted():
|
||||
current_state = datastore.data['settings']['application'].get('all_muted', False)
|
||||
datastore.data['settings']['application']['all_muted'] = not current_state
|
||||
datastore.needs_write_urgent = True
|
||||
datastore.commit()
|
||||
|
||||
if datastore.data['settings']['application']['all_muted']:
|
||||
flash(gettext("All notifications muted."), 'notice')
|
||||
|
||||
@@ -25,7 +25,7 @@
|
||||
<li class="tab"><a href="#ui-options">{{ _('UI Options') }}</a></li>
|
||||
<li class="tab"><a href="#api">{{ _('API') }}</a></li>
|
||||
<li class="tab"><a href="#rss">{{ _('RSS') }}</a></li>
|
||||
<li class="tab"><a href="{{ url_for('backups.index') }}" class="pure-menu-link">{{ _('Backups') }}</a></li>
|
||||
<li class="tab"><a href="{{ url_for('backups.index') }}">{{ _('Backups') }}</a></li>
|
||||
<li class="tab"><a href="#timedate">{{ _('Time & Date') }}</a></li>
|
||||
<li class="tab"><a href="#proxies">{{ _('CAPTCHA & Proxies') }}</a></li>
|
||||
{% if plugin_tabs %}
|
||||
@@ -59,6 +59,14 @@
|
||||
{{ _('Set to') }} <strong>0</strong> {{ _('to disable') }}
|
||||
</span>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.application.form.history_snapshot_max_length, class="history_snapshot_max_length") }}
|
||||
<span class="pure-form-message-inline">{{ _('Limit collection of history snapshots for each watch to this number of history items.') }}
|
||||
<br>
|
||||
{{ _('Set to empty to disable / no limit') }}
|
||||
</span>
|
||||
</div>
|
||||
|
||||
<div class="pure-control-group">
|
||||
{% if not hide_remove_pass %}
|
||||
{% if current_user.is_authenticated %}
|
||||
@@ -80,6 +88,16 @@
|
||||
{{ render_checkbox_field(form.application.form.empty_pages_are_a_change) }}
|
||||
<span class="pure-form-message-inline">{{ _('When a request returns no content, or the HTML does not contain any text, is this considered a change?') }}</span>
|
||||
</div>
|
||||
{% if form.requests.proxy %}
|
||||
<div>
|
||||
<br>
|
||||
<div class="inline-radio">
|
||||
{{ render_field(form.requests.form.proxy, class="fetch-backend-proxy") }}
|
||||
<span class="pure-form-message-inline">{{ _('Choose a default proxy for all watches') }}</span>
|
||||
</div>
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
</fieldset>
|
||||
</div>
|
||||
|
||||
@@ -340,15 +358,6 @@ nav
|
||||
{{ render_fieldlist_with_inline_errors(form.requests.form.extra_proxies) }}
|
||||
<span class="pure-form-message-inline">{{ _('"Name" will be used for selecting the proxy in the Watch Edit settings') }}</span><br>
|
||||
<span class="pure-form-message-inline">{{ _('SOCKS5 proxies with authentication are only supported with \'plain requests\' fetcher, for other fetchers you should whitelist the IP access instead') }}</span>
|
||||
{% if form.requests.proxy %}
|
||||
<div>
|
||||
<br>
|
||||
<div class="inline-radio">
|
||||
{{ render_field(form.requests.form.proxy, class="fetch-backend-proxy") }}
|
||||
<span class="pure-form-message-inline">{{ _('Choose a default proxy for all watches') }}</span>
|
||||
</div>
|
||||
</div>
|
||||
{% endif %}
|
||||
</div>
|
||||
<div class="pure-control-group" id="extra-browsers-setting">
|
||||
<p>
|
||||
@@ -385,6 +394,7 @@ nav
|
||||
{% endfor %}
|
||||
{% endif %}
|
||||
<div class="tab-pane-inner" id="info">
|
||||
<p><strong>{{ _('Uptime:') }}</strong> {{ uptime_seconds|format_duration }}</p>
|
||||
<p><strong>{{ _('Python version:') }}</strong> {{ python_version }}</p>
|
||||
<p><strong>{{ _('Plugins active:') }}</strong></p>
|
||||
{% if active_plugins %}
|
||||
|
||||
@@ -57,8 +57,10 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
@tags_blueprint.route("/mute/<string:uuid>", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def mute(uuid):
|
||||
if datastore.data['settings']['application']['tags'].get(uuid):
|
||||
datastore.data['settings']['application']['tags'][uuid]['notification_muted'] = not datastore.data['settings']['application']['tags'][uuid]['notification_muted']
|
||||
tag = datastore.data['settings']['application']['tags'].get(uuid)
|
||||
if tag:
|
||||
tag['notification_muted'] = not tag['notification_muted']
|
||||
tag.commit()
|
||||
return redirect(url_for('tags.tags_overview_page'))
|
||||
|
||||
@tags_blueprint.route("/delete/<string:uuid>", methods=['GET'])
|
||||
@@ -68,6 +70,17 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
if datastore.data['settings']['application']['tags'].get(uuid):
|
||||
del datastore.data['settings']['application']['tags'][uuid]
|
||||
|
||||
# Delete tag.json file if it exists
|
||||
import os
|
||||
tag_dir = os.path.join(datastore.datastore_path, uuid)
|
||||
tag_json = os.path.join(tag_dir, "tag.json")
|
||||
if os.path.exists(tag_json):
|
||||
try:
|
||||
os.unlink(tag_json)
|
||||
logger.info(f"Deleted tag.json for tag {uuid}")
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to delete tag.json for tag {uuid}: {e}")
|
||||
|
||||
# Remove tag from all watches in background thread to avoid blocking
|
||||
def remove_tag_background(tag_uuid):
|
||||
"""Background thread to remove tag from watches - discarded after completion."""
|
||||
@@ -76,6 +89,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
for watch_uuid, watch in datastore.data['watching'].items():
|
||||
if watch.get('tags') and tag_uuid in watch['tags']:
|
||||
watch['tags'].remove(tag_uuid)
|
||||
watch.commit()
|
||||
removed_count += 1
|
||||
logger.info(f"Background: Tag {tag_uuid} removed from {removed_count} watches")
|
||||
except Exception as e:
|
||||
@@ -98,6 +112,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
for watch_uuid, watch in datastore.data['watching'].items():
|
||||
if watch.get('tags') and tag_uuid in watch['tags']:
|
||||
watch['tags'].remove(tag_uuid)
|
||||
watch.commit()
|
||||
unlinked_count += 1
|
||||
logger.info(f"Background: Tag {tag_uuid} unlinked from {unlinked_count} watches")
|
||||
except Exception as e:
|
||||
@@ -112,6 +127,17 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
@tags_blueprint.route("/delete_all", methods=['GET'])
|
||||
@login_optionally_required
|
||||
def delete_all():
|
||||
# Delete all tag.json files
|
||||
import os
|
||||
for tag_uuid in list(datastore.data['settings']['application']['tags'].keys()):
|
||||
tag_dir = os.path.join(datastore.datastore_path, tag_uuid)
|
||||
tag_json = os.path.join(tag_dir, "tag.json")
|
||||
if os.path.exists(tag_json):
|
||||
try:
|
||||
os.unlink(tag_json)
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to delete tag.json for tag {tag_uuid}: {e}")
|
||||
|
||||
# Clear all tags from settings immediately
|
||||
datastore.data['settings']['application']['tags'] = {}
|
||||
|
||||
@@ -122,6 +148,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
try:
|
||||
for watch_uuid, watch in datastore.data['watching'].items():
|
||||
watch['tags'] = []
|
||||
watch.commit()
|
||||
cleared_count += 1
|
||||
logger.info(f"Background: Cleared tags from {cleared_count} watches")
|
||||
except Exception as e:
|
||||
@@ -202,10 +229,10 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
if uuid == 'first':
|
||||
uuid = list(datastore.data['settings']['application']['tags'].keys()).pop()
|
||||
|
||||
default = datastore.data['settings']['application']['tags'].get(uuid)
|
||||
tag = datastore.data['settings']['application']['tags'].get(uuid)
|
||||
|
||||
form = group_restock_settings_form(formdata=request.form if request.method == 'POST' else None,
|
||||
data=default,
|
||||
data=tag,
|
||||
extra_notification_tokens=datastore.get_unique_notification_tokens_available()
|
||||
)
|
||||
# @todo subclass form so validation works
|
||||
@@ -214,9 +241,9 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
# flash(','.join(l), 'error')
|
||||
# return redirect(url_for('tags.form_tag_edit_submit', uuid=uuid))
|
||||
|
||||
datastore.data['settings']['application']['tags'][uuid].update(form.data)
|
||||
datastore.data['settings']['application']['tags'][uuid]['processor'] = 'restock_diff'
|
||||
datastore.needs_write_urgent = True
|
||||
tag.update(form.data)
|
||||
tag['processor'] = 'restock_diff'
|
||||
tag.commit()
|
||||
flash(gettext("Updated"))
|
||||
|
||||
return redirect(url_for('tags.tags_overview_page'))
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import time
|
||||
import threading
|
||||
from flask import Blueprint, request, redirect, url_for, flash, render_template, session
|
||||
from flask import Blueprint, request, redirect, url_for, flash, render_template, session, current_app
|
||||
from flask_babel import gettext
|
||||
from loguru import logger
|
||||
|
||||
@@ -10,7 +10,7 @@ from changedetectionio.blueprint.ui.notification import construct_blueprint as c
|
||||
from changedetectionio.blueprint.ui.views import construct_blueprint as construct_views_blueprint
|
||||
from changedetectionio.blueprint.ui import diff, preview
|
||||
|
||||
def _handle_operations(op, uuids, datastore, worker_handler, update_q, queuedWatchMetaData, watch_check_update, extra_data=None, emit_flash=True):
|
||||
def _handle_operations(op, uuids, datastore, worker_pool, update_q, queuedWatchMetaData, watch_check_update, extra_data=None, emit_flash=True):
|
||||
from flask import request, flash
|
||||
|
||||
if op == 'delete':
|
||||
@@ -24,6 +24,7 @@ def _handle_operations(op, uuids, datastore, worker_handler, update_q, queuedWat
|
||||
for uuid in uuids:
|
||||
if datastore.data['watching'].get(uuid):
|
||||
datastore.data['watching'][uuid]['paused'] = True
|
||||
datastore.data['watching'][uuid].commit()
|
||||
if emit_flash:
|
||||
flash(gettext("{} watches paused").format(len(uuids)))
|
||||
|
||||
@@ -31,6 +32,7 @@ def _handle_operations(op, uuids, datastore, worker_handler, update_q, queuedWat
|
||||
for uuid in uuids:
|
||||
if datastore.data['watching'].get(uuid):
|
||||
datastore.data['watching'][uuid.strip()]['paused'] = False
|
||||
datastore.data['watching'][uuid].commit()
|
||||
if emit_flash:
|
||||
flash(gettext("{} watches unpaused").format(len(uuids)))
|
||||
|
||||
@@ -45,6 +47,7 @@ def _handle_operations(op, uuids, datastore, worker_handler, update_q, queuedWat
|
||||
for uuid in uuids:
|
||||
if datastore.data['watching'].get(uuid):
|
||||
datastore.data['watching'][uuid]['notification_muted'] = True
|
||||
datastore.data['watching'][uuid].commit()
|
||||
if emit_flash:
|
||||
flash(gettext("{} watches muted").format(len(uuids)))
|
||||
|
||||
@@ -52,6 +55,7 @@ def _handle_operations(op, uuids, datastore, worker_handler, update_q, queuedWat
|
||||
for uuid in uuids:
|
||||
if datastore.data['watching'].get(uuid):
|
||||
datastore.data['watching'][uuid]['notification_muted'] = False
|
||||
datastore.data['watching'][uuid].commit()
|
||||
if emit_flash:
|
||||
flash(gettext("{} watches un-muted").format(len(uuids)))
|
||||
|
||||
@@ -59,7 +63,7 @@ def _handle_operations(op, uuids, datastore, worker_handler, update_q, queuedWat
|
||||
for uuid in uuids:
|
||||
if datastore.data['watching'].get(uuid):
|
||||
# Recheck and require a full reprocessing
|
||||
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
worker_pool.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
if emit_flash:
|
||||
flash(gettext("{} watches queued for rechecking").format(len(uuids)))
|
||||
|
||||
@@ -67,6 +71,7 @@ def _handle_operations(op, uuids, datastore, worker_handler, update_q, queuedWat
|
||||
for uuid in uuids:
|
||||
if datastore.data['watching'].get(uuid):
|
||||
datastore.data['watching'][uuid]["last_error"] = False
|
||||
datastore.data['watching'][uuid].commit()
|
||||
if emit_flash:
|
||||
flash(gettext("{} watches errors cleared").format(len(uuids)))
|
||||
|
||||
@@ -87,6 +92,7 @@ def _handle_operations(op, uuids, datastore, worker_handler, update_q, queuedWat
|
||||
datastore.data['watching'][uuid]['notification_body'] = None
|
||||
datastore.data['watching'][uuid]['notification_urls'] = []
|
||||
datastore.data['watching'][uuid]['notification_format'] = USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH
|
||||
datastore.data['watching'][uuid].commit()
|
||||
if emit_flash:
|
||||
flash(gettext("{} watches set to use default notification settings").format(len(uuids)))
|
||||
|
||||
@@ -102,6 +108,7 @@ def _handle_operations(op, uuids, datastore, worker_handler, update_q, queuedWat
|
||||
datastore.data['watching'][uuid]['tags'] = []
|
||||
|
||||
datastore.data['watching'][uuid]['tags'].append(tag_uuid)
|
||||
datastore.data['watching'][uuid].commit()
|
||||
if emit_flash:
|
||||
flash(gettext("{} watches were tagged").format(len(uuids)))
|
||||
|
||||
@@ -109,7 +116,7 @@ def _handle_operations(op, uuids, datastore, worker_handler, update_q, queuedWat
|
||||
for uuid in uuids:
|
||||
watch_check_update.send(watch_uuid=uuid)
|
||||
|
||||
def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_handler, queuedWatchMetaData, watch_check_update):
|
||||
def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_pool, queuedWatchMetaData, watch_check_update):
|
||||
ui_blueprint = Blueprint('ui', __name__, template_folder="templates")
|
||||
|
||||
# Register the edit blueprint
|
||||
@@ -217,14 +224,14 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_handle
|
||||
@login_optionally_required
|
||||
def form_delete():
|
||||
uuid = request.args.get('uuid')
|
||||
# More for testing, possible to return the first/only
|
||||
if uuid == 'first':
|
||||
uuid = list(datastore.data['watching'].keys()).pop()
|
||||
|
||||
if uuid != 'all' and not uuid in datastore.data['watching'].keys():
|
||||
flash(gettext('The watch by UUID {} does not exist.').format(uuid), 'error')
|
||||
return redirect(url_for('watchlist.index'))
|
||||
|
||||
# More for testing, possible to return the first/only
|
||||
if uuid == 'first':
|
||||
uuid = list(datastore.data['watching'].keys()).pop()
|
||||
datastore.delete(uuid)
|
||||
flash(gettext('Deleted.'))
|
||||
|
||||
@@ -234,14 +241,14 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_handle
|
||||
@login_optionally_required
|
||||
def form_clone():
|
||||
uuid = request.args.get('uuid')
|
||||
# More for testing, possible to return the first/only
|
||||
|
||||
if uuid == 'first':
|
||||
uuid = list(datastore.data['watching'].keys()).pop()
|
||||
|
||||
new_uuid = datastore.clone(uuid)
|
||||
|
||||
if not datastore.data['watching'].get(uuid).get('paused'):
|
||||
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=5, item={'uuid': new_uuid}))
|
||||
worker_pool.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=5, item={'uuid': new_uuid}))
|
||||
|
||||
flash(gettext('Cloned, you are editing the new watch.'))
|
||||
|
||||
@@ -257,10 +264,10 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_handle
|
||||
|
||||
if uuid:
|
||||
# Single watch - check if already queued or running
|
||||
if worker_handler.is_watch_running(uuid) or uuid in update_q.get_queued_uuids():
|
||||
if worker_pool.is_watch_running(uuid) or uuid in update_q.get_queued_uuids():
|
||||
flash(gettext("Watch is already queued or being checked."))
|
||||
else:
|
||||
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
worker_pool.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
flash(gettext("Queued 1 watch for rechecking."))
|
||||
else:
|
||||
# Multiple watches - first count how many need to be queued
|
||||
@@ -279,7 +286,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_handle
|
||||
if len(watches_to_queue) < 20:
|
||||
# Get already queued/running UUIDs once (efficient)
|
||||
queued_uuids = set(update_q.get_queued_uuids())
|
||||
running_uuids = set(worker_handler.get_running_uuids())
|
||||
running_uuids = set(worker_pool.get_running_uuids())
|
||||
|
||||
# Filter out watches that are already queued or running
|
||||
watches_to_queue_filtered = []
|
||||
@@ -289,7 +296,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_handle
|
||||
|
||||
# Queue only the filtered watches
|
||||
for watch_uuid in watches_to_queue_filtered:
|
||||
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid}))
|
||||
worker_pool.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid}))
|
||||
|
||||
# Provide feedback about skipped watches
|
||||
skipped_count = len(watches_to_queue) - len(watches_to_queue_filtered)
|
||||
@@ -305,7 +312,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_handle
|
||||
# 20+ watches - queue in background thread to avoid blocking HTTP response
|
||||
# Capture queued/running state before background thread
|
||||
queued_uuids = set(update_q.get_queued_uuids())
|
||||
running_uuids = set(worker_handler.get_running_uuids())
|
||||
running_uuids = set(worker_pool.get_running_uuids())
|
||||
|
||||
def queue_watches_background():
|
||||
"""Background thread to queue watches - discarded after completion."""
|
||||
@@ -315,7 +322,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_handle
|
||||
for watch_uuid in watches_to_queue:
|
||||
# Check if already queued or running (state captured at start)
|
||||
if watch_uuid not in queued_uuids and watch_uuid not in running_uuids:
|
||||
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid}))
|
||||
worker_pool.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid}))
|
||||
queued_count += 1
|
||||
else:
|
||||
skipped_count += 1
|
||||
@@ -344,7 +351,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_handle
|
||||
extra_data=extra_data,
|
||||
queuedWatchMetaData=queuedWatchMetaData,
|
||||
uuids=uuids,
|
||||
worker_handler=worker_handler,
|
||||
worker_pool=worker_pool,
|
||||
update_q=update_q,
|
||||
watch_check_update=watch_check_update,
|
||||
op=op,
|
||||
@@ -362,9 +369,6 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_handle
|
||||
import json
|
||||
from copy import deepcopy
|
||||
|
||||
# more for testing
|
||||
if uuid == 'first':
|
||||
uuid = list(datastore.data['watching'].keys()).pop()
|
||||
|
||||
# copy it to memory as trim off what we dont need (history)
|
||||
watch = deepcopy(datastore.data['watching'].get(uuid))
|
||||
@@ -404,4 +408,25 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_handle
|
||||
|
||||
return redirect(url_for('watchlist.index'))
|
||||
|
||||
@ui_blueprint.route("/language/auto-detect", methods=['GET'])
|
||||
def delete_locale_language_session_var_if_it_exists():
|
||||
"""Clear the session locale preference to auto-detect from browser Accept-Language header"""
|
||||
if 'locale' in session:
|
||||
session.pop('locale', None)
|
||||
# Refresh Flask-Babel to clear cached locale
|
||||
from flask_babel import refresh
|
||||
refresh()
|
||||
flash(gettext("Language set to auto-detect from browser"))
|
||||
|
||||
# Check if there's a redirect parameter to return to the same page
|
||||
redirect_url = request.args.get('redirect')
|
||||
|
||||
# If redirect is provided and safe, use it
|
||||
from changedetectionio.is_safe_url import is_safe_url
|
||||
if redirect_url and is_safe_url(redirect_url, current_app):
|
||||
return redirect(redirect_url)
|
||||
|
||||
# Otherwise redirect to watchlist
|
||||
return redirect(url_for('watchlist.index'))
|
||||
|
||||
return ui_blueprint
|
||||
@@ -83,7 +83,6 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
If a processor doesn't have a difference module, falls back to text_json_diff.
|
||||
"""
|
||||
|
||||
# More for testing, possible to return the first/only
|
||||
if uuid == 'first':
|
||||
uuid = list(datastore.data['watching'].keys()).pop()
|
||||
|
||||
@@ -101,23 +100,21 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
# Get the processor type for this watch
|
||||
processor_name = watch.get('processor', 'text_json_diff')
|
||||
|
||||
try:
|
||||
# Try to import the processor's difference module
|
||||
processor_module = importlib.import_module(f'changedetectionio.processors.{processor_name}.difference')
|
||||
# Try to get the processor's difference module (works for both built-in and plugin processors)
|
||||
from changedetectionio.processors import get_processor_submodule
|
||||
processor_module = get_processor_submodule(processor_name, 'difference')
|
||||
|
||||
# Call the processor's render() function
|
||||
if hasattr(processor_module, 'render'):
|
||||
return processor_module.render(
|
||||
watch=watch,
|
||||
datastore=datastore,
|
||||
request=request,
|
||||
url_for=url_for,
|
||||
render_template=render_template,
|
||||
flash=flash,
|
||||
redirect=redirect
|
||||
)
|
||||
except (ImportError, ModuleNotFoundError) as e:
|
||||
logger.warning(f"Processor {processor_name} does not have a difference module, falling back to text_json_diff: {e}")
|
||||
# Call the processor's render() function
|
||||
if processor_module and hasattr(processor_module, 'render'):
|
||||
return processor_module.render(
|
||||
watch=watch,
|
||||
datastore=datastore,
|
||||
request=request,
|
||||
url_for=url_for,
|
||||
render_template=render_template,
|
||||
flash=flash,
|
||||
redirect=redirect
|
||||
)
|
||||
|
||||
# Fallback: if processor doesn't have difference module, use text_json_diff as default
|
||||
from changedetectionio.processors.text_json_diff.difference import render as default_render
|
||||
@@ -144,10 +141,10 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
Each processor implements processors/{type}/extract.py::render_form()
|
||||
If a processor doesn't have an extract module, falls back to text_json_diff.
|
||||
"""
|
||||
# More for testing, possible to return the first/only
|
||||
|
||||
|
||||
if uuid == 'first':
|
||||
uuid = list(datastore.data['watching'].keys()).pop()
|
||||
|
||||
try:
|
||||
watch = datastore.data['watching'][uuid]
|
||||
except KeyError:
|
||||
@@ -157,23 +154,21 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
# Get the processor type for this watch
|
||||
processor_name = watch.get('processor', 'text_json_diff')
|
||||
|
||||
try:
|
||||
# Try to import the processor's extract module
|
||||
processor_module = importlib.import_module(f'changedetectionio.processors.{processor_name}.extract')
|
||||
# Try to get the processor's extract module (works for both built-in and plugin processors)
|
||||
from changedetectionio.processors import get_processor_submodule
|
||||
processor_module = get_processor_submodule(processor_name, 'extract')
|
||||
|
||||
# Call the processor's render_form() function
|
||||
if hasattr(processor_module, 'render_form'):
|
||||
return processor_module.render_form(
|
||||
watch=watch,
|
||||
datastore=datastore,
|
||||
request=request,
|
||||
url_for=url_for,
|
||||
render_template=render_template,
|
||||
flash=flash,
|
||||
redirect=redirect
|
||||
)
|
||||
except (ImportError, ModuleNotFoundError) as e:
|
||||
logger.warning(f"Processor {processor_name} does not have an extract module, falling back to base extractor: {e}")
|
||||
# Call the processor's render_form() function
|
||||
if processor_module and hasattr(processor_module, 'render_form'):
|
||||
return processor_module.render_form(
|
||||
watch=watch,
|
||||
datastore=datastore,
|
||||
request=request,
|
||||
url_for=url_for,
|
||||
render_template=render_template,
|
||||
flash=flash,
|
||||
redirect=redirect
|
||||
)
|
||||
|
||||
# Fallback: if processor doesn't have extract module, use base processors.extract as default
|
||||
from changedetectionio.processors.extract import render_form as default_render_form
|
||||
@@ -200,7 +195,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
Each processor implements processors/{type}/extract.py::process_extraction()
|
||||
If a processor doesn't have an extract module, falls back to text_json_diff.
|
||||
"""
|
||||
# More for testing, possible to return the first/only
|
||||
|
||||
if uuid == 'first':
|
||||
uuid = list(datastore.data['watching'].keys()).pop()
|
||||
|
||||
@@ -213,24 +208,22 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
# Get the processor type for this watch
|
||||
processor_name = watch.get('processor', 'text_json_diff')
|
||||
|
||||
try:
|
||||
# Try to import the processor's extract module
|
||||
processor_module = importlib.import_module(f'changedetectionio.processors.{processor_name}.extract')
|
||||
# Try to get the processor's extract module (works for both built-in and plugin processors)
|
||||
from changedetectionio.processors import get_processor_submodule
|
||||
processor_module = get_processor_submodule(processor_name, 'extract')
|
||||
|
||||
# Call the processor's process_extraction() function
|
||||
if hasattr(processor_module, 'process_extraction'):
|
||||
return processor_module.process_extraction(
|
||||
watch=watch,
|
||||
datastore=datastore,
|
||||
request=request,
|
||||
url_for=url_for,
|
||||
make_response=make_response,
|
||||
send_from_directory=send_from_directory,
|
||||
flash=flash,
|
||||
redirect=redirect
|
||||
)
|
||||
except (ImportError, ModuleNotFoundError) as e:
|
||||
logger.warning(f"Processor {processor_name} does not have an extract module, falling back to base extractor: {e}")
|
||||
# Call the processor's process_extraction() function
|
||||
if processor_module and hasattr(processor_module, 'process_extraction'):
|
||||
return processor_module.process_extraction(
|
||||
watch=watch,
|
||||
datastore=datastore,
|
||||
request=request,
|
||||
url_for=url_for,
|
||||
make_response=make_response,
|
||||
send_from_directory=send_from_directory,
|
||||
flash=flash,
|
||||
redirect=redirect
|
||||
)
|
||||
|
||||
# Fallback: if processor doesn't have extract module, use base processors.extract as default
|
||||
from changedetectionio.processors.extract import process_extraction as default_process_extraction
|
||||
@@ -267,7 +260,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
- /diff/{uuid}/processor-asset/after
|
||||
- /diff/{uuid}/processor-asset/rendered_diff
|
||||
"""
|
||||
# More for testing, possible to return the first/only
|
||||
|
||||
if uuid == 'first':
|
||||
uuid = list(datastore.data['watching'].keys()).pop()
|
||||
|
||||
@@ -280,38 +273,33 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
# Get the processor type for this watch
|
||||
processor_name = watch.get('processor', 'text_json_diff')
|
||||
|
||||
try:
|
||||
# Try to import the processor's difference module
|
||||
processor_module = importlib.import_module(f'changedetectionio.processors.{processor_name}.difference')
|
||||
# Try to get the processor's difference module (works for both built-in and plugin processors)
|
||||
from changedetectionio.processors import get_processor_submodule
|
||||
processor_module = get_processor_submodule(processor_name, 'difference')
|
||||
|
||||
# Call the processor's get_asset() function
|
||||
if hasattr(processor_module, 'get_asset'):
|
||||
result = processor_module.get_asset(
|
||||
asset_name=asset_name,
|
||||
watch=watch,
|
||||
datastore=datastore,
|
||||
request=request
|
||||
)
|
||||
# Call the processor's get_asset() function
|
||||
if processor_module and hasattr(processor_module, 'get_asset'):
|
||||
result = processor_module.get_asset(
|
||||
asset_name=asset_name,
|
||||
watch=watch,
|
||||
datastore=datastore,
|
||||
request=request
|
||||
)
|
||||
|
||||
if result is None:
|
||||
from flask import abort
|
||||
abort(404, description=f"Asset '{asset_name}' not found")
|
||||
|
||||
binary_data, content_type, cache_control = result
|
||||
|
||||
response = make_response(binary_data)
|
||||
response.headers['Content-Type'] = content_type
|
||||
if cache_control:
|
||||
response.headers['Cache-Control'] = cache_control
|
||||
return response
|
||||
else:
|
||||
logger.warning(f"Processor {processor_name} does not implement get_asset()")
|
||||
if result is None:
|
||||
from flask import abort
|
||||
abort(404, description=f"Processor '{processor_name}' does not support assets")
|
||||
abort(404, description=f"Asset '{asset_name}' not found")
|
||||
|
||||
except (ImportError, ModuleNotFoundError) as e:
|
||||
logger.warning(f"Processor {processor_name} does not have a difference module: {e}")
|
||||
binary_data, content_type, cache_control = result
|
||||
|
||||
response = make_response(binary_data)
|
||||
response.headers['Content-Type'] = content_type
|
||||
if cache_control:
|
||||
response.headers['Cache-Control'] = cache_control
|
||||
return response
|
||||
else:
|
||||
logger.warning(f"Processor {processor_name} does not implement get_asset()")
|
||||
from flask import abort
|
||||
abort(404, description=f"Processor '{processor_name}' not found")
|
||||
abort(404, description=f"Processor '{processor_name}' does not support assets")
|
||||
|
||||
return diff_blueprint
|
||||
|
||||
@@ -9,7 +9,7 @@ from jinja2 import Environment, FileSystemLoader
|
||||
from changedetectionio.store import ChangeDetectionStore
|
||||
from changedetectionio.auth_decorator import login_optionally_required
|
||||
from changedetectionio.time_handler import is_within_schedule
|
||||
from changedetectionio import worker_handler
|
||||
from changedetectionio import worker_pool
|
||||
|
||||
def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMetaData):
|
||||
edit_blueprint = Blueprint('ui_edit', __name__, template_folder="../ui/templates")
|
||||
@@ -30,14 +30,13 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
from changedetectionio import processors
|
||||
import importlib
|
||||
|
||||
if uuid == 'first':
|
||||
uuid = list(datastore.data['watching'].keys()).pop()
|
||||
# More for testing, possible to return the first/only
|
||||
if not datastore.data['watching'].keys():
|
||||
flash(gettext("No watches to edit"), "error")
|
||||
return redirect(url_for('watchlist.index'))
|
||||
|
||||
if uuid == 'first':
|
||||
uuid = list(datastore.data['watching'].keys()).pop()
|
||||
|
||||
if not uuid in datastore.data['watching']:
|
||||
flash(gettext("No watch with the UUID {} found.").format(uuid), "error")
|
||||
return redirect(url_for('watchlist.index'))
|
||||
@@ -72,8 +71,13 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
processor_name = datastore.data['watching'][uuid].get('processor', '')
|
||||
processor_classes = next((tpl for tpl in processors.find_processors() if tpl[1] == processor_name), None)
|
||||
if not processor_classes:
|
||||
flash(gettext("Cannot load the edit form for processor/plugin '{}', plugin missing?").format(processor_classes[1]), 'error')
|
||||
return redirect(url_for('watchlist.index'))
|
||||
flash(gettext("Could not load '{}' processor, processor plugin might be missing. Please select a different processor.").format(processor_name), 'error')
|
||||
# Fall back to default processor so user can still edit and change processor
|
||||
processor_classes = next((tpl for tpl in processors.find_processors() if tpl[1] == 'text_json_diff'), None)
|
||||
if not processor_classes:
|
||||
# If even text_json_diff is missing, something is very wrong
|
||||
flash(gettext("Could not load '{}' processor, processor plugin might be missing.").format(processor_name), 'error')
|
||||
return redirect(url_for('watchlist.index'))
|
||||
|
||||
parent_module = processors.get_parent_module(processor_classes[0])
|
||||
|
||||
@@ -150,58 +154,10 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
|
||||
extra_update_obj['time_between_check'] = form.time_between_check.data
|
||||
|
||||
# Handle processor-config-* fields separately (save to JSON, not datastore)
|
||||
processor_config_data = {}
|
||||
fields_to_remove = []
|
||||
for field_name, field_value in form.data.items():
|
||||
if field_name.startswith('processor_config_'):
|
||||
config_key = field_name.replace('processor_config_', '')
|
||||
if field_value: # Only save non-empty values
|
||||
processor_config_data[config_key] = field_value
|
||||
fields_to_remove.append(field_name)
|
||||
|
||||
# Save processor config to JSON file if any config data exists
|
||||
if processor_config_data:
|
||||
try:
|
||||
processor_name = form.data.get('processor')
|
||||
# Create a processor instance to access config methods
|
||||
processor_instance = processors.difference_detection_processor(datastore, uuid)
|
||||
# Use processor name as filename so each processor keeps its own config
|
||||
config_filename = f'{processor_name}.json'
|
||||
processor_instance.update_extra_watch_config(config_filename, processor_config_data)
|
||||
logger.debug(f"Saved processor config to {config_filename}: {processor_config_data}")
|
||||
|
||||
# Call optional edit_hook if processor has one
|
||||
try:
|
||||
# Try to import the edit_hook module from the processor package
|
||||
import importlib
|
||||
edit_hook_module_name = f'changedetectionio.processors.{processor_name}.edit_hook'
|
||||
|
||||
try:
|
||||
edit_hook = importlib.import_module(edit_hook_module_name)
|
||||
logger.debug(f"Found edit_hook module for {processor_name}")
|
||||
|
||||
if hasattr(edit_hook, 'on_config_save'):
|
||||
logger.info(f"Calling edit_hook.on_config_save for {processor_name}")
|
||||
watch_obj = datastore.data['watching'][uuid]
|
||||
# Call hook and get updated config
|
||||
updated_config = edit_hook.on_config_save(watch_obj, processor_config_data, datastore)
|
||||
# Save updated config back to file
|
||||
processor_instance.update_extra_watch_config(config_filename, updated_config)
|
||||
logger.info(f"Edit hook updated config: {updated_config}")
|
||||
else:
|
||||
logger.debug(f"Edit hook module found but no on_config_save function")
|
||||
except ModuleNotFoundError:
|
||||
logger.debug(f"No edit_hook module for processor {processor_name} (this is normal)")
|
||||
except Exception as hook_error:
|
||||
logger.error(f"Edit hook error (non-fatal): {hook_error}", exc_info=True)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to save processor config: {e}")
|
||||
|
||||
# Remove processor-config-* fields from form.data before updating datastore
|
||||
for field_name in fields_to_remove:
|
||||
form.data.pop(field_name, None)
|
||||
# Handle processor-config-* fields separately (save to JSON, not datastore)
|
||||
# IMPORTANT: These must NOT be saved to url-watches.json, only to the processor-specific JSON file
|
||||
processor_config_data = processors.extract_processor_config_from_form_data(form.data)
|
||||
processors.save_processor_config(datastore, uuid, processor_config_data)
|
||||
|
||||
# Ignore text
|
||||
form_ignore_text = form.ignore_text.data
|
||||
@@ -241,7 +197,11 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
|
||||
# Recast it if need be to right data Watch handler
|
||||
watch_class = processors.get_custom_watch_obj_for_processor(form.data.get('processor'))
|
||||
datastore.data['watching'][uuid] = watch_class(datastore_path=datastore.datastore_path, default=datastore.data['watching'][uuid])
|
||||
datastore.data['watching'][uuid] = watch_class(datastore_path=datastore.datastore_path, __datastore=datastore.data, default=datastore.data['watching'][uuid])
|
||||
|
||||
# Save the watch immediately
|
||||
datastore.data['watching'][uuid].commit()
|
||||
|
||||
flash(gettext("Updated watch - unpaused!") if request.args.get('unpause_on_save') else gettext("Updated watch."))
|
||||
|
||||
# Cleanup any browsersteps session for this watch
|
||||
@@ -251,10 +211,6 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
except Exception as e:
|
||||
logger.debug(f"Error cleaning up browsersteps session: {e}")
|
||||
|
||||
# Re #286 - We wait for syncing new data to disk in another thread every 60 seconds
|
||||
# But in the case something is added we should save straight away
|
||||
datastore.needs_write_urgent = True
|
||||
|
||||
# Do not queue on edit if its not within the time range
|
||||
|
||||
# @todo maybe it should never queue anyway on edit...
|
||||
@@ -283,7 +239,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
#############################
|
||||
if not datastore.data['watching'][uuid].get('paused') and is_in_schedule:
|
||||
# Queue the watch for immediate recheck, with a higher priority
|
||||
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
worker_pool.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
|
||||
# Diff page [edit] link should go back to diff page
|
||||
if request.args.get("next") and request.args.get("next") == 'diff':
|
||||
@@ -311,10 +267,17 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
|
||||
# Get fetcher capabilities instead of hardcoded logic
|
||||
capabilities = get_fetcher_capabilities(watch, datastore)
|
||||
|
||||
# Add processor capabilities from module
|
||||
capabilities['supports_visual_selector'] = getattr(parent_module, 'supports_visual_selector', False)
|
||||
capabilities['supports_text_filters_and_triggers'] = getattr(parent_module, 'supports_text_filters_and_triggers', False)
|
||||
capabilities['supports_text_filters_and_triggers_elements'] = getattr(parent_module, 'supports_text_filters_and_triggers_elements', False)
|
||||
capabilities['supports_request_type'] = getattr(parent_module, 'supports_request_type', False)
|
||||
|
||||
app_rss_token = datastore.data['settings']['application'].get('rss_access_token'),
|
||||
|
||||
c = [f"processor-{watch.get('processor')}"]
|
||||
if worker_handler.is_watch_running(uuid):
|
||||
if worker_pool.is_watch_running(uuid):
|
||||
c.append('checking-now')
|
||||
|
||||
template_args = {
|
||||
@@ -371,10 +334,12 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
from flask import send_file
|
||||
import brotli
|
||||
|
||||
if uuid == 'first':
|
||||
uuid = list(datastore.data['watching'].keys()).pop()
|
||||
watch = datastore.data['watching'].get(uuid)
|
||||
if watch and watch.history.keys() and os.path.isdir(watch.watch_data_dir):
|
||||
if watch and watch.history.keys() and os.path.isdir(watch.data_dir):
|
||||
latest_filename = list(watch.history.keys())[-1]
|
||||
html_fname = os.path.join(watch.watch_data_dir, f"{latest_filename}.html.br")
|
||||
html_fname = os.path.join(watch.data_dir, f"{latest_filename}.html.br")
|
||||
with open(html_fname, 'rb') as f:
|
||||
if html_fname.endswith('.br'):
|
||||
# Read and decompress the Brotli file
|
||||
@@ -395,6 +360,9 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
def watch_get_preview_rendered(uuid):
|
||||
'''For when viewing the "preview" of the rendered text from inside of Edit'''
|
||||
from flask import jsonify
|
||||
|
||||
if uuid == 'first':
|
||||
uuid = list(datastore.data['watching'].keys()).pop()
|
||||
from changedetectionio.processors.text_json_diff import prepare_filter_prevew
|
||||
result = prepare_filter_prevew(watch_uuid=uuid, form_data=request.form, datastore=datastore)
|
||||
return jsonify(result)
|
||||
@@ -418,6 +386,9 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
s = re.sub(r'[0-9]+', r'\\d+', s)
|
||||
datastore.data["watching"][uuid]['ignore_text'].append('/' + s + '/')
|
||||
|
||||
# Save the updated ignore_text
|
||||
datastore.data["watching"][uuid].commit()
|
||||
|
||||
return f"<a href={url_for('ui.ui_preview.preview_page', uuid=uuid)}>Click to preview</a>"
|
||||
|
||||
return edit_blueprint
|
||||
@@ -26,10 +26,9 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
Each processor implements processors/{type}/preview.py::render()
|
||||
If a processor doesn't have a preview module, falls back to default text preview.
|
||||
"""
|
||||
# More for testing, possible to return the first/only
|
||||
|
||||
if uuid == 'first':
|
||||
uuid = list(datastore.data['watching'].keys()).pop()
|
||||
|
||||
try:
|
||||
watch = datastore.data['watching'][uuid]
|
||||
except KeyError:
|
||||
@@ -39,24 +38,21 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
# Get the processor type for this watch
|
||||
processor_name = watch.get('processor', 'text_json_diff')
|
||||
|
||||
try:
|
||||
# Try to import the processor's preview module
|
||||
import importlib
|
||||
processor_module = importlib.import_module(f'changedetectionio.processors.{processor_name}.preview')
|
||||
# Try to get the processor's preview module (works for both built-in and plugin processors)
|
||||
from changedetectionio.processors import get_processor_submodule
|
||||
processor_module = get_processor_submodule(processor_name, 'preview')
|
||||
|
||||
# Call the processor's render() function
|
||||
if hasattr(processor_module, 'render'):
|
||||
return processor_module.render(
|
||||
watch=watch,
|
||||
datastore=datastore,
|
||||
request=request,
|
||||
url_for=url_for,
|
||||
render_template=render_template,
|
||||
flash=flash,
|
||||
redirect=redirect
|
||||
)
|
||||
except (ImportError, ModuleNotFoundError) as e:
|
||||
logger.debug(f"Processor {processor_name} does not have a preview module, using default preview: {e}")
|
||||
# Call the processor's render() function
|
||||
if processor_module and hasattr(processor_module, 'render'):
|
||||
return processor_module.render(
|
||||
watch=watch,
|
||||
datastore=datastore,
|
||||
request=request,
|
||||
url_for=url_for,
|
||||
render_template=render_template,
|
||||
flash=flash,
|
||||
redirect=redirect
|
||||
)
|
||||
|
||||
# Fallback: if processor doesn't have preview module, use default text preview
|
||||
content = []
|
||||
@@ -150,10 +146,8 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
"""
|
||||
from flask import make_response
|
||||
|
||||
# More for testing, possible to return the first/only
|
||||
if uuid == 'first':
|
||||
uuid = list(datastore.data['watching'].keys()).pop()
|
||||
|
||||
try:
|
||||
watch = datastore.data['watching'][uuid]
|
||||
except KeyError:
|
||||
@@ -163,39 +157,33 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
# Get the processor type for this watch
|
||||
processor_name = watch.get('processor', 'text_json_diff')
|
||||
|
||||
try:
|
||||
# Try to import the processor's preview module
|
||||
import importlib
|
||||
processor_module = importlib.import_module(f'changedetectionio.processors.{processor_name}.preview')
|
||||
# Try to get the processor's preview module (works for both built-in and plugin processors)
|
||||
from changedetectionio.processors import get_processor_submodule
|
||||
processor_module = get_processor_submodule(processor_name, 'preview')
|
||||
|
||||
# Call the processor's get_asset() function
|
||||
if hasattr(processor_module, 'get_asset'):
|
||||
result = processor_module.get_asset(
|
||||
asset_name=asset_name,
|
||||
watch=watch,
|
||||
datastore=datastore,
|
||||
request=request
|
||||
)
|
||||
# Call the processor's get_asset() function
|
||||
if processor_module and hasattr(processor_module, 'get_asset'):
|
||||
result = processor_module.get_asset(
|
||||
asset_name=asset_name,
|
||||
watch=watch,
|
||||
datastore=datastore,
|
||||
request=request
|
||||
)
|
||||
|
||||
if result is None:
|
||||
from flask import abort
|
||||
abort(404, description=f"Asset '{asset_name}' not found")
|
||||
|
||||
binary_data, content_type, cache_control = result
|
||||
|
||||
response = make_response(binary_data)
|
||||
response.headers['Content-Type'] = content_type
|
||||
if cache_control:
|
||||
response.headers['Cache-Control'] = cache_control
|
||||
return response
|
||||
else:
|
||||
logger.warning(f"Processor {processor_name} does not implement get_asset()")
|
||||
if result is None:
|
||||
from flask import abort
|
||||
abort(404, description=f"Processor '{processor_name}' does not support assets")
|
||||
abort(404, description=f"Asset '{asset_name}' not found")
|
||||
|
||||
except (ImportError, ModuleNotFoundError) as e:
|
||||
logger.warning(f"Processor {processor_name} does not have a preview module: {e}")
|
||||
binary_data, content_type, cache_control = result
|
||||
|
||||
response = make_response(binary_data)
|
||||
response.headers['Content-Type'] = content_type
|
||||
if cache_control:
|
||||
response.headers['Cache-Control'] = cache_control
|
||||
return response
|
||||
else:
|
||||
logger.warning(f"Processor {processor_name} does not implement get_asset()")
|
||||
from flask import abort
|
||||
abort(404, description=f"Processor '{processor_name}' not found")
|
||||
abort(404, description=f"Processor '{processor_name}' does not support assets")
|
||||
|
||||
return preview_blueprint
|
||||
|
||||
@@ -45,14 +45,19 @@
|
||||
<div class="tabs collapsable">
|
||||
<ul>
|
||||
<li class="tab"><a href="#general">{{ _('General') }}</a></li>
|
||||
{% if capabilities.supports_request_type %}
|
||||
<li class="tab"><a href="#request">{{ _('Request') }}</a></li>
|
||||
{% endif %}
|
||||
{% if extra_tab_content %}
|
||||
<li class="tab"><a href="#extras_tab">{{ extra_tab_content }}</a></li>
|
||||
{% endif %}
|
||||
{% if capabilities.supports_browser_steps %}
|
||||
<li class="tab"><a id="browsersteps-tab" href="#browser-steps">{{ _('Browser Steps') }}</a></li>
|
||||
<!-- should goto extra forms? -->
|
||||
{% if watch['processor'] == 'text_json_diff' or watch['processor'] == 'image_ssim_diff' %}
|
||||
{% endif %}
|
||||
{% if capabilities.supports_visual_selector %}
|
||||
<li class="tab"><a id="visualselector-tab" href="#visualselector">{{ _('Visual Filter Selector') }}</a></li>
|
||||
{% endif %}
|
||||
{% if capabilities.supports_text_filters_and_triggers %}
|
||||
<li class="tab" id="filters-and-triggers-tab"><a href="#filters-and-triggers">{{ _('Filters & Triggers') }}</a></li>
|
||||
<li class="tab" id="conditions-tab"><a href="#conditions">{{ _('Conditions') }}</a></li>
|
||||
{% endif %}
|
||||
@@ -110,12 +115,20 @@
|
||||
{{ _('Sends a notification when the filter can no longer be seen on the page, good for knowing when the page changed and your filter will not work anymore.') }}
|
||||
</span>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_field(form.history_snapshot_max_length, class="history_snapshot_max_length") }}
|
||||
<span class="pure-form-message-inline">{{ _('Limit collection of history snapshots for each watch to this number of history items.') }}
|
||||
<br>
|
||||
{{ _('Set to empty to use system settings default') }}
|
||||
</span>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
{{ render_ternary_field(form.use_page_title_in_list) }}
|
||||
</div>
|
||||
</fieldset>
|
||||
</div>
|
||||
|
||||
{% if capabilities.supports_request_type %}
|
||||
<div class="tab-pane-inner" id="request">
|
||||
<div class="pure-control-group inline-radio">
|
||||
{{ render_field(form.fetch_backend, class="fetch-backend") }}
|
||||
@@ -203,6 +216,7 @@ Math: {{ 1 + 1 }}") }}
|
||||
</div>
|
||||
</fieldset>
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
<div class="tab-pane-inner" id="browser-steps">
|
||||
{% if capabilities.supports_browser_steps %}
|
||||
@@ -283,8 +297,7 @@ Math: {{ 1 + 1 }}") }}
|
||||
</fieldset>
|
||||
</div>
|
||||
|
||||
{% if watch['processor'] == 'text_json_diff' or watch['processor'] == 'image_ssim_diff' %}
|
||||
|
||||
{% if capabilities.supports_text_filters_and_triggers %}
|
||||
<div class="tab-pane-inner" id="conditions">
|
||||
<script>
|
||||
const verify_condition_rule_url="{{url_for('conditions.verify_condition_single_rule', watch_uuid=uuid)}}";
|
||||
@@ -303,7 +316,9 @@ Math: {{ 1 + 1 }}") }}
|
||||
<span id="activate-text-preview" class="pure-button pure-button-primary button-xsmall">{{ _('Activate preview') }}</span>
|
||||
<div>
|
||||
<div id="edit-text-filter">
|
||||
<div class="pure-control-group" id="pro-tips">
|
||||
|
||||
{% if capabilities.supports_text_filters_and_triggers_elements %}
|
||||
<div class="pure-control-group" id="pro-tips">
|
||||
<strong>{{ _('Pro-tips:') }}</strong><br>
|
||||
<ul>
|
||||
<li>
|
||||
@@ -314,8 +329,8 @@ Math: {{ 1 + 1 }}") }}
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
{% include "edit/include_subtract.html" %}
|
||||
{% endif %}
|
||||
<div class="text-filtering border-fieldset">
|
||||
<fieldset class="pure-group" id="text-filtering-type-options">
|
||||
<h3>{{ _('Text filtering') }}</h3>
|
||||
@@ -374,7 +389,7 @@ Math: {{ 1 + 1 }}") }}
|
||||
{{ extra_form_content|safe }}
|
||||
</div>
|
||||
{% endif %}
|
||||
{% if watch['processor'] == 'text_json_diff' or watch['processor'] == 'image_ssim_diff' %}
|
||||
{% if capabilities.supports_visual_selector %}
|
||||
<div class="tab-pane-inner visual-selector-ui" id="visualselector">
|
||||
<img class="beta-logo" src="{{url_for('static_content', group='images', filename='beta-logo.png')}}" alt="New beta functionality">
|
||||
|
||||
@@ -386,7 +401,7 @@ Math: {{ 1 + 1 }}") }}
|
||||
{{ _('The Visual Selector tool lets you select the') }} <i>{{ _('text') }}</i> {{ _('elements that will be used for the change detection. It automatically fills-in the filters in the "CSS/JSONPath/JQ/XPath Filters" box of the') }} <a href="#filters-and-triggers">{{ _('Filters & Triggers') }}</a> {{ _('tab. Use') }} <strong>{{ _('Shift+Click') }}</strong> {{ _('to select multiple items.') }}
|
||||
</span>
|
||||
|
||||
{% if watch['processor'] == 'image_ssim_diff' %}
|
||||
{% if watch['processor'] == 'image_ssim_diff' %} {# @todo, integrate with image_ssim_diff selector better, use some extra form ? #}
|
||||
<div id="selection-mode-controls" style="margin: 10px 0; padding: 10px; background: var(--color-background-tab); border-radius: 5px;">
|
||||
<label style="font-weight: 600; margin-right: 15px;">{{ _('Selection Mode:') }}</label>
|
||||
<label style="margin-right: 15px;">
|
||||
|
||||
@@ -2,7 +2,7 @@ from flask import Blueprint, request, redirect, url_for, flash
|
||||
from flask_babel import gettext
|
||||
from changedetectionio.store import ChangeDetectionStore
|
||||
from changedetectionio.auth_decorator import login_optionally_required
|
||||
from changedetectionio import worker_handler
|
||||
from changedetectionio import worker_pool
|
||||
|
||||
|
||||
def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMetaData, watch_check_update):
|
||||
@@ -24,8 +24,9 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
flash(gettext('Warning, URL {} already exists').format(url), "notice")
|
||||
|
||||
add_paused = request.form.get('edit_and_watch_submit_button') != None
|
||||
processor = request.form.get('processor', 'text_json_diff')
|
||||
new_uuid = datastore.add_watch(url=url, tag=request.form.get('tags').strip(), extras={'paused': add_paused, 'processor': processor})
|
||||
from changedetectionio import processors
|
||||
processor = request.form.get('processor', processors.get_default_processor())
|
||||
new_uuid = datastore.add_watch(url=url, tag=request.form.get('tags','').strip(), extras={'paused': add_paused, 'processor': processor})
|
||||
|
||||
if new_uuid:
|
||||
if add_paused:
|
||||
@@ -33,9 +34,9 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
return redirect(url_for('ui.ui_edit.edit_page', uuid=new_uuid, unpause_on_save=1, tag=request.args.get('tag')))
|
||||
else:
|
||||
# Straight into the queue.
|
||||
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid}))
|
||||
worker_pool.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid}))
|
||||
flash(gettext("Watch added."))
|
||||
|
||||
return redirect(url_for('watchlist.index', tag=request.args.get('tag','')))
|
||||
|
||||
return views_blueprint
|
||||
return views_blueprint
|
||||
|
||||
@@ -39,7 +39,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
elif op == 'mute':
|
||||
datastore.data['watching'][uuid].toggle_mute()
|
||||
|
||||
datastore.needs_write = True
|
||||
datastore.data['watching'][uuid].commit()
|
||||
return redirect(url_for('watchlist.index', tag = active_tag_uuid))
|
||||
|
||||
# Sort by last_changed and add the uuid which is usually the key..
|
||||
|
||||
@@ -1,5 +1,9 @@
|
||||
{%- extends 'base.html' -%}
|
||||
{%- block content -%}
|
||||
{%- set tips = [
|
||||
_("Changedetection.io can monitor more than just web-pages! See our plugins!") ~ ' <a href="https://changedetection.io/plugins">' ~ _('More info') ~ '</a>',
|
||||
_("You can also add 'shared' watches.") ~ ' <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Sharing-a-Watch">' ~ _('More info') ~ '</a>'
|
||||
] -%}
|
||||
{%- from '_helpers.html' import render_simple_field, render_field, render_nolabel_field, sort_by_title -%}
|
||||
<script src="{{url_for('static_content', group='js', filename='jquery-3.6.0.min.js')}}"></script>
|
||||
<script src="{{url_for('static_content', group='js', filename='watch-overview.js')}}" defer></script>
|
||||
@@ -10,6 +14,46 @@
|
||||
// Initialize Feather icons after the page loads
|
||||
document.addEventListener('DOMContentLoaded', function() {
|
||||
feather.replace();
|
||||
|
||||
// Intersection Observer for lazy loading favicons
|
||||
// Only load favicon images when they enter the viewport
|
||||
if ('IntersectionObserver' in window) {
|
||||
const faviconObserver = new IntersectionObserver((entries, observer) => {
|
||||
entries.forEach(entry => {
|
||||
if (entry.isIntersecting) {
|
||||
const img = entry.target;
|
||||
const src = img.getAttribute('data-src');
|
||||
|
||||
if (src) {
|
||||
// Load the actual favicon
|
||||
img.src = src;
|
||||
img.removeAttribute('data-src');
|
||||
}
|
||||
|
||||
// Stop observing this image
|
||||
observer.unobserve(img);
|
||||
}
|
||||
});
|
||||
}, {
|
||||
// Start loading slightly before the image enters viewport
|
||||
rootMargin: '50px',
|
||||
threshold: 0.01
|
||||
});
|
||||
|
||||
// Observe all lazy favicon images
|
||||
document.querySelectorAll('.lazy-favicon').forEach(img => {
|
||||
faviconObserver.observe(img);
|
||||
});
|
||||
} else {
|
||||
// Fallback for older browsers: load all favicons immediately
|
||||
document.querySelectorAll('.lazy-favicon').forEach(img => {
|
||||
const src = img.getAttribute('data-src');
|
||||
if (src) {
|
||||
img.src = src;
|
||||
img.removeAttribute('data-src');
|
||||
}
|
||||
});
|
||||
}
|
||||
});
|
||||
</script>
|
||||
<style>
|
||||
@@ -69,7 +113,9 @@ html[data-darkmode="true"] .watch-tag-list.tag-{{ class_name }} {
|
||||
</div>
|
||||
|
||||
</fieldset>
|
||||
<span style="color:#eee; font-size: 80%;"><img alt="{{ _('Create a shareable link') }}" style="height: 1em;display:inline-block;" src="{{url_for('static_content', group='images', filename='spread-white.svg')}}" > {{ _("Tip: You can also add 'shared' watches.") }} <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Sharing-a-Watch">{{ _('More info') }}</a></span>
|
||||
<span style="color:#eee; font-size: 80%;">
|
||||
<strong>Tip: </strong> {{ tips | random | safe }}
|
||||
</span>
|
||||
</form>
|
||||
</div>
|
||||
<div class="box">
|
||||
@@ -200,30 +246,40 @@ html[data-darkmode="true"] .watch-tag-list.tag-{{ class_name }} {
|
||||
<td class="title-col inline">
|
||||
<div class="flex-wrapper">
|
||||
{% if 'favicons_enabled' not in ui_settings or ui_settings['favicons_enabled'] %}
|
||||
<div>{# A page might have hundreds of these images, set IMG options for lazy loading, don't set SRC if we dont have it so it doesnt fetch the placeholder' #}
|
||||
<img alt="Favicon thumbnail" class="favicon" loading="lazy" decoding="async" fetchpriority="low" {% if favicon %} src="{{url_for('static_content', group='favicon', filename=watch.uuid)}}" {% else %} src='data:image/svg+xml;utf8,%3Csvg xmlns="http://www.w3.org/2000/svg" width="7.087" height="7.087" viewBox="0 0 7.087 7.087"%3E%3Ccircle cx="3.543" cy="3.543" r="3.279" stroke="%23e1e1e1" stroke-width="0.45" fill="none" opacity="0.74"/%3E%3C/svg%3E' {% endif %} >
|
||||
<div>
|
||||
{# Intersection Observer lazy loading: store real URL in data-src, load only when visible in viewport #}
|
||||
<img alt="Favicon thumbnail"
|
||||
class="favicon lazy-favicon"
|
||||
loading="lazy"
|
||||
decoding="async"
|
||||
fetchpriority="low"
|
||||
{% if favicon %}
|
||||
data-src="{{url_for('static_content', group='favicon', filename=watch.uuid)}}"
|
||||
{% endif %}
|
||||
src='data:image/svg+xml;utf8,%3Csvg xmlns="http://www.w3.org/2000/svg" width="7.087" height="7.087" viewBox="0 0 7.087 7.087"%3E%3Ccircle cx="3.543" cy="3.543" r="3.279" stroke="%23e1e1e1" stroke-width="0.45" fill="none" opacity="0.74"/%3E%3C/svg%3E'>
|
||||
</div>
|
||||
{% endif %}
|
||||
<div>
|
||||
<span class="watch-title">
|
||||
{% if system_use_url_watchlist or watch.get('use_page_title_in_list') %}
|
||||
{{ watch.label }}
|
||||
{% else %}
|
||||
{{ watch.get('title') or watch.link }}
|
||||
{% endif %}
|
||||
<a class="external" target="_blank" rel="noopener" href="{{ watch.link.replace('source:','') }}"> </a>
|
||||
</span>
|
||||
{%- if watch['processor'] and watch['processor'] in processor_badge_texts -%}
|
||||
<span class="processor-badge processor-badge-{{ watch['processor'] }}" title="{{ processor_descriptions.get(watch['processor'], watch['processor']) }}">{{ processor_badge_texts[watch['processor']] }}</span>
|
||||
{%- endif -%}
|
||||
<span class="watch-title">
|
||||
{% if system_use_url_watchlist or watch.get('use_page_title_in_list') %}
|
||||
{{ watch.label }}
|
||||
{% else %}
|
||||
{{ watch.get('title') or watch.link }}
|
||||
{% endif %}
|
||||
<a class="external" target="_blank" rel="noopener" href="{{ watch.link.replace('source:','') }}"> </a>
|
||||
</span>
|
||||
<div class="error-text" style="display:none;">{{ watch.compile_error_texts(has_proxies=datastore.proxy_list)|safe }}</div>
|
||||
{%- if watch['processor'] == 'text_json_diff' -%}
|
||||
{%- if watch['has_ldjson_price_data'] and not watch['track_ldjson_price_data'] -%}
|
||||
<div class="ldjson-price-track-offer">Switch to Restock & Price watch mode? <a href="{{url_for('price_data_follower.accept', uuid=watch.uuid)}}" class="pure-button button-xsmall">Yes</a> <a href="{{url_for('price_data_follower.reject', uuid=watch.uuid)}}" class="">No</a></div>
|
||||
{%- endif -%}
|
||||
{%- endif -%}
|
||||
{%- if watch['processor'] and watch['processor'] in processor_badge_texts -%}
|
||||
<span class="processor-badge processor-badge-{{ watch['processor'] }}" title="{{ processor_descriptions.get(watch['processor'], watch['processor']) }}">{{ processor_badge_texts[watch['processor']] }}</span>
|
||||
{%- endif -%}
|
||||
|
||||
{%- for watch_tag_uuid, watch_tag in datastore.get_all_tags_for_watch(watch['uuid']).items() -%}
|
||||
<span class="watch-tag-list tag-{{ watch_tag.title|sanitize_tag_class }}">{{ watch_tag.title }}</span>
|
||||
<a href="{{url_for('watchlist.index', tag=watch_tag_uuid) }}" class="watch-tag-list tag-{{ watch_tag.title|sanitize_tag_class }}">{{ watch_tag.title }}</a>
|
||||
{%- endfor -%}
|
||||
</div>
|
||||
<div class="status-icons">
|
||||
|
||||
@@ -71,10 +71,19 @@ class Fetcher():
|
||||
supports_screenshots = False # Can capture page screenshots
|
||||
supports_xpath_element_data = False # Can extract xpath element positions/data for visual selector
|
||||
|
||||
# Screenshot element locking - prevents layout shifts during screenshot capture
|
||||
# Only needed for visual comparison (image_ssim_diff processor)
|
||||
# Locks element dimensions in the first viewport to prevent headers/ads from resizing
|
||||
lock_viewport_elements = False # Default: disabled for performance
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
if kwargs and 'screenshot_format' in kwargs:
|
||||
self.screenshot_format = kwargs.get('screenshot_format')
|
||||
|
||||
# Allow lock_viewport_elements to be set via kwargs
|
||||
if kwargs and 'lock_viewport_elements' in kwargs:
|
||||
self.lock_viewport_elements = kwargs.get('lock_viewport_elements')
|
||||
|
||||
|
||||
@classmethod
|
||||
def get_status_icon_data(cls):
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import asyncio
|
||||
import gc
|
||||
import json
|
||||
import os
|
||||
@@ -8,20 +9,24 @@ from loguru import logger
|
||||
from changedetectionio.content_fetchers import SCREENSHOT_MAX_HEIGHT_DEFAULT, visualselector_xpath_selectors, \
|
||||
SCREENSHOT_SIZE_STITCH_THRESHOLD, SCREENSHOT_MAX_TOTAL_HEIGHT, XPATH_ELEMENT_JS, INSTOCK_DATA_JS, FAVICON_FETCHER_JS
|
||||
from changedetectionio.content_fetchers.base import Fetcher, manage_user_agent
|
||||
from changedetectionio.content_fetchers.exceptions import PageUnloadable, Non200ErrorCodeReceived, EmptyReply, ScreenshotUnavailable
|
||||
from changedetectionio.content_fetchers.exceptions import PageUnloadable, Non200ErrorCodeReceived, EmptyReply, ScreenshotUnavailable, \
|
||||
BrowserStepsStepException
|
||||
|
||||
async def capture_full_page_async(page, screenshot_format='JPEG'):
|
||||
|
||||
async def capture_full_page_async(page, screenshot_format='JPEG', watch_uuid=None, lock_viewport_elements=False):
|
||||
import os
|
||||
import time
|
||||
import multiprocessing
|
||||
|
||||
start = time.time()
|
||||
watch_info = f"[{watch_uuid}] " if watch_uuid else ""
|
||||
|
||||
setup_start = time.time()
|
||||
page_height = await page.evaluate("document.documentElement.scrollHeight")
|
||||
page_width = await page.evaluate("document.documentElement.scrollWidth")
|
||||
original_viewport = page.viewport_size
|
||||
dimensions_time = time.time() - setup_start
|
||||
|
||||
logger.debug(f"Playwright viewport size {page.viewport_size} page height {page_height} page width {page_width}")
|
||||
logger.debug(f"{watch_info}Playwright viewport size {page.viewport_size} page height {page_height} page width {page_width} (got dimensions in {dimensions_time:.2f}s)")
|
||||
|
||||
# Use an approach similar to puppeteer: set a larger viewport and take screenshots in chunks
|
||||
step_size = SCREENSHOT_SIZE_STITCH_THRESHOLD # Size that won't cause GPU to overflow
|
||||
@@ -29,25 +34,31 @@ async def capture_full_page_async(page, screenshot_format='JPEG'):
|
||||
y = 0
|
||||
elements_locked = False
|
||||
|
||||
if page_height > page.viewport_size['height']:
|
||||
|
||||
# Lock all element dimensions BEFORE screenshot to prevent CSS media queries from resizing
|
||||
# capture_full_page_async() changes viewport height which triggers @media (min-height) rules
|
||||
# Only lock viewport elements if explicitly enabled (for image_ssim_diff processor)
|
||||
# This prevents headers/ads from resizing when viewport changes
|
||||
if lock_viewport_elements and page_height > page.viewport_size['height']:
|
||||
lock_start = time.time()
|
||||
lock_elements_js_path = os.path.join(os.path.dirname(__file__), 'res', 'lock-elements-sizing.js')
|
||||
with open(lock_elements_js_path, 'r') as f:
|
||||
lock_elements_js = f.read()
|
||||
await page.evaluate(lock_elements_js)
|
||||
elements_locked = True
|
||||
lock_time = time.time() - lock_start
|
||||
logger.debug(f"{watch_info}Viewport element locking enabled (took {lock_time:.2f}s)")
|
||||
|
||||
logger.debug("Element dimensions locked before screenshot capture")
|
||||
|
||||
if page_height > page.viewport_size['height']:
|
||||
if page_height < step_size:
|
||||
step_size = page_height # Incase page is bigger than default viewport but smaller than proposed step size
|
||||
logger.debug(f"Setting bigger viewport to step through large page width W{page.viewport_size['width']}xH{step_size} because page_height > viewport_size")
|
||||
viewport_start = time.time()
|
||||
logger.debug(f"{watch_info}Setting bigger viewport to step through large page width W{page.viewport_size['width']}xH{step_size} because page_height > viewport_size")
|
||||
# Set viewport to a larger size to capture more content at once
|
||||
await page.set_viewport_size({'width': page.viewport_size['width'], 'height': step_size})
|
||||
viewport_time = time.time() - viewport_start
|
||||
logger.debug(f"{watch_info}Viewport changed to {page.viewport_size['width']}x{step_size} (took {viewport_time:.2f}s)")
|
||||
|
||||
# Capture screenshots in chunks up to the max total height
|
||||
capture_start = time.time()
|
||||
chunk_times = []
|
||||
# Use PNG for better quality (no compression artifacts), JPEG for smaller size
|
||||
screenshot_type = screenshot_format.lower() if screenshot_format else 'jpeg'
|
||||
# PNG should use quality 100, JPEG uses configurable quality
|
||||
@@ -69,7 +80,11 @@ async def capture_full_page_async(page, screenshot_format='JPEG'):
|
||||
if screenshot_type == 'jpeg':
|
||||
screenshot_kwargs['quality'] = screenshot_quality
|
||||
|
||||
chunk_start = time.time()
|
||||
screenshot_chunks.append(await page.screenshot(**screenshot_kwargs))
|
||||
chunk_time = time.time() - chunk_start
|
||||
chunk_times.append(chunk_time)
|
||||
logger.debug(f"{watch_info}Chunk {len(screenshot_chunks)} captured in {chunk_time:.2f}s")
|
||||
y += step_size
|
||||
|
||||
# Restore original viewport size
|
||||
@@ -81,40 +96,54 @@ async def capture_full_page_async(page, screenshot_format='JPEG'):
|
||||
with open(unlock_elements_js_path, 'r') as f:
|
||||
unlock_elements_js = f.read()
|
||||
await page.evaluate(unlock_elements_js)
|
||||
logger.debug("Element dimensions unlocked after screenshot capture")
|
||||
logger.debug(f"{watch_info}Element dimensions unlocked after screenshot capture")
|
||||
|
||||
capture_time = time.time() - capture_start
|
||||
total_capture_time = sum(chunk_times)
|
||||
logger.debug(f"{watch_info}All {len(screenshot_chunks)} chunks captured in {capture_time:.2f}s (total chunk time: {total_capture_time:.2f}s)")
|
||||
|
||||
# If we have multiple chunks, stitch them together
|
||||
if len(screenshot_chunks) > 1:
|
||||
logger.debug(f"Screenshot stitching {len(screenshot_chunks)} chunks together")
|
||||
stitch_start = time.time()
|
||||
logger.debug(f"{watch_info}Starting stitching of {len(screenshot_chunks)} chunks")
|
||||
|
||||
# For small number of chunks (2-3), stitch inline to avoid multiprocessing overhead
|
||||
# Only use separate process for many chunks (4+) to avoid blocking the event loop
|
||||
if len(screenshot_chunks) <= 3:
|
||||
from changedetectionio.content_fetchers.screenshot_handler import stitch_images_inline
|
||||
screenshot = stitch_images_inline(screenshot_chunks, page_height, SCREENSHOT_MAX_TOTAL_HEIGHT)
|
||||
else:
|
||||
# Use separate process for many chunks to avoid blocking
|
||||
# Always use spawn for thread safety - consistent behavior in tests and production
|
||||
from changedetectionio.content_fetchers.screenshot_handler import stitch_images_worker
|
||||
ctx = multiprocessing.get_context('spawn')
|
||||
parent_conn, child_conn = ctx.Pipe()
|
||||
p = ctx.Process(target=stitch_images_worker, args=(child_conn, screenshot_chunks, page_height, SCREENSHOT_MAX_TOTAL_HEIGHT))
|
||||
p.start()
|
||||
screenshot = parent_conn.recv_bytes()
|
||||
p.join()
|
||||
# Explicit cleanup
|
||||
del p
|
||||
del parent_conn, child_conn
|
||||
# Always use spawn subprocess for ANY stitching (2+ chunks)
|
||||
# PIL allocates at C level and Python GC never releases it - subprocess exit forces OS to reclaim
|
||||
# Trade-off: 35MB resource_tracker vs 500MB+ PIL leak in main process
|
||||
from changedetectionio.content_fetchers.screenshot_handler import stitch_images_worker_raw_bytes
|
||||
import multiprocessing
|
||||
import struct
|
||||
|
||||
ctx = multiprocessing.get_context('spawn')
|
||||
parent_conn, child_conn = ctx.Pipe()
|
||||
p = ctx.Process(target=stitch_images_worker_raw_bytes, args=(child_conn, page_height, SCREENSHOT_MAX_TOTAL_HEIGHT))
|
||||
p.start()
|
||||
|
||||
# Send via raw bytes (no pickle)
|
||||
parent_conn.send_bytes(struct.pack('I', len(screenshot_chunks)))
|
||||
for chunk in screenshot_chunks:
|
||||
parent_conn.send_bytes(chunk)
|
||||
|
||||
screenshot = parent_conn.recv_bytes()
|
||||
p.join()
|
||||
|
||||
parent_conn.close()
|
||||
child_conn.close()
|
||||
del p, parent_conn, child_conn
|
||||
|
||||
stitch_time = time.time() - stitch_start
|
||||
total_time = time.time() - start
|
||||
setup_time = total_time - capture_time - stitch_time
|
||||
logger.debug(
|
||||
f"Screenshot (chunked/stitched) - Page height: {page_height} Capture height: {SCREENSHOT_MAX_TOTAL_HEIGHT} - Stitched together in {time.time() - start:.2f}s")
|
||||
# Explicit cleanup
|
||||
del screenshot_chunks
|
||||
screenshot_chunks = None
|
||||
f"{watch_info}Screenshot complete - Page height: {page_height}px, Capture height: {SCREENSHOT_MAX_TOTAL_HEIGHT}px | "
|
||||
f"Setup: {setup_time:.2f}s, Capture: {capture_time:.2f}s, Stitching: {stitch_time:.2f}s, Total: {total_time:.2f}s")
|
||||
return screenshot
|
||||
|
||||
total_time = time.time() - start
|
||||
setup_time = total_time - capture_time
|
||||
logger.debug(
|
||||
f"Screenshot Page height: {page_height} Capture height: {SCREENSHOT_MAX_TOTAL_HEIGHT} - Stitched together in {time.time() - start:.2f}s")
|
||||
f"{watch_info}Screenshot complete - Page height: {page_height}px, Capture height: {SCREENSHOT_MAX_TOTAL_HEIGHT}px | "
|
||||
f"Setup: {setup_time:.2f}s, Single chunk: {capture_time:.2f}s, Total: {total_time:.2f}s")
|
||||
|
||||
return screenshot_chunks[0]
|
||||
|
||||
@@ -184,7 +213,8 @@ class fetcher(Fetcher):
|
||||
|
||||
async def screenshot_step(self, step_n=''):
|
||||
super().screenshot_step(step_n=step_n)
|
||||
screenshot = await capture_full_page_async(page=self.page, screenshot_format=self.screenshot_format)
|
||||
watch_uuid = getattr(self, 'watch_uuid', None)
|
||||
screenshot = await capture_full_page_async(page=self.page, screenshot_format=self.screenshot_format, watch_uuid=watch_uuid, lock_viewport_elements=self.lock_viewport_elements)
|
||||
|
||||
# Request GC immediately after screenshot to free memory
|
||||
# Screenshots can be large and browser steps take many of them
|
||||
@@ -233,6 +263,7 @@ class fetcher(Fetcher):
|
||||
import playwright._impl._errors
|
||||
import time
|
||||
self.delete_browser_steps_screenshots()
|
||||
self.watch_uuid = watch_uuid # Store for use in screenshot_step
|
||||
response = None
|
||||
|
||||
async with async_playwright() as p:
|
||||
@@ -318,13 +349,8 @@ class fetcher(Fetcher):
|
||||
logger.error(f"Error fetching FavIcon info {str(e)}, continuing.")
|
||||
|
||||
if self.status_code != 200 and not ignore_status_codes:
|
||||
screenshot = await capture_full_page_async(self.page, screenshot_format=self.screenshot_format)
|
||||
# Cleanup before raising to prevent memory leak
|
||||
await self.page.close()
|
||||
await context.close()
|
||||
await browser.close()
|
||||
# Force garbage collection to release Playwright resources immediately
|
||||
gc.collect()
|
||||
screenshot = await capture_full_page_async(self.page, screenshot_format=self.screenshot_format, watch_uuid=watch_uuid, lock_viewport_elements=self.lock_viewport_elements)
|
||||
# Finally block will handle cleanup
|
||||
raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, screenshot=screenshot)
|
||||
|
||||
if not empty_pages_are_a_change and len((await self.page.content()).strip()) == 0:
|
||||
@@ -337,7 +363,11 @@ class fetcher(Fetcher):
|
||||
try:
|
||||
# Run Browser Steps here
|
||||
if self.browser_steps_get_valid_steps():
|
||||
await self.iterate_browser_steps(start_url=url)
|
||||
try:
|
||||
await self.iterate_browser_steps(start_url=url)
|
||||
except BrowserStepsStepException:
|
||||
# Finally block will handle cleanup
|
||||
raise
|
||||
|
||||
await self.page.wait_for_timeout(extra_wait * 1000)
|
||||
|
||||
@@ -374,44 +404,51 @@ class fetcher(Fetcher):
|
||||
# which will significantly increase the IO size between the server and client, it's recommended to use the lowest
|
||||
# acceptable screenshot quality here
|
||||
# The actual screenshot - this always base64 and needs decoding! horrible! huge CPU usage
|
||||
self.screenshot = await capture_full_page_async(page=self.page, screenshot_format=self.screenshot_format)
|
||||
self.screenshot = await capture_full_page_async(page=self.page, screenshot_format=self.screenshot_format, watch_uuid=watch_uuid, lock_viewport_elements=self.lock_viewport_elements)
|
||||
|
||||
# Force aggressive memory cleanup - screenshots are large and base64 decode creates temporary buffers
|
||||
await self.page.request_gc()
|
||||
gc.collect()
|
||||
|
||||
except ScreenshotUnavailable:
|
||||
# Re-raise screenshot unavailable exceptions
|
||||
raise
|
||||
except Exception as e:
|
||||
# It's likely the screenshot was too long/big and something crashed
|
||||
raise ScreenshotUnavailable(url=url, status_code=self.status_code)
|
||||
|
||||
finally:
|
||||
# Request garbage collection one more time before closing
|
||||
# Clean up resources properly with timeouts to prevent hanging
|
||||
try:
|
||||
await self.page.request_gc()
|
||||
except:
|
||||
pass
|
||||
|
||||
# Clean up resources properly
|
||||
try:
|
||||
await self.page.request_gc()
|
||||
except:
|
||||
pass
|
||||
if hasattr(self, 'page') and self.page:
|
||||
await self.page.request_gc()
|
||||
await asyncio.wait_for(self.page.close(), timeout=5.0)
|
||||
logger.debug(f"Successfully closed page for {url}")
|
||||
except asyncio.TimeoutError:
|
||||
logger.warning(f"Timed out closing page for {url} (5s)")
|
||||
except Exception as e:
|
||||
logger.warning(f"Error closing page for {url}: {e}")
|
||||
finally:
|
||||
self.page = None
|
||||
|
||||
try:
|
||||
await self.page.close()
|
||||
except:
|
||||
pass
|
||||
self.page = None
|
||||
if context:
|
||||
await asyncio.wait_for(context.close(), timeout=5.0)
|
||||
logger.debug(f"Successfully closed context for {url}")
|
||||
except asyncio.TimeoutError:
|
||||
logger.warning(f"Timed out closing context for {url} (5s)")
|
||||
except Exception as e:
|
||||
logger.warning(f"Error closing context for {url}: {e}")
|
||||
finally:
|
||||
context = None
|
||||
|
||||
try:
|
||||
await context.close()
|
||||
except:
|
||||
pass
|
||||
context = None
|
||||
|
||||
try:
|
||||
await browser.close()
|
||||
except:
|
||||
pass
|
||||
browser = None
|
||||
if browser:
|
||||
await asyncio.wait_for(browser.close(), timeout=5.0)
|
||||
logger.debug(f"Successfully closed browser connection for {url}")
|
||||
except asyncio.TimeoutError:
|
||||
logger.warning(f"Timed out closing browser connection for {url} (5s)")
|
||||
except Exception as e:
|
||||
logger.warning(f"Error closing browser for {url}: {e}")
|
||||
finally:
|
||||
browser = None
|
||||
|
||||
# Force Python GC to release Playwright resources immediately
|
||||
# Playwright objects can have circular references that delay cleanup
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import asyncio
|
||||
import gc
|
||||
import json
|
||||
import os
|
||||
import websockets.exceptions
|
||||
@@ -20,18 +21,20 @@ from changedetectionio.content_fetchers.exceptions import PageUnloadable, Non200
|
||||
# Screenshots also travel via the ws:// (websocket) meaning that the binary data is base64 encoded
|
||||
# which will significantly increase the IO size between the server and client, it's recommended to use the lowest
|
||||
# acceptable screenshot quality here
|
||||
async def capture_full_page(page, screenshot_format='JPEG'):
|
||||
async def capture_full_page(page, screenshot_format='JPEG', watch_uuid=None, lock_viewport_elements=False):
|
||||
import os
|
||||
import time
|
||||
import multiprocessing
|
||||
|
||||
start = time.time()
|
||||
watch_info = f"[{watch_uuid}] " if watch_uuid else ""
|
||||
|
||||
setup_start = time.time()
|
||||
page_height = await page.evaluate("document.documentElement.scrollHeight")
|
||||
page_width = await page.evaluate("document.documentElement.scrollWidth")
|
||||
original_viewport = page.viewport
|
||||
dimensions_time = time.time() - setup_start
|
||||
|
||||
logger.debug(f"Puppeteer viewport size {page.viewport} page height {page_height} page width {page_width}")
|
||||
logger.debug(f"{watch_info}Puppeteer viewport size {page.viewport} page height {page_height} page width {page_width} (got dimensions in {dimensions_time:.2f}s)")
|
||||
|
||||
# Bug 3 in Playwright screenshot handling
|
||||
# Some bug where it gives the wrong screenshot size, but making a request with the clip set first seems to solve it
|
||||
@@ -50,20 +53,35 @@ async def capture_full_page(page, screenshot_format='JPEG'):
|
||||
screenshot_chunks = []
|
||||
y = 0
|
||||
elements_locked = False
|
||||
if page_height > page.viewport['height']:
|
||||
# Lock all element dimensions BEFORE screenshot to prevent CSS media queries from resizing
|
||||
# capture_full_page() changes viewport height which triggers @media (min-height) rules
|
||||
|
||||
# Only lock viewport elements if explicitly enabled (for image_ssim_diff processor)
|
||||
# This prevents headers/ads from resizing when viewport changes
|
||||
if lock_viewport_elements and page_height > page.viewport['height']:
|
||||
lock_start = time.time()
|
||||
lock_elements_js_path = os.path.join(os.path.dirname(__file__), 'res', 'lock-elements-sizing.js')
|
||||
file_read_start = time.time()
|
||||
with open(lock_elements_js_path, 'r') as f:
|
||||
lock_elements_js = f.read()
|
||||
await page.evaluate(lock_elements_js)
|
||||
elements_locked = True
|
||||
logger.debug("Element dimensions locked before screenshot capture")
|
||||
file_read_time = time.time() - file_read_start
|
||||
|
||||
evaluate_start = time.time()
|
||||
await page.evaluate(lock_elements_js)
|
||||
evaluate_time = time.time() - evaluate_start
|
||||
|
||||
elements_locked = True
|
||||
lock_time = time.time() - lock_start
|
||||
logger.debug(f"{watch_info}Viewport element locking enabled - File read: {file_read_time:.3f}s, Browser evaluate: {evaluate_time:.2f}s, Total: {lock_time:.2f}s")
|
||||
|
||||
if page_height > page.viewport['height']:
|
||||
if page_height < step_size:
|
||||
step_size = page_height # Incase page is bigger than default viewport but smaller than proposed step size
|
||||
viewport_start = time.time()
|
||||
await page.setViewport({'width': page.viewport['width'], 'height': step_size})
|
||||
viewport_time = time.time() - viewport_start
|
||||
logger.debug(f"{watch_info}Viewport changed to {page.viewport['width']}x{step_size} (took {viewport_time:.2f}s)")
|
||||
|
||||
capture_start = time.time()
|
||||
chunk_times = []
|
||||
while y < min(page_height, SCREENSHOT_MAX_TOTAL_HEIGHT):
|
||||
# better than scrollTo incase they override it in the page
|
||||
await page.evaluate(
|
||||
@@ -82,7 +100,11 @@ async def capture_full_page(page, screenshot_format='JPEG'):
|
||||
if screenshot_type == 'jpeg':
|
||||
screenshot_kwargs['quality'] = screenshot_quality
|
||||
|
||||
chunk_start = time.time()
|
||||
screenshot_chunks.append(await page.screenshot(**screenshot_kwargs))
|
||||
chunk_time = time.time() - chunk_start
|
||||
chunk_times.append(chunk_time)
|
||||
logger.debug(f"{watch_info}Chunk {len(screenshot_chunks)} captured in {chunk_time:.2f}s")
|
||||
y += step_size
|
||||
|
||||
await page.setViewport({'width': original_viewport['width'], 'height': original_viewport['height']})
|
||||
@@ -93,26 +115,53 @@ async def capture_full_page(page, screenshot_format='JPEG'):
|
||||
with open(unlock_elements_js_path, 'r') as f:
|
||||
unlock_elements_js = f.read()
|
||||
await page.evaluate(unlock_elements_js)
|
||||
logger.debug("Element dimensions unlocked after screenshot capture")
|
||||
logger.debug(f"{watch_info}Element dimensions unlocked after screenshot capture")
|
||||
|
||||
capture_time = time.time() - capture_start
|
||||
total_capture_time = sum(chunk_times)
|
||||
logger.debug(f"{watch_info}All {len(screenshot_chunks)} chunks captured in {capture_time:.2f}s (total chunk time: {total_capture_time:.2f}s)")
|
||||
|
||||
if len(screenshot_chunks) > 1:
|
||||
# Always use spawn for thread safety - consistent behavior in tests and production
|
||||
from changedetectionio.content_fetchers.screenshot_handler import stitch_images_worker
|
||||
logger.debug(f"Screenshot stitching {len(screenshot_chunks)} chunks together")
|
||||
stitch_start = time.time()
|
||||
logger.debug(f"{watch_info}Starting stitching of {len(screenshot_chunks)} chunks")
|
||||
|
||||
# Always use spawn subprocess for ANY stitching (2+ chunks)
|
||||
# PIL allocates at C level and Python GC never releases it - subprocess exit forces OS to reclaim
|
||||
# Trade-off: 35MB resource_tracker vs 500MB+ PIL leak in main process
|
||||
from changedetectionio.content_fetchers.screenshot_handler import stitch_images_worker_raw_bytes
|
||||
import multiprocessing
|
||||
import struct
|
||||
|
||||
ctx = multiprocessing.get_context('spawn')
|
||||
parent_conn, child_conn = ctx.Pipe()
|
||||
p = ctx.Process(target=stitch_images_worker, args=(child_conn, screenshot_chunks, page_height, SCREENSHOT_MAX_TOTAL_HEIGHT))
|
||||
p = ctx.Process(target=stitch_images_worker_raw_bytes, args=(child_conn, page_height, SCREENSHOT_MAX_TOTAL_HEIGHT))
|
||||
p.start()
|
||||
|
||||
# Send via raw bytes (no pickle)
|
||||
parent_conn.send_bytes(struct.pack('I', len(screenshot_chunks)))
|
||||
for chunk in screenshot_chunks:
|
||||
parent_conn.send_bytes(chunk)
|
||||
|
||||
screenshot = parent_conn.recv_bytes()
|
||||
p.join()
|
||||
logger.debug(
|
||||
f"Screenshot (chunked/stitched) - Page height: {page_height} Capture height: {SCREENSHOT_MAX_TOTAL_HEIGHT} - Stitched together in {time.time() - start:.2f}s")
|
||||
|
||||
screenshot_chunks = None
|
||||
parent_conn.close()
|
||||
child_conn.close()
|
||||
del p, parent_conn, child_conn
|
||||
|
||||
stitch_time = time.time() - stitch_start
|
||||
total_time = time.time() - start
|
||||
setup_time = total_time - capture_time - stitch_time
|
||||
logger.debug(
|
||||
f"{watch_info}Screenshot complete - Page height: {page_height}px, Capture height: {SCREENSHOT_MAX_TOTAL_HEIGHT}px | "
|
||||
f"Setup: {setup_time:.2f}s, Capture: {capture_time:.2f}s, Stitching: {stitch_time:.2f}s, Total: {total_time:.2f}s")
|
||||
return screenshot
|
||||
|
||||
total_time = time.time() - start
|
||||
setup_time = total_time - capture_time
|
||||
logger.debug(
|
||||
f"Screenshot Page height: {page_height} Capture height: {SCREENSHOT_MAX_TOTAL_HEIGHT} - Stitched together in {time.time() - start:.2f}s")
|
||||
f"{watch_info}Screenshot complete - Page height: {page_height}px, Capture height: {SCREENSHOT_MAX_TOTAL_HEIGHT}px | "
|
||||
f"Setup: {setup_time:.2f}s, Single chunk: {capture_time:.2f}s, Total: {total_time:.2f}s")
|
||||
return screenshot_chunks[0]
|
||||
|
||||
|
||||
@@ -173,19 +222,36 @@ class fetcher(Fetcher):
|
||||
self.browser_connection_url += f"{r}--proxy-server={proxy_url}"
|
||||
|
||||
async def quit(self, watch=None):
|
||||
try:
|
||||
await self.page.close()
|
||||
del self.page
|
||||
except Exception as e:
|
||||
pass
|
||||
watch_uuid = watch.get('uuid') if watch else 'unknown'
|
||||
|
||||
# Close page
|
||||
try:
|
||||
await self.browser.close()
|
||||
del self.browser
|
||||
if hasattr(self, 'page') and self.page:
|
||||
await asyncio.wait_for(self.page.close(), timeout=5.0)
|
||||
logger.debug(f"[{watch_uuid}] Page closed successfully")
|
||||
except asyncio.TimeoutError:
|
||||
logger.warning(f"[{watch_uuid}] Timed out closing page (5s)")
|
||||
except Exception as e:
|
||||
pass
|
||||
logger.warning(f"[{watch_uuid}] Error closing page: {e}")
|
||||
finally:
|
||||
self.page = None
|
||||
|
||||
logger.info("Cleanup puppeteer complete.")
|
||||
# Close browser connection
|
||||
try:
|
||||
if hasattr(self, 'browser') and self.browser:
|
||||
await asyncio.wait_for(self.browser.close(), timeout=5.0)
|
||||
logger.debug(f"[{watch_uuid}] Browser closed successfully")
|
||||
except asyncio.TimeoutError:
|
||||
logger.warning(f"[{watch_uuid}] Timed out closing browser (5s)")
|
||||
except Exception as e:
|
||||
logger.warning(f"[{watch_uuid}] Error closing browser: {e}")
|
||||
finally:
|
||||
self.browser = None
|
||||
|
||||
logger.info(f"[{watch_uuid}] Cleanup puppeteer complete")
|
||||
|
||||
# Force garbage collection to release resources
|
||||
gc.collect()
|
||||
|
||||
async def fetch_page(self,
|
||||
current_include_filters,
|
||||
@@ -215,9 +281,11 @@ class fetcher(Fetcher):
|
||||
# Connect directly using the specified browser_ws_endpoint
|
||||
# @todo timeout
|
||||
try:
|
||||
logger.debug(f"[{watch_uuid}] Connecting to browser at {self.browser_connection_url}")
|
||||
self.browser = await pyppeteer_instance.connect(browserWSEndpoint=self.browser_connection_url,
|
||||
ignoreHTTPSErrors=True
|
||||
)
|
||||
logger.debug(f"[{watch_uuid}] Browser connected successfully")
|
||||
except websockets.exceptions.InvalidStatusCode as e:
|
||||
raise BrowserConnectError(msg=f"Error while trying to connect the browser, Code {e.status_code} (check your access, whitelist IP, password etc)")
|
||||
except websockets.exceptions.InvalidURI:
|
||||
@@ -226,7 +294,18 @@ class fetcher(Fetcher):
|
||||
raise BrowserConnectError(msg=f"Error connecting to the browser - Exception '{str(e)}'")
|
||||
|
||||
# more reliable is to just request a new page
|
||||
self.page = await self.browser.newPage()
|
||||
try:
|
||||
logger.debug(f"[{watch_uuid}] Creating new page")
|
||||
self.page = await self.browser.newPage()
|
||||
logger.debug(f"[{watch_uuid}] Page created successfully")
|
||||
except Exception as e:
|
||||
logger.error(f"[{watch_uuid}] Failed to create new page: {e}")
|
||||
# Browser is connected but page creation failed - must cleanup browser
|
||||
try:
|
||||
await asyncio.wait_for(self.browser.close(), timeout=3.0)
|
||||
except Exception as cleanup_error:
|
||||
logger.error(f"[{watch_uuid}] Failed to cleanup browser after page creation failure: {cleanup_error}")
|
||||
raise
|
||||
|
||||
# Add console handler to capture console.log from favicon fetcher
|
||||
#self.page.on('console', lambda msg: logger.debug(f"Browser console [{msg.type}]: {msg.text}"))
|
||||
@@ -295,6 +374,12 @@ class fetcher(Fetcher):
|
||||
w = extra_wait - 2 if extra_wait > 4 else 2
|
||||
logger.debug(f"Waiting {w} seconds before calling Page.stopLoading...")
|
||||
await asyncio.sleep(w)
|
||||
|
||||
# Check if page still exists (might have been closed due to error during sleep)
|
||||
if not self.page or not hasattr(self.page, '_client'):
|
||||
logger.debug("Page already closed, skipping stopLoading")
|
||||
return
|
||||
|
||||
logger.debug("Issuing stopLoading command...")
|
||||
await self.page._client.send('Page.stopLoading')
|
||||
logger.debug("stopLoading command sent!")
|
||||
@@ -320,7 +405,9 @@ class fetcher(Fetcher):
|
||||
asyncio.create_task(handle_frame_navigation())
|
||||
response = await self.page.goto(url, timeout=0)
|
||||
await asyncio.sleep(1 + extra_wait)
|
||||
await self.page._client.send('Page.stopLoading')
|
||||
# Check if page still exists before sending command
|
||||
if self.page and hasattr(self.page, '_client'):
|
||||
await self.page._client.send('Page.stopLoading')
|
||||
|
||||
if response:
|
||||
break
|
||||
@@ -357,7 +444,7 @@ class fetcher(Fetcher):
|
||||
logger.error(f"Error fetching FavIcon info {str(e)}, continuing.")
|
||||
|
||||
if self.status_code != 200 and not ignore_status_codes:
|
||||
screenshot = await capture_full_page(page=self.page, screenshot_format=self.screenshot_format)
|
||||
screenshot = await capture_full_page(page=self.page, screenshot_format=self.screenshot_format, watch_uuid=watch_uuid, lock_viewport_elements=self.lock_viewport_elements)
|
||||
|
||||
raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, screenshot=screenshot)
|
||||
|
||||
@@ -387,7 +474,11 @@ class fetcher(Fetcher):
|
||||
|
||||
# Now take screenshot (scrolling may trigger layout changes, but measurements are already captured)
|
||||
logger.debug(f"Screenshot format {self.screenshot_format}")
|
||||
self.screenshot = await capture_full_page(page=self.page, screenshot_format=self.screenshot_format)
|
||||
self.screenshot = await capture_full_page(page=self.page, screenshot_format=self.screenshot_format, watch_uuid=watch_uuid, lock_viewport_elements=self.lock_viewport_elements)
|
||||
|
||||
# Force garbage collection - pyppeteer base64 decode creates temporary buffers
|
||||
import gc
|
||||
gc.collect()
|
||||
self.xpath_data = await self.page.evaluate(XPATH_ELEMENT_JS, {
|
||||
"visualselector_xpath_selectors": visualselector_xpath_selectors,
|
||||
"max_height": MAX_TOTAL_HEIGHT
|
||||
|
||||
@@ -55,6 +55,26 @@ class fetcher(Fetcher):
|
||||
|
||||
session = requests.Session()
|
||||
|
||||
# Configure retry adapter for low-level network errors only
|
||||
# Retries connection timeouts, read timeouts, connection resets - not HTTP status codes
|
||||
# Especially helpful in parallel test execution when servers are slow/overloaded
|
||||
# Configurable via REQUESTS_RETRY_MAX_COUNT (default: 3 attempts)
|
||||
from requests.adapters import HTTPAdapter
|
||||
from urllib3.util.retry import Retry
|
||||
|
||||
max_retries = int(os.getenv("REQUESTS_RETRY_MAX_COUNT", "6"))
|
||||
retry_strategy = Retry(
|
||||
total=max_retries,
|
||||
connect=max_retries, # Retry connection timeouts
|
||||
read=max_retries, # Retry read timeouts
|
||||
status=0, # Don't retry on HTTP status codes
|
||||
backoff_factor=0.5, # Wait 0.3s, 0.6s, 1.2s between retries
|
||||
allowed_methods=["HEAD", "GET", "OPTIONS", "POST"],
|
||||
raise_on_status=False
|
||||
)
|
||||
adapter = HTTPAdapter(max_retries=retry_strategy)
|
||||
session.mount("http://", adapter)
|
||||
session.mount("https://", adapter)
|
||||
|
||||
if strtobool(os.getenv('ALLOW_FILE_URI', 'false')) and url.startswith('file://'):
|
||||
from requests_file import FileAdapter
|
||||
@@ -142,10 +162,11 @@ class fetcher(Fetcher):
|
||||
watch_uuid=None,
|
||||
):
|
||||
"""Async wrapper that runs the synchronous requests code in a thread pool"""
|
||||
|
||||
|
||||
loop = asyncio.get_event_loop()
|
||||
|
||||
|
||||
# Run the synchronous _run_sync in a thread pool to avoid blocking the event loop
|
||||
# Retry logic is handled by requests' HTTPAdapter (see _run_sync for configuration)
|
||||
await loop.run_in_executor(
|
||||
None, # Use default ThreadPoolExecutor
|
||||
lambda: self._run_sync(
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/**
|
||||
* Lock Element Dimensions for Screenshot Capture
|
||||
* Lock Element Dimensions for Screenshot Capture (First Viewport Only)
|
||||
*
|
||||
* THE PROBLEM:
|
||||
* When taking full-page screenshots of tall pages, Chrome/Puppeteer/Playwright need to:
|
||||
@@ -10,40 +10,31 @@
|
||||
* However, changing the viewport height triggers CSS media queries like:
|
||||
* @media (min-height: 860px) { .ad { height: 250px; } }
|
||||
*
|
||||
* This causes elements (especially ads) to resize during screenshot capture, creating a mismatch:
|
||||
* - Screenshot shows element at NEW size (after media query triggered)
|
||||
* - xpath element coordinates measured at OLD size (before viewport change)
|
||||
* - Visual selector overlays don't align with screenshot
|
||||
*
|
||||
* EXAMPLE BUG:
|
||||
* - Initial viewport: 1280x800, ad height: 138px, article position: 279px ✓
|
||||
* - Viewport changes to 1280x3809 for screenshot
|
||||
* - Media query triggers: ad expands to 250px
|
||||
* - All content below shifts down by 112px (250-138)
|
||||
* - Article now at position: 391px (279+112)
|
||||
* - But xpath data says 279px → 112px mismatch! ✗
|
||||
* This causes elements (especially ads/headers) to resize during screenshot capture.
|
||||
*
|
||||
* THE SOLUTION:
|
||||
* Before changing viewport, lock ALL element dimensions with !important inline styles.
|
||||
* Inline styles with !important override media query CSS, preventing layout changes.
|
||||
* Lock element dimensions in the FIRST VIEWPORT ONLY with !important inline styles.
|
||||
* This prevents headers, navigation, and top ads from resizing when viewport changes.
|
||||
* We only lock the visible portion because:
|
||||
* - Most layout shifts happen in headers/navbars/top ads
|
||||
* - Locking only visible elements is 100x+ faster (100-200 elements vs 10,000+)
|
||||
* - Below-fold content shifts don't affect visual comparison accuracy
|
||||
*
|
||||
* WHAT THIS SCRIPT DOES:
|
||||
* 1. Iterates through every element on the page
|
||||
* 2. Captures current computed dimensions (width, height)
|
||||
* 3. Sets inline styles with !important to freeze those dimensions
|
||||
* 1. Gets current viewport height
|
||||
* 2. Finds elements within first viewport (top of page to bottom of screen)
|
||||
* 3. Locks their dimensions with !important inline styles
|
||||
* 4. Disables ResizeObserver API (for JS-based resizing)
|
||||
* 5. When viewport changes for screenshot, media queries can't resize anything
|
||||
* 6. Layout remains consistent → xpath coordinates match screenshot ✓
|
||||
*
|
||||
* USAGE:
|
||||
* Execute this script BEFORE calling capture_full_page() / screenshot functions.
|
||||
* The page must be fully loaded and settled at its initial viewport size.
|
||||
* No need to restore state afterward - page is closed after screenshot.
|
||||
* Only enabled for image_ssim_diff processor (visual comparison).
|
||||
* Default: OFF for performance.
|
||||
*
|
||||
* PERFORMANCE:
|
||||
* - Iterates all DOM elements (can be 1000s on complex pages)
|
||||
* - Typically completes in 50-200ms
|
||||
* - One-time cost before screenshot, well worth it for coordinate accuracy
|
||||
* - Only processes 100-300 elements (first viewport) vs 10,000+ (entire page)
|
||||
* - Typically completes in 10-50ms
|
||||
* - 100x+ faster than locking entire page
|
||||
*
|
||||
* @see https://github.com/dgtlmoon/changedetection.io/issues/XXXX
|
||||
*/
|
||||
@@ -52,11 +43,34 @@
|
||||
// Store original styles in a global WeakMap for later restoration
|
||||
window.__elementSizingRestore = new WeakMap();
|
||||
|
||||
// Lock ALL element dimensions to prevent media query layout changes
|
||||
document.querySelectorAll('*').forEach(el => {
|
||||
const computed = window.getComputedStyle(el);
|
||||
const rect = el.getBoundingClientRect();
|
||||
const start = performance.now();
|
||||
|
||||
// Get current viewport height (visible portion of page)
|
||||
const viewportHeight = window.innerHeight;
|
||||
|
||||
// Get all elements and filter to FIRST VIEWPORT ONLY
|
||||
// This dramatically reduces elements to process (100-300 vs 10,000+)
|
||||
const allElements = Array.from(document.querySelectorAll('*'));
|
||||
|
||||
// BATCH READ PHASE: Get bounding rects and filter to viewport
|
||||
const measurements = allElements.map(el => {
|
||||
const rect = el.getBoundingClientRect();
|
||||
const computed = window.getComputedStyle(el);
|
||||
|
||||
// Only lock elements in the first viewport (visible on initial page load)
|
||||
// rect.top < viewportHeight means element starts within visible area
|
||||
const inViewport = rect.top < viewportHeight && rect.top >= 0;
|
||||
const hasSize = rect.height > 0 && rect.width > 0;
|
||||
|
||||
return inViewport && hasSize ? { el, computed, rect } : null;
|
||||
}).filter(Boolean); // Remove null entries
|
||||
|
||||
const elapsed = performance.now() - start;
|
||||
console.log(`Locked first viewport elements: ${measurements.length} of ${allElements.length} total elements (viewport height: ${viewportHeight}px, took ${elapsed.toFixed(0)}ms)`);
|
||||
|
||||
// BATCH WRITE PHASE: Apply all inline styles without triggering layout
|
||||
// No interleaved reads means browser can optimize style application
|
||||
measurements.forEach(({el, computed, rect}) => {
|
||||
// Save original inline style values BEFORE locking
|
||||
const properties = ['height', 'min-height', 'max-height', 'width', 'min-width', 'max-width'];
|
||||
const originalStyles = {};
|
||||
@@ -89,5 +103,5 @@
|
||||
disconnect() {}
|
||||
};
|
||||
|
||||
console.log('✓ Element dimensions locked to prevent media query changes during screenshot');
|
||||
console.log(`✓ Element dimensions locked (${measurements.length} elements) to prevent media query changes during screenshot`);
|
||||
})();
|
||||
|
||||
@@ -8,92 +8,42 @@ from loguru import logger
|
||||
|
||||
from changedetectionio.content_fetchers import SCREENSHOT_MAX_HEIGHT_DEFAULT, SCREENSHOT_DEFAULT_QUALITY
|
||||
|
||||
# Cache font to avoid loading on every stitch
|
||||
_cached_font = None
|
||||
|
||||
def _get_caption_font():
|
||||
"""Get or create cached font for caption text."""
|
||||
global _cached_font
|
||||
if _cached_font is None:
|
||||
from PIL import ImageFont
|
||||
try:
|
||||
_cached_font = ImageFont.truetype("arial.ttf", 35)
|
||||
except IOError:
|
||||
_cached_font = ImageFont.load_default()
|
||||
return _cached_font
|
||||
|
||||
|
||||
def stitch_images_inline(chunks_bytes, original_page_height, capture_height):
|
||||
"""
|
||||
Stitch image chunks together inline (no multiprocessing).
|
||||
Optimized for small number of chunks (2-3) to avoid process creation overhead.
|
||||
|
||||
Args:
|
||||
chunks_bytes: List of JPEG image bytes
|
||||
original_page_height: Original page height in pixels
|
||||
capture_height: Maximum capture height
|
||||
|
||||
Returns:
|
||||
bytes: Stitched JPEG image
|
||||
"""
|
||||
import os
|
||||
import io
|
||||
from PIL import Image, ImageDraw
|
||||
|
||||
# Load images from byte chunks
|
||||
images = [Image.open(io.BytesIO(b)) for b in chunks_bytes]
|
||||
total_height = sum(im.height for im in images)
|
||||
max_width = max(im.width for im in images)
|
||||
|
||||
# Create stitched image
|
||||
stitched = Image.new('RGB', (max_width, total_height))
|
||||
y_offset = 0
|
||||
for im in images:
|
||||
stitched.paste(im, (0, y_offset))
|
||||
y_offset += im.height
|
||||
im.close() # Close immediately after pasting
|
||||
|
||||
# Draw caption only if page was trimmed
|
||||
if original_page_height > capture_height:
|
||||
draw = ImageDraw.Draw(stitched)
|
||||
caption_text = f"WARNING: Screenshot was {original_page_height}px but trimmed to {capture_height}px because it was too long"
|
||||
padding = 10
|
||||
font = _get_caption_font()
|
||||
|
||||
bbox = draw.textbbox((0, 0), caption_text, font=font)
|
||||
text_width = bbox[2] - bbox[0]
|
||||
text_height = bbox[3] - bbox[1]
|
||||
|
||||
# Draw white background rectangle
|
||||
draw.rectangle([(0, 0), (max_width, text_height + 2 * padding)], fill=(255, 255, 255))
|
||||
|
||||
# Draw text centered
|
||||
text_x = (max_width - text_width) // 2
|
||||
draw.text((text_x, padding), caption_text, font=font, fill=(255, 0, 0))
|
||||
|
||||
# Encode to JPEG
|
||||
output = io.BytesIO()
|
||||
stitched.save(output, format="JPEG", quality=int(os.getenv("SCREENSHOT_QUALITY", SCREENSHOT_DEFAULT_QUALITY)), optimize=True)
|
||||
result = output.getvalue()
|
||||
|
||||
# Cleanup
|
||||
stitched.close()
|
||||
|
||||
return result
|
||||
|
||||
|
||||
def stitch_images_worker(pipe_conn, chunks_bytes, original_page_height, capture_height):
|
||||
def stitch_images_worker_raw_bytes(pipe_conn, original_page_height, capture_height):
|
||||
"""
|
||||
Stitch image chunks together in a separate process.
|
||||
Used for large number of chunks (4+) to avoid blocking the main event loop.
|
||||
|
||||
Uses spawn multiprocessing to isolate PIL's C-level memory allocation.
|
||||
When the subprocess exits, the OS reclaims ALL memory including C-level allocations
|
||||
that Python's GC cannot release. This prevents the ~50MB per stitch from accumulating
|
||||
in the main process.
|
||||
|
||||
Trade-off: Adds 35MB resource_tracker subprocess, but prevents 500MB+ memory leak
|
||||
in main process (much better at scale: 35GB vs 500GB for 1000 instances).
|
||||
|
||||
Args:
|
||||
pipe_conn: Pipe connection to receive data and send result
|
||||
original_page_height: Original page height in pixels
|
||||
capture_height: Maximum capture height
|
||||
"""
|
||||
import os
|
||||
import io
|
||||
import struct
|
||||
from PIL import Image, ImageDraw, ImageFont
|
||||
|
||||
try:
|
||||
# Receive chunk count as 4-byte integer (no pickle!)
|
||||
count_bytes = pipe_conn.recv_bytes()
|
||||
chunk_count = struct.unpack('I', count_bytes)[0]
|
||||
|
||||
# Receive each chunk as raw bytes (no pickle!)
|
||||
chunks_bytes = []
|
||||
for _ in range(chunk_count):
|
||||
chunks_bytes.append(pipe_conn.recv_bytes())
|
||||
|
||||
# Load images from byte chunks
|
||||
images = [Image.open(io.BytesIO(b)) for b in chunks_bytes]
|
||||
del chunks_bytes
|
||||
|
||||
total_height = sum(im.height for im in images)
|
||||
max_width = max(im.width for im in images)
|
||||
|
||||
@@ -103,15 +53,14 @@ def stitch_images_worker(pipe_conn, chunks_bytes, original_page_height, capture_
|
||||
for im in images:
|
||||
stitched.paste(im, (0, y_offset))
|
||||
y_offset += im.height
|
||||
im.close() # Close immediately after pasting
|
||||
im.close()
|
||||
del images
|
||||
|
||||
# Draw caption only if page was trimmed
|
||||
if original_page_height > capture_height:
|
||||
draw = ImageDraw.Draw(stitched)
|
||||
caption_text = f"WARNING: Screenshot was {original_page_height}px but trimmed to {capture_height}px because it was too long"
|
||||
padding = 10
|
||||
|
||||
# Try to load font
|
||||
try:
|
||||
font = ImageFont.truetype("arial.ttf", 35)
|
||||
except IOError:
|
||||
@@ -120,23 +69,26 @@ def stitch_images_worker(pipe_conn, chunks_bytes, original_page_height, capture_
|
||||
bbox = draw.textbbox((0, 0), caption_text, font=font)
|
||||
text_width = bbox[2] - bbox[0]
|
||||
text_height = bbox[3] - bbox[1]
|
||||
|
||||
# Draw white background rectangle
|
||||
draw.rectangle([(0, 0), (max_width, text_height + 2 * padding)], fill=(255, 255, 255))
|
||||
|
||||
# Draw text centered
|
||||
text_x = (max_width - text_width) // 2
|
||||
draw.text((text_x, padding), caption_text, font=font, fill=(255, 0, 0))
|
||||
|
||||
# Encode and send image with optimization
|
||||
# Encode and send
|
||||
output = io.BytesIO()
|
||||
stitched.save(output, format="JPEG", quality=int(os.getenv("SCREENSHOT_QUALITY", SCREENSHOT_DEFAULT_QUALITY)), optimize=True)
|
||||
pipe_conn.send_bytes(output.getvalue())
|
||||
result_bytes = output.getvalue()
|
||||
|
||||
stitched.close()
|
||||
del stitched
|
||||
output.close()
|
||||
del output
|
||||
|
||||
pipe_conn.send_bytes(result_bytes)
|
||||
del result_bytes
|
||||
|
||||
except Exception as e:
|
||||
pipe_conn.send(f"error:{e}")
|
||||
logger.error(f"Error in stitch_images_worker_raw_bytes: {e}")
|
||||
error_msg = f"error:{e}".encode('utf-8')
|
||||
pipe_conn.send_bytes(error_msg)
|
||||
finally:
|
||||
pipe_conn.close()
|
||||
|
||||
|
||||
|
||||
@@ -14,7 +14,7 @@ from pathlib import Path
|
||||
from changedetectionio.strtobool import strtobool
|
||||
from threading import Event
|
||||
from changedetectionio.queue_handlers import RecheckPriorityQueue, NotificationQueue
|
||||
from changedetectionio import worker_handler
|
||||
from changedetectionio import worker_pool
|
||||
|
||||
from flask import (
|
||||
Flask,
|
||||
@@ -94,6 +94,14 @@ if os.getenv('FLASK_SERVER_NAME'):
|
||||
app.config['BABEL_TRANSLATION_DIRECTORIES'] = str(Path(__file__).parent / 'translations')
|
||||
app.config['BABEL_DEFAULT_LOCALE'] = 'en_GB'
|
||||
|
||||
# Session configuration
|
||||
# NOTE: Flask session (for locale, etc.) is separate from Flask-Login's remember-me cookie
|
||||
# - Flask session stores data like session['locale'] in a signed cookie
|
||||
# - Flask-Login's remember=True creates a separate authentication cookie
|
||||
# - Setting PERMANENT_SESSION_LIFETIME controls how long the Flask session cookie lasts
|
||||
from datetime import timedelta
|
||||
app.config['PERMANENT_SESSION_LIFETIME'] = timedelta(days=3650) # ~10 years (effectively unlimited)
|
||||
|
||||
#app.config["EXPLAIN_TEMPLATE_LOADING"] = True
|
||||
|
||||
|
||||
@@ -187,7 +195,7 @@ def _jinja2_filter_format_number_locale(value: float) -> str:
|
||||
|
||||
@app.template_global('is_checking_now')
|
||||
def _watch_is_checking_now(watch_obj, format="%Y-%m-%d %H:%M:%S"):
|
||||
return worker_handler.is_watch_running(watch_obj['uuid'])
|
||||
return worker_pool.is_watch_running(watch_obj['uuid'])
|
||||
|
||||
@app.template_global('get_watch_queue_position')
|
||||
def _get_watch_queue_position(watch_obj):
|
||||
@@ -198,13 +206,13 @@ def _get_watch_queue_position(watch_obj):
|
||||
@app.template_global('get_current_worker_count')
|
||||
def _get_current_worker_count():
|
||||
"""Get the current number of operational workers"""
|
||||
return worker_handler.get_worker_count()
|
||||
return worker_pool.get_worker_count()
|
||||
|
||||
@app.template_global('get_worker_status_info')
|
||||
def _get_worker_status_info():
|
||||
"""Get detailed worker status information for display"""
|
||||
status = worker_handler.get_worker_status()
|
||||
running_uuids = worker_handler.get_running_uuids()
|
||||
status = worker_pool.get_worker_status()
|
||||
running_uuids = worker_pool.get_running_uuids()
|
||||
|
||||
return {
|
||||
'count': status['worker_count'],
|
||||
@@ -258,6 +266,47 @@ def _jinja2_filter_seconds_precise(timestamp):
|
||||
|
||||
return format(int(time.time()-timestamp), ',d')
|
||||
|
||||
@app.template_filter('format_duration')
|
||||
def _jinja2_filter_format_duration(seconds):
|
||||
"""Format a duration in seconds into human readable string like '5 days, 3 hours, 30 minutes'"""
|
||||
from datetime import timedelta
|
||||
|
||||
if not seconds or seconds < 0:
|
||||
return gettext('0 seconds')
|
||||
|
||||
td = timedelta(seconds=int(seconds))
|
||||
|
||||
# Calculate components
|
||||
years = td.days // 365
|
||||
remaining_days = td.days % 365
|
||||
months = remaining_days // 30
|
||||
remaining_days = remaining_days % 30
|
||||
weeks = remaining_days // 7
|
||||
days = remaining_days % 7
|
||||
|
||||
hours = td.seconds // 3600
|
||||
minutes = (td.seconds % 3600) // 60
|
||||
secs = td.seconds % 60
|
||||
|
||||
# Build parts list
|
||||
parts = []
|
||||
if years > 0:
|
||||
parts.append(f"{years} {gettext('year') if years == 1 else gettext('years')}")
|
||||
if months > 0:
|
||||
parts.append(f"{months} {gettext('month') if months == 1 else gettext('months')}")
|
||||
if weeks > 0:
|
||||
parts.append(f"{weeks} {gettext('week') if weeks == 1 else gettext('weeks')}")
|
||||
if days > 0:
|
||||
parts.append(f"{days} {gettext('day') if days == 1 else gettext('days')}")
|
||||
if hours > 0:
|
||||
parts.append(f"{hours} {gettext('hour') if hours == 1 else gettext('hours')}")
|
||||
if minutes > 0:
|
||||
parts.append(f"{minutes} {gettext('minute') if minutes == 1 else gettext('minutes')}")
|
||||
if secs > 0 or not parts:
|
||||
parts.append(f"{secs} {gettext('second') if secs == 1 else gettext('seconds')}")
|
||||
|
||||
return ", ".join(parts)
|
||||
|
||||
@app.template_filter('fetcher_status_icons')
|
||||
def _jinja2_filter_fetcher_status_icons(fetcher_name):
|
||||
"""Get status icon HTML for a given fetcher.
|
||||
@@ -393,7 +442,10 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
# so far just for read-only via tests, but this will be moved eventually to be the main source
|
||||
# (instead of the global var)
|
||||
app.config['DATASTORE'] = datastore_o
|
||||
|
||||
|
||||
# Store batch mode flag to skip background threads when running in batch mode
|
||||
app.config['batch_mode'] = config.get('batch_mode', False) if config else False
|
||||
|
||||
# Store the signal in the app config to ensure it's accessible everywhere
|
||||
app.config['watch_check_update_SIGNAL'] = watch_check_update
|
||||
|
||||
@@ -550,6 +602,9 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
|
||||
# Validate the locale against available languages
|
||||
if locale in language_codes:
|
||||
# Make session permanent so language preference persists across browser sessions
|
||||
# NOTE: This is the Flask session cookie (separate from Flask-Login's remember-me auth cookie)
|
||||
session.permanent = True
|
||||
session['locale'] = locale
|
||||
|
||||
# CRITICAL: Flask-Babel caches the locale in the request context (ctx.babel_locale)
|
||||
@@ -689,10 +744,10 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
favicon_filename = watch.get_favicon_filename()
|
||||
if favicon_filename:
|
||||
# Use cached MIME type detection
|
||||
filepath = os.path.join(watch.watch_data_dir, favicon_filename)
|
||||
filepath = os.path.join(watch.data_dir, favicon_filename)
|
||||
mime = get_favicon_mime_type(filepath)
|
||||
|
||||
response = make_response(send_from_directory(watch.watch_data_dir, favicon_filename))
|
||||
response = make_response(send_from_directory(watch.data_dir, favicon_filename))
|
||||
response.headers['Content-type'] = mime
|
||||
response.headers['Cache-Control'] = 'max-age=300, must-revalidate' # Cache for 5 minutes, then revalidate
|
||||
return response
|
||||
@@ -787,13 +842,15 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
|
||||
# watchlist UI buttons etc
|
||||
import changedetectionio.blueprint.ui as ui
|
||||
app.register_blueprint(ui.construct_blueprint(datastore, update_q, worker_handler, queuedWatchMetaData, watch_check_update))
|
||||
app.register_blueprint(ui.construct_blueprint(datastore, update_q, worker_pool, queuedWatchMetaData, watch_check_update))
|
||||
|
||||
import changedetectionio.blueprint.watchlist as watchlist
|
||||
app.register_blueprint(watchlist.construct_blueprint(datastore=datastore, update_q=update_q, queuedWatchMetaData=queuedWatchMetaData), url_prefix='')
|
||||
|
||||
# Initialize Socket.IO server conditionally based on settings
|
||||
socket_io_enabled = datastore.data['settings']['application']['ui'].get('socket_io_enabled', True)
|
||||
socket_io_enabled = datastore.data['settings']['application'].get('ui', {}).get('socket_io_enabled', True)
|
||||
if socket_io_enabled and app.config.get('batch_mode'):
|
||||
socket_io_enabled = False
|
||||
if socket_io_enabled:
|
||||
from changedetectionio.realtime.socket_server import init_socketio
|
||||
global socketio_server
|
||||
@@ -822,10 +879,10 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
expected_workers = int(os.getenv("FETCH_WORKERS", datastore.data['settings']['requests']['workers']))
|
||||
|
||||
# Get basic status
|
||||
status = worker_handler.get_worker_status()
|
||||
status = worker_pool.get_worker_status()
|
||||
|
||||
# Perform health check
|
||||
health_result = worker_handler.check_worker_health(
|
||||
health_result = worker_pool.check_worker_health(
|
||||
expected_count=expected_workers,
|
||||
update_q=update_q,
|
||||
notification_q=notification_q,
|
||||
@@ -889,16 +946,31 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
# Can be overridden by ENV or use the default settings
|
||||
n_workers = int(os.getenv("FETCH_WORKERS", datastore.data['settings']['requests']['workers']))
|
||||
logger.info(f"Starting {n_workers} workers during app initialization")
|
||||
worker_handler.start_workers(n_workers, update_q, notification_q, app, datastore)
|
||||
worker_pool.start_workers(n_workers, update_q, notification_q, app, datastore)
|
||||
|
||||
# @todo handle ctrl break
|
||||
ticker_thread = threading.Thread(target=ticker_thread_check_time_launch_checks, daemon=True, name="TickerThread-ScheduleChecker").start()
|
||||
threading.Thread(target=notification_runner, daemon=True, name="NotificationRunner").start()
|
||||
# Skip background threads in batch mode (just process queue and exit)
|
||||
batch_mode = app.config.get('batch_mode', False)
|
||||
if not batch_mode:
|
||||
# @todo handle ctrl break
|
||||
ticker_thread = threading.Thread(target=ticker_thread_check_time_launch_checks, daemon=True, name="TickerThread-ScheduleChecker").start()
|
||||
|
||||
in_pytest = "pytest" in sys.modules or "PYTEST_CURRENT_TEST" in os.environ
|
||||
# Check for new release version, but not when running in test/build or pytest
|
||||
if not os.getenv("GITHUB_REF", False) and not strtobool(os.getenv('DISABLE_VERSION_CHECK', 'no')) and not in_pytest:
|
||||
threading.Thread(target=check_for_new_version, daemon=True, name="VersionChecker").start()
|
||||
# Start configurable number of notification workers (default 1)
|
||||
notification_workers = int(os.getenv("NOTIFICATION_WORKERS", "1"))
|
||||
for i in range(notification_workers):
|
||||
threading.Thread(
|
||||
target=notification_runner,
|
||||
args=(i,),
|
||||
daemon=True,
|
||||
name=f"NotificationRunner-{i}"
|
||||
).start()
|
||||
logger.info(f"Started {notification_workers} notification worker(s)")
|
||||
|
||||
in_pytest = "pytest" in sys.modules or "PYTEST_CURRENT_TEST" in os.environ
|
||||
# Check for new release version, but not when running in test/build or pytest
|
||||
if not os.getenv("GITHUB_REF", False) and not strtobool(os.getenv('DISABLE_VERSION_CHECK', 'no')) and not in_pytest:
|
||||
threading.Thread(target=check_for_new_version, daemon=True, name="VersionChecker").start()
|
||||
else:
|
||||
logger.info("Batch mode: Skipping ticker thread, notification runner, and version checker")
|
||||
|
||||
# Return the Flask app - the Socket.IO will be attached to it but initialized separately
|
||||
# This avoids circular dependencies
|
||||
@@ -933,14 +1005,14 @@ def check_for_new_version():
|
||||
app.config.exit.wait(86400)
|
||||
|
||||
|
||||
def notification_runner():
|
||||
def notification_runner(worker_id=0):
|
||||
global notification_debug_log
|
||||
from datetime import datetime
|
||||
import json
|
||||
with app.app_context():
|
||||
while not app.config.exit.is_set():
|
||||
try:
|
||||
# At the moment only one thread runs (single runner)
|
||||
# Multiple workers can run concurrently (configurable via NOTIFICATION_WORKERS)
|
||||
n_object = notification_q.get(block=False)
|
||||
except queue.Empty:
|
||||
app.config.exit.wait(1)
|
||||
@@ -966,7 +1038,7 @@ def notification_runner():
|
||||
sent_obj = process_notification(n_object, datastore)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Watch URL: {n_object['watch_url']} Error {str(e)}")
|
||||
logger.error(f"Notification worker {worker_id} - Watch URL: {n_object['watch_url']} Error {str(e)}")
|
||||
|
||||
# UUID wont be present when we submit a 'test' from the global settings
|
||||
if 'uuid' in n_object:
|
||||
@@ -1007,7 +1079,7 @@ def ticker_thread_check_time_launch_checks():
|
||||
now = time.time()
|
||||
if now - last_health_check > 60:
|
||||
expected_workers = int(os.getenv("FETCH_WORKERS", datastore.data['settings']['requests']['workers']))
|
||||
health_result = worker_handler.check_worker_health(
|
||||
health_result = worker_pool.check_worker_health(
|
||||
expected_count=expected_workers,
|
||||
update_q=update_q,
|
||||
notification_q=notification_q,
|
||||
@@ -1026,7 +1098,7 @@ def ticker_thread_check_time_launch_checks():
|
||||
continue
|
||||
|
||||
# Get a list of watches by UUID that are currently fetching data
|
||||
running_uuids = worker_handler.get_running_uuids()
|
||||
running_uuids = worker_pool.get_running_uuids()
|
||||
|
||||
# Build set of queued UUIDs once for O(1) lookup instead of O(n) per watch
|
||||
queued_uuids = {q_item.item['uuid'] for q_item in update_q.queue}
|
||||
@@ -1132,7 +1204,7 @@ def ticker_thread_check_time_launch_checks():
|
||||
priority = int(time.time())
|
||||
|
||||
# Into the queue with you
|
||||
queued_successfully = worker_handler.queue_item_async_safe(update_q,
|
||||
queued_successfully = worker_pool.queue_item_async_safe(update_q,
|
||||
queuedWatchMetaData.PrioritizedItem(priority=priority,
|
||||
item={'uuid': uuid})
|
||||
)
|
||||
|
||||
@@ -730,7 +730,7 @@ class quickWatchForm(Form):
|
||||
url = fields.URLField(_l('URL'), validators=[validateURL()])
|
||||
tags = StringTagUUID(_l('Group tag'), validators=[validators.Optional()])
|
||||
watch_submit_button = SubmitField(_l('Watch'), render_kw={"class": "pure-button pure-button-primary"})
|
||||
processor = RadioField(_l('Processor'), choices=lambda: processors.available_processors(), default="text_json_diff")
|
||||
processor = RadioField(_l('Processor'), choices=lambda: processors.available_processors(), default=processors.get_default_processor)
|
||||
edit_and_watch_submit_button = SubmitField(_l('Edit > Watch'), render_kw={"class": "pure-button pure-button-primary"})
|
||||
|
||||
|
||||
@@ -749,7 +749,7 @@ class commonSettingsForm(Form):
|
||||
notification_format = SelectField(_l('Notification format'), choices=list(valid_notification_formats.items()))
|
||||
notification_title = StringField(_l('Notification Title'), default='ChangeDetection.io Notification - {{ watch_url }}', validators=[validators.Optional(), ValidateJinja2Template()])
|
||||
notification_urls = StringListField(_l('Notification URL List'), validators=[validators.Optional(), ValidateAppRiseServers(), ValidateJinja2Template()])
|
||||
processor = RadioField( label=_l("Processor - What do you want to achieve?"), choices=lambda: processors.available_processors(), default="text_json_diff")
|
||||
processor = RadioField( label=_l("Processor - What do you want to achieve?"), choices=lambda: processors.available_processors(), default=processors.get_default_processor)
|
||||
scheduler_timezone_default = StringField(_l("Default timezone for watch check scheduler"), render_kw={"list": "timezones"}, validators=[validateTimeZoneName()])
|
||||
webdriver_delay = IntegerField(_l('Wait seconds before extracting text'), validators=[validators.Optional(), validators.NumberRange(min=1, message=_l("Should contain one or more seconds"))])
|
||||
|
||||
@@ -763,7 +763,7 @@ class commonSettingsForm(Form):
|
||||
|
||||
|
||||
class importForm(Form):
|
||||
processor = RadioField(_l('Processor'), choices=lambda: processors.available_processors(), default="text_json_diff")
|
||||
processor = RadioField(_l('Processor'), choices=lambda: processors.available_processors(), default=processors.get_default_processor)
|
||||
urls = TextAreaField(_l('URLs'))
|
||||
xlsx_file = FileField(_l('Upload .xlsx file'), validators=[FileAllowed(['xlsx'], _l('Must be .xlsx file!'))])
|
||||
file_mapping = SelectField(_l('File mapping'), [validators.DataRequired()], choices={('wachete', 'Wachete mapping'), ('custom','Custom mapping')})
|
||||
@@ -837,6 +837,8 @@ class processor_text_json_diff_form(commonSettingsForm):
|
||||
conditions = FieldList(FormField(ConditionFormRow), min_entries=1) # Add rule logic here
|
||||
use_page_title_in_list = TernaryNoneBooleanField(_l('Use page <title> in list'), default=None)
|
||||
|
||||
history_snapshot_max_length = IntegerField(_l('Number of history items per watch to keep'), render_kw={"style": "width: 5em;"}, validators=[validators.Optional(), validators.NumberRange(min=2)])
|
||||
|
||||
def extra_tab_content(self):
|
||||
return None
|
||||
|
||||
@@ -1034,6 +1036,8 @@ class globalSettingsApplicationForm(commonSettingsForm):
|
||||
render_kw={"style": "width: 5em;"},
|
||||
validators=[validators.NumberRange(min=0,
|
||||
message=_l("Should contain zero or more attempts"))])
|
||||
|
||||
history_snapshot_max_length = IntegerField(_l('Number of history items per watch to keep'), render_kw={"style": "width: 5em;"}, validators=[validators.Optional(), validators.NumberRange(min=2)])
|
||||
ui = FormField(globalSettingsApplicationUIForm)
|
||||
|
||||
|
||||
|
||||
@@ -52,7 +52,13 @@ def render(template_str, **args: t.Any) -> str:
|
||||
return output[:JINJA2_MAX_RETURN_PAYLOAD_SIZE]
|
||||
|
||||
def render_fully_escaped(content):
|
||||
env = jinja2.sandbox.ImmutableSandboxedEnvironment(autoescape=True)
|
||||
template = env.from_string("{{ some_html|e }}")
|
||||
return template.render(some_html=content)
|
||||
"""
|
||||
Escape HTML content safely.
|
||||
|
||||
MEMORY LEAK FIX: Use markupsafe.escape() directly instead of creating
|
||||
Jinja2 environments (was causing 1M+ compilations per page load).
|
||||
Simpler, faster, and no concerns about environment state.
|
||||
"""
|
||||
from markupsafe import escape
|
||||
return str(escape(content))
|
||||
|
||||
|
||||
@@ -29,7 +29,7 @@ class model(dict):
|
||||
'proxy': None, # Preferred proxy connection
|
||||
'time_between_check': {'weeks': None, 'days': None, 'hours': 3, 'minutes': None, 'seconds': None},
|
||||
'timeout': int(getenv("DEFAULT_SETTINGS_REQUESTS_TIMEOUT", "45")), # Default 45 seconds
|
||||
'workers': int(getenv("DEFAULT_SETTINGS_REQUESTS_WORKERS", "10")), # Number of threads, lower is better for slow connections
|
||||
'workers': int(getenv("DEFAULT_SETTINGS_REQUESTS_WORKERS", "5")), # Number of threads, lower is better for slow connections
|
||||
'default_ua': {
|
||||
'html_requests': getenv("DEFAULT_SETTINGS_HEADERS_USERAGENT", DEFAULT_SETTINGS_HEADERS_USERAGENT),
|
||||
'html_webdriver': None,
|
||||
@@ -46,6 +46,7 @@ class model(dict):
|
||||
'filter_failure_notification_threshold_attempts': _FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT,
|
||||
'global_ignore_text': [], # List of text to ignore when calculating the comparison checksum
|
||||
'global_subtractive_selectors': [],
|
||||
'history_snapshot_max_length': None,
|
||||
'ignore_whitespace': True,
|
||||
'ignore_status_codes': False, #@todo implement, as ternary.
|
||||
'ssim_threshold': '0.96', # Default SSIM threshold for screenshot comparison
|
||||
|
||||
@@ -1,10 +1,48 @@
|
||||
"""
|
||||
Tag/Group domain model for organizing and overriding watch settings.
|
||||
|
||||
ARCHITECTURE NOTE: Configuration Override Hierarchy
|
||||
===================================================
|
||||
|
||||
Tags can override Watch settings when overrides_watch=True.
|
||||
Current implementation requires manual checking in processors:
|
||||
|
||||
for tag_uuid in watch.get('tags'):
|
||||
tag = datastore['settings']['application']['tags'][tag_uuid]
|
||||
if tag.get('overrides_watch'):
|
||||
restock_settings = tag.get('restock_settings', {})
|
||||
break
|
||||
|
||||
With Pydantic, this would be automatic via chain resolution:
|
||||
Watch → Tag (first with overrides_watch) → Global
|
||||
|
||||
See: Watch.py model docstring for full Pydantic architecture explanation
|
||||
See: processors/restock_diff/processor.py:184-192 for current manual implementation
|
||||
"""
|
||||
|
||||
from changedetectionio.model import watch_base
|
||||
from changedetectionio.model.persistence import EntityPersistenceMixin
|
||||
|
||||
class model(EntityPersistenceMixin, watch_base):
|
||||
"""
|
||||
Tag domain model - groups watches and can override their settings.
|
||||
|
||||
class model(watch_base):
|
||||
Tags inherit from watch_base to reuse all the same fields as Watch.
|
||||
When overrides_watch=True, tag settings take precedence over watch settings
|
||||
for all watches in this tag/group.
|
||||
|
||||
Fields:
|
||||
overrides_watch (bool): If True, this tag's settings override watch settings
|
||||
title (str): Display name for this tag/group
|
||||
uuid (str): Unique identifier
|
||||
... (all fields from watch_base can be set as tag-level overrides)
|
||||
|
||||
Resolution order when overrides_watch=True:
|
||||
Watch.field → Tag.field (if overrides_watch) → Global.field
|
||||
"""
|
||||
|
||||
def __init__(self, *arg, **kw):
|
||||
# Parent class (watch_base) handles __datastore and __datastore_path
|
||||
super(model, self).__init__(*arg, **kw)
|
||||
|
||||
self['overrides_watch'] = kw.get('default', {}).get('overrides_watch')
|
||||
@@ -12,3 +50,7 @@ class model(watch_base):
|
||||
if kw.get('default'):
|
||||
self.update(kw['default'])
|
||||
del kw['default']
|
||||
|
||||
# _save_to_disk() method provided by EntityPersistenceMixin
|
||||
# commit() and _get_commit_data() methods inherited from watch_base
|
||||
# Tag uses default _get_commit_data() (includes all keys)
|
||||
|
||||
@@ -1,9 +1,37 @@
|
||||
"""
|
||||
Watch domain model for change detection monitoring.
|
||||
|
||||
ARCHITECTURE NOTE: Configuration Override Hierarchy
|
||||
===================================================
|
||||
|
||||
This module implements Watch objects that inherit from dict (technical debt).
|
||||
The dream architecture would use Pydantic for:
|
||||
|
||||
1. CHAIN RESOLUTION (Watch → Tag → Global Settings)
|
||||
- Current: Manual resolution scattered across codebase
|
||||
- Future: @computed_field properties with automatic resolution
|
||||
- Examples: resolved_fetch_backend, resolved_restock_settings, etc.
|
||||
|
||||
2. DATABASE BACKEND ABSTRACTION
|
||||
- Current: Domain model tightly coupled to file-based JSON storage
|
||||
- Future: Domain model (Pydantic) separate from persistence layer
|
||||
- Enables: Easy migration to PostgreSQL, MongoDB, etc.
|
||||
|
||||
3. TYPE SAFETY & VALIDATION
|
||||
- Current: Dict access with no compile-time checks
|
||||
- Future: Type hints, IDE autocomplete, validation at boundaries
|
||||
|
||||
See class model docstring for detailed explanation and examples.
|
||||
See: processors/restock_diff/processor.py:184-192 for manual resolution example
|
||||
"""
|
||||
|
||||
from blinker import signal
|
||||
from changedetectionio.validate_url import is_safe_valid_url
|
||||
|
||||
from changedetectionio.strtobool import strtobool
|
||||
from changedetectionio.jinja2_custom import render as jinja_render
|
||||
from . import watch_base
|
||||
from .persistence import EntityPersistenceMixin
|
||||
import os
|
||||
import re
|
||||
from pathlib import Path
|
||||
@@ -13,15 +41,16 @@ from .. import jinja2_custom as safe_jinja
|
||||
from ..html_tools import TRANSLATE_WHITESPACE_TABLE
|
||||
|
||||
FAVICON_RESAVE_THRESHOLD_SECONDS=86400
|
||||
BROTLI_COMPRESS_SIZE_THRESHOLD = int(os.getenv('SNAPSHOT_BROTLI_COMPRESSION_THRESHOLD', 1024))
|
||||
BROTLI_COMPRESS_SIZE_THRESHOLD = int(os.getenv('SNAPSHOT_BROTLI_COMPRESSION_THRESHOLD', 1024*20))
|
||||
|
||||
minimum_seconds_recheck_time = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 3))
|
||||
mtable = {'seconds': 1, 'minutes': 60, 'hours': 3600, 'days': 86400, 'weeks': 86400 * 7}
|
||||
|
||||
def _brotli_save(contents, filepath, mode=None, fallback_uncompressed=False):
|
||||
"""
|
||||
Save compressed data using native brotli.
|
||||
Testing shows no memory leak when using gc.collect() after compression.
|
||||
Save compressed data using native brotli with streaming compression.
|
||||
Uses chunked compression to minimize peak memory usage and malloc_trim()
|
||||
to force release of C-level memory back to the OS.
|
||||
|
||||
Args:
|
||||
contents: data to compress (str or bytes)
|
||||
@@ -37,27 +66,52 @@ def _brotli_save(contents, filepath, mode=None, fallback_uncompressed=False):
|
||||
"""
|
||||
import brotli
|
||||
import gc
|
||||
import ctypes
|
||||
|
||||
# Ensure contents are bytes
|
||||
if isinstance(contents, str):
|
||||
contents = contents.encode('utf-8')
|
||||
|
||||
try:
|
||||
logger.debug(f"Starting brotli compression of {len(contents)} bytes.")
|
||||
original_size = len(contents)
|
||||
logger.debug(f"Starting brotli streaming compression of {original_size} bytes.")
|
||||
|
||||
if mode is not None:
|
||||
compressed_data = brotli.compress(contents, mode=mode)
|
||||
else:
|
||||
compressed_data = brotli.compress(contents)
|
||||
# Create streaming compressor
|
||||
compressor = brotli.Compressor(quality=6, mode=mode if mode is not None else brotli.MODE_GENERIC)
|
||||
|
||||
# Stream compress in chunks to minimize memory usage
|
||||
chunk_size = 65536 # 64KB chunks
|
||||
total_compressed_size = 0
|
||||
|
||||
with open(filepath, 'wb') as f:
|
||||
f.write(compressed_data)
|
||||
# Process data in chunks
|
||||
offset = 0
|
||||
while offset < len(contents):
|
||||
chunk = contents[offset:offset + chunk_size]
|
||||
compressed_chunk = compressor.process(chunk)
|
||||
if compressed_chunk:
|
||||
f.write(compressed_chunk)
|
||||
total_compressed_size += len(compressed_chunk)
|
||||
offset += chunk_size
|
||||
|
||||
logger.debug(f"Finished brotli compression - From {len(contents)} to {len(compressed_data)} bytes.")
|
||||
# Finalize compression - critical for proper cleanup
|
||||
final_chunk = compressor.finish()
|
||||
if final_chunk:
|
||||
f.write(final_chunk)
|
||||
total_compressed_size += len(final_chunk)
|
||||
|
||||
# Force garbage collection to prevent memory buildup
|
||||
logger.debug(f"Finished brotli compression - From {original_size} to {total_compressed_size} bytes.")
|
||||
|
||||
# Cleanup: Delete compressor, force Python GC, then force C-level memory release
|
||||
del compressor
|
||||
gc.collect()
|
||||
|
||||
# Force release of C-level memory back to OS (since brotli is a C library)
|
||||
try:
|
||||
ctypes.CDLL('libc.so.6').malloc_trim(0)
|
||||
except Exception:
|
||||
pass # malloc_trim not available on all systems (e.g., macOS)
|
||||
|
||||
return filepath
|
||||
|
||||
except Exception as e:
|
||||
@@ -74,17 +128,112 @@ def _brotli_save(contents, filepath, mode=None, fallback_uncompressed=False):
|
||||
raise Exception(f"Brotli compression failed for {filepath}: {e}")
|
||||
|
||||
|
||||
class model(watch_base):
|
||||
class model(EntityPersistenceMixin, watch_base):
|
||||
"""
|
||||
Watch domain model for monitoring URL changes.
|
||||
|
||||
Inherits from watch_base (which inherits dict) - see watch_base docstring for field documentation.
|
||||
|
||||
## Configuration Override Hierarchy (Chain Resolution)
|
||||
|
||||
The dream architecture uses a 3-level resolution chain:
|
||||
Watch settings → Tag/Group settings → Global settings
|
||||
|
||||
Current implementation is MANUAL (see processor.py:184-192 for example):
|
||||
- Processors manually check watch.get('field')
|
||||
- Then loop through watch.tags to find first tag with overrides_watch=True
|
||||
- Finally fall back to datastore['settings']['application']['field']
|
||||
|
||||
FUTURE: Pydantic-based chain resolution would enable:
|
||||
|
||||
```python
|
||||
# Instead of manual resolution in every processor:
|
||||
restock_settings = watch.get('restock_settings', {})
|
||||
for tag_uuid in watch.get('tags'):
|
||||
tag = datastore['settings']['application']['tags'][tag_uuid]
|
||||
if tag.get('overrides_watch'):
|
||||
restock_settings = tag.get('restock_settings', {})
|
||||
break
|
||||
|
||||
# Clean computed properties with automatic resolution:
|
||||
@computed_field
|
||||
def resolved_restock_settings(self) -> dict:
|
||||
if self.restock_settings:
|
||||
return self.restock_settings
|
||||
for tag_uuid in self.tags:
|
||||
tag = self._datastore.get_tag(tag_uuid)
|
||||
if tag.overrides_watch and tag.restock_settings:
|
||||
return tag.restock_settings
|
||||
return self._datastore.settings.restock_settings or {}
|
||||
|
||||
# Usage: watch.resolved_restock_settings (automatic, type-safe, tested once)
|
||||
```
|
||||
|
||||
Benefits of Pydantic migration:
|
||||
1. Single source of truth for resolution logic (not scattered across processors)
|
||||
2. Type safety + IDE autocomplete (watch.resolved_fetch_backend vs dict navigation)
|
||||
3. Database backend abstraction (domain model separate from persistence)
|
||||
4. Automatic validation at boundaries
|
||||
5. Self-documenting via type hints
|
||||
6. Easy to test resolution independently
|
||||
|
||||
Resolution chain examples that would benefit:
|
||||
- fetch_backend: watch → tag → global (see get_fetch_backend property)
|
||||
- notification_urls: watch → tag → global
|
||||
- time_between_check: watch → global (see threshold_seconds)
|
||||
- restock_settings: watch → tag (see processors/restock_diff/processor.py:184-192)
|
||||
- history_snapshot_max_length: watch → global (see save_history_blob:550-556)
|
||||
- All processor_config_* settings could use tag overrides
|
||||
|
||||
## Database Backend Abstraction with Pydantic
|
||||
|
||||
Current: Watch inherits dict, tightly coupled to file-based JSON storage
|
||||
Future: Domain model (Watch) separate from persistence layer
|
||||
|
||||
```python
|
||||
# Domain model (database-agnostic)
|
||||
class Watch(BaseModel):
|
||||
uuid: str
|
||||
url: str
|
||||
# ... validation, business logic
|
||||
|
||||
# Pluggable backends
|
||||
class DataStoreBackend(ABC):
|
||||
def save_watch(self, watch: Watch): ...
|
||||
def load_watch(self, uuid: str) -> Watch: ...
|
||||
|
||||
# Implementations: FileBackend, MongoBackend, PostgresBackend, etc.
|
||||
```
|
||||
|
||||
This would enable:
|
||||
- Easy migration between storage backends (file → postgres → mongodb)
|
||||
- Pydantic handles serialization/deserialization automatically
|
||||
- Domain logic stays clean (no storage concerns in Watch methods)
|
||||
|
||||
## Migration Path
|
||||
|
||||
Given existing codebase, incremental migration recommended:
|
||||
1. Create Pydantic models alongside existing dict-based models
|
||||
2. Add .to_pydantic() / .from_pydantic() bridge methods
|
||||
3. Gradually migrate code to use Pydantic models
|
||||
4. Remove dict inheritance once migration complete
|
||||
|
||||
See: watch_base docstring for technical debt discussion
|
||||
See: processors/restock_diff/processor.py:184-192 for manual resolution example
|
||||
See: Watch.py:550-556 for nested dict navigation that would become watch.resolved_*
|
||||
"""
|
||||
__newest_history_key = None
|
||||
__history_n = 0
|
||||
jitter_seconds = 0
|
||||
|
||||
def __init__(self, *arg, **kw):
|
||||
self.__datastore_path = kw.get('datastore_path')
|
||||
if kw.get('datastore_path'):
|
||||
del kw['datastore_path']
|
||||
|
||||
# Validate __datastore before calling parent (Watch requires it)
|
||||
if not kw.get('__datastore'):
|
||||
raise ValueError("Watch object requires '__datastore' reference - cannot access global settings without it")
|
||||
|
||||
# Parent class (watch_base) handles __datastore and __datastore_path
|
||||
super(model, self).__init__(*arg, **kw)
|
||||
|
||||
if kw.get('default'):
|
||||
self.update(kw['default'])
|
||||
del kw['default']
|
||||
@@ -95,6 +244,9 @@ class model(watch_base):
|
||||
# Be sure the cached timestamp is ready
|
||||
bump = self.history
|
||||
|
||||
# Note: __deepcopy__, __getstate__, and __setstate__ are inherited from watch_base
|
||||
# This prevents memory leaks by sharing __datastore reference instead of copying it
|
||||
|
||||
@property
|
||||
def viewed(self):
|
||||
# Don't return viewed when last_viewed is 0 and newest_key is 0
|
||||
@@ -107,11 +259,6 @@ class model(watch_base):
|
||||
def has_unviewed(self):
|
||||
return int(self.newest_history_key) > int(self['last_viewed']) and self.__history_n >= 2
|
||||
|
||||
def ensure_data_dir_exists(self):
|
||||
if not os.path.isdir(self.watch_data_dir):
|
||||
logger.debug(f"> Creating data dir {self.watch_data_dir}")
|
||||
os.mkdir(self.watch_data_dir)
|
||||
|
||||
@property
|
||||
def link(self):
|
||||
|
||||
@@ -167,7 +314,8 @@ class model(watch_base):
|
||||
|
||||
# JSON Data, Screenshots, Textfiles (history index and snapshots), HTML in the future etc
|
||||
# But preserve processor config files (they're configuration, not history data)
|
||||
for item in pathlib.Path(str(self.watch_data_dir)).rglob("*.*"):
|
||||
# Use glob not rglob here for safety.
|
||||
for item in pathlib.Path(str(self.data_dir)).glob("*.*"):
|
||||
# Skip processor config files
|
||||
if item.name in processor_config_files:
|
||||
continue
|
||||
@@ -204,8 +352,30 @@ class model(watch_base):
|
||||
@property
|
||||
def get_fetch_backend(self):
|
||||
"""
|
||||
Like just using the `fetch_backend` key but there could be some logic
|
||||
:return:
|
||||
Get the fetch backend for this watch with special case handling.
|
||||
|
||||
CHAIN RESOLUTION OPPORTUNITY:
|
||||
Currently returns watch.fetch_backend directly, but doesn't implement
|
||||
Watch → Tag → Global resolution chain. With Pydantic:
|
||||
|
||||
@computed_field
|
||||
def resolved_fetch_backend(self) -> str:
|
||||
# Special case: PDFs always use html_requests
|
||||
if self.is_pdf:
|
||||
return 'html_requests'
|
||||
|
||||
# Watch override
|
||||
if self.fetch_backend and self.fetch_backend != 'system':
|
||||
return self.fetch_backend
|
||||
|
||||
# Tag override (first tag with overrides_watch=True wins)
|
||||
for tag_uuid in self.tags:
|
||||
tag = self._datastore.get_tag(tag_uuid)
|
||||
if tag.overrides_watch and tag.fetch_backend:
|
||||
return tag.fetch_backend
|
||||
|
||||
# Global default
|
||||
return self._datastore.settings.fetch_backend
|
||||
"""
|
||||
# Maybe also if is_image etc?
|
||||
# This is because chrome/playwright wont render the PDF in the browser and we will just fetch it and use pdf2html to see the text.
|
||||
@@ -254,11 +424,11 @@ class model(watch_base):
|
||||
tmp_history = {}
|
||||
|
||||
# In the case we are only using the watch for processing without history
|
||||
if not self.watch_data_dir:
|
||||
if not self.data_dir:
|
||||
return []
|
||||
|
||||
# Read the history file as a dict
|
||||
fname = os.path.join(self.watch_data_dir, self.history_index_filename)
|
||||
fname = os.path.join(self.data_dir, self.history_index_filename)
|
||||
if os.path.isfile(fname):
|
||||
logger.debug(f"Reading watch history index for {self.get('uuid')}")
|
||||
with open(fname, "r", encoding='utf-8') as f:
|
||||
@@ -271,13 +441,13 @@ class model(watch_base):
|
||||
# Cross-platform: check for any path separator (works on Windows and Unix)
|
||||
if os.sep not in v and '/' not in v and '\\' not in v:
|
||||
# Relative filename only, no path separators
|
||||
v = os.path.join(self.watch_data_dir, v)
|
||||
v = os.path.join(self.data_dir, v)
|
||||
else:
|
||||
# It's possible that they moved the datadir on older versions
|
||||
# So the snapshot exists but is in a different path
|
||||
# Cross-platform: use os.path.basename instead of split('/')
|
||||
snapshot_fname = os.path.basename(v)
|
||||
proposed_new_path = os.path.join(self.watch_data_dir, snapshot_fname)
|
||||
proposed_new_path = os.path.join(self.data_dir, snapshot_fname)
|
||||
if not os.path.exists(v) and os.path.exists(proposed_new_path):
|
||||
v = proposed_new_path
|
||||
|
||||
@@ -294,7 +464,7 @@ class model(watch_base):
|
||||
|
||||
@property
|
||||
def has_history(self):
|
||||
fname = os.path.join(self.watch_data_dir, self.history_index_filename)
|
||||
fname = os.path.join(self.data_dir, self.history_index_filename)
|
||||
return os.path.isfile(fname)
|
||||
|
||||
@property
|
||||
@@ -397,16 +567,49 @@ class model(watch_base):
|
||||
with open(filepath, 'r', encoding='utf-8', errors='ignore') as f:
|
||||
return f.read()
|
||||
|
||||
def _write_atomic(self, dest, data):
|
||||
def _write_atomic(self, dest, data, mode='wb'):
|
||||
"""Write data atomically to dest using a temp file"""
|
||||
if not os.path.exists(dest):
|
||||
import tempfile
|
||||
with tempfile.NamedTemporaryFile('wb', delete=False, dir=self.watch_data_dir) as tmp:
|
||||
tmp.write(data)
|
||||
tmp.flush()
|
||||
os.fsync(tmp.fileno())
|
||||
tmp_path = tmp.name
|
||||
os.replace(tmp_path, dest)
|
||||
import tempfile
|
||||
with tempfile.NamedTemporaryFile(mode, delete=False, dir=self.data_dir) as tmp:
|
||||
tmp.write(data)
|
||||
tmp.flush()
|
||||
os.fsync(tmp.fileno())
|
||||
tmp_path = tmp.name
|
||||
os.replace(tmp_path, dest)
|
||||
|
||||
def history_trim(self, newest_n_items):
|
||||
from pathlib import Path
|
||||
import gc
|
||||
# Sort by timestamp (key)
|
||||
sorted_items = sorted(self.history.items(), key=lambda x: int(x[0]))
|
||||
|
||||
keep_part = dict(sorted_items[-newest_n_items:])
|
||||
delete_part = dict(sorted_items[:-newest_n_items])
|
||||
logger.info( f"[{self.get('uuid')}] Trimming history to most recent {newest_n_items} items, keeping {len(keep_part)} items deleting {len(delete_part)} items.")
|
||||
|
||||
if delete_part:
|
||||
for item in delete_part.items():
|
||||
try:
|
||||
Path(item[1]).unlink(missing_ok=True)
|
||||
except Exception as e:
|
||||
logger.critical(f"{str(e)}")
|
||||
finally:
|
||||
logger.debug(f"[{self.get('uuid')}] Deleted {item[1]} history snapshot")
|
||||
try:
|
||||
dest = os.path.join(self.data_dir, self.history_index_filename)
|
||||
output = "\r\n".join(
|
||||
f"{k},{Path(v).name}"
|
||||
for k, v in keep_part.items()
|
||||
)+"\r\n"
|
||||
self._write_atomic(dest=dest, data=output, mode='w')
|
||||
except Exception as e:
|
||||
logger.critical(f"{str(e)}")
|
||||
finally:
|
||||
logger.debug(f"[{self.get('uuid')}] Updated history index {dest}")
|
||||
|
||||
# reimport
|
||||
bump = self.history
|
||||
gc.collect()
|
||||
|
||||
# Save some text file to the appropriate path and bump the history
|
||||
# result_obj from fetch_site_status.run()
|
||||
@@ -415,7 +618,6 @@ class model(watch_base):
|
||||
logger.trace(f"{self.get('uuid')} - Updating {self.history_index_filename} with timestamp {timestamp}")
|
||||
|
||||
self.ensure_data_dir_exists()
|
||||
|
||||
skip_brotli = strtobool(os.getenv('DISABLE_BROTLI_TEXT_SNAPSHOT', 'False'))
|
||||
|
||||
# Binary data - detect file type and save without compression
|
||||
@@ -433,7 +635,7 @@ class model(watch_base):
|
||||
ext = 'bin'
|
||||
|
||||
snapshot_fname = f"{snapshot_id}.{ext}"
|
||||
dest = os.path.join(self.watch_data_dir, snapshot_fname)
|
||||
dest = os.path.join(self.data_dir, snapshot_fname)
|
||||
self._write_atomic(dest, contents)
|
||||
logger.trace(f"Saved binary snapshot as {snapshot_fname} ({len(contents)} bytes)")
|
||||
|
||||
@@ -443,7 +645,7 @@ class model(watch_base):
|
||||
# Compressed text
|
||||
import brotli
|
||||
snapshot_fname = f"{snapshot_id}.txt.br"
|
||||
dest = os.path.join(self.watch_data_dir, snapshot_fname)
|
||||
dest = os.path.join(self.data_dir, snapshot_fname)
|
||||
|
||||
if not os.path.exists(dest):
|
||||
try:
|
||||
@@ -454,16 +656,16 @@ class model(watch_base):
|
||||
logger.error(f"{self.get('uuid')} - Brotli compression failed: {e}")
|
||||
# Fallback to uncompressed
|
||||
snapshot_fname = f"{snapshot_id}.txt"
|
||||
dest = os.path.join(self.watch_data_dir, snapshot_fname)
|
||||
dest = os.path.join(self.data_dir, snapshot_fname)
|
||||
self._write_atomic(dest, contents.encode('utf-8'))
|
||||
else:
|
||||
# Plain text
|
||||
snapshot_fname = f"{snapshot_id}.txt"
|
||||
dest = os.path.join(self.watch_data_dir, snapshot_fname)
|
||||
dest = os.path.join(self.data_dir, snapshot_fname)
|
||||
self._write_atomic(dest, contents.encode('utf-8'))
|
||||
|
||||
# Append to history.txt atomically
|
||||
index_fname = os.path.join(self.watch_data_dir, self.history_index_filename)
|
||||
index_fname = os.path.join(self.data_dir, self.history_index_filename)
|
||||
index_line = f"{timestamp},{snapshot_fname}\n"
|
||||
|
||||
with open(index_fname, 'a', encoding='utf-8') as f:
|
||||
@@ -475,6 +677,17 @@ class model(watch_base):
|
||||
self.__newest_history_key = timestamp
|
||||
self.__history_n += 1
|
||||
|
||||
# MANUAL CHAIN RESOLUTION: Watch → Global
|
||||
# With Pydantic, this would become: maxlen = watch.resolved_history_snapshot_max_length
|
||||
# @computed_field def resolved_history_snapshot_max_length(self) -> Optional[int]:
|
||||
# if self.history_snapshot_max_length: return self.history_snapshot_max_length
|
||||
# if tag := self._get_override_tag(): return tag.history_snapshot_max_length
|
||||
# return self._datastore.settings.history_snapshot_max_length
|
||||
maxlen = self.get('history_snapshot_max_length') or self.get_global_setting('application', 'history_snapshot_max_length')
|
||||
|
||||
if maxlen and self.__history_n and self.__history_n > maxlen:
|
||||
self.history_trim(newest_n_items=maxlen)
|
||||
|
||||
# @todo bump static cache of the last timestamp so we dont need to examine the file to set a proper ''viewed'' status
|
||||
return snapshot_fname
|
||||
|
||||
@@ -527,7 +740,7 @@ class model(watch_base):
|
||||
return not local_lines.issubset(existing_history)
|
||||
|
||||
def get_screenshot(self):
|
||||
fname = os.path.join(self.watch_data_dir, "last-screenshot.png")
|
||||
fname = os.path.join(self.data_dir, "last-screenshot.png")
|
||||
if os.path.isfile(fname):
|
||||
return fname
|
||||
|
||||
@@ -542,7 +755,7 @@ class model(watch_base):
|
||||
if not favicon_fname:
|
||||
return True
|
||||
try:
|
||||
fname = next(iter(glob.glob(os.path.join(self.watch_data_dir, "favicon.*"))), None)
|
||||
fname = next(iter(glob.glob(os.path.join(self.data_dir, "favicon.*"))), None)
|
||||
logger.trace(f"Favicon file maybe found at {fname}")
|
||||
if os.path.isfile(fname):
|
||||
file_age = int(time.time() - os.path.getmtime(fname))
|
||||
@@ -575,7 +788,7 @@ class model(watch_base):
|
||||
base = "favicon"
|
||||
extension = "ico"
|
||||
|
||||
fname = os.path.join(self.watch_data_dir, f"favicon.{extension}")
|
||||
fname = os.path.join(self.data_dir, f"favicon.{extension}")
|
||||
|
||||
try:
|
||||
# validate=True makes sure the string only contains valid base64 chars
|
||||
@@ -587,6 +800,11 @@ class model(watch_base):
|
||||
try:
|
||||
with open(fname, 'wb') as f:
|
||||
f.write(decoded)
|
||||
|
||||
# Invalidate favicon filename cache
|
||||
if hasattr(self, '_favicon_filename_cache'):
|
||||
delattr(self, '_favicon_filename_cache')
|
||||
|
||||
# A signal that could trigger the socket server to update the browser also
|
||||
watch_check_update = signal('watch_favicon_bump')
|
||||
if watch_check_update:
|
||||
@@ -603,20 +821,32 @@ class model(watch_base):
|
||||
Find any favicon.* file in the current working directory
|
||||
and return the contents of the newest one.
|
||||
|
||||
MEMORY LEAK FIX: Cache the result to avoid repeated glob.glob() operations.
|
||||
glob.glob() causes millions of fnmatch allocations when called for every watch on page load.
|
||||
|
||||
Returns:
|
||||
bytes: Contents of the newest favicon file, or None if not found.
|
||||
str: Basename of the newest favicon file, or None if not found.
|
||||
"""
|
||||
# Check cache first (prevents 26M+ allocations from repeated glob operations)
|
||||
cache_key = '_favicon_filename_cache'
|
||||
if hasattr(self, cache_key):
|
||||
return getattr(self, cache_key)
|
||||
|
||||
import glob
|
||||
|
||||
# Search for all favicon.* files
|
||||
files = glob.glob(os.path.join(self.watch_data_dir, "favicon.*"))
|
||||
files = glob.glob(os.path.join(self.data_dir, "favicon.*"))
|
||||
|
||||
if not files:
|
||||
return None
|
||||
result = None
|
||||
else:
|
||||
# Find the newest by modification time
|
||||
newest_file = max(files, key=os.path.getmtime)
|
||||
result = os.path.basename(newest_file)
|
||||
|
||||
# Find the newest by modification time
|
||||
newest_file = max(files, key=os.path.getmtime)
|
||||
return os.path.basename(newest_file)
|
||||
# Cache the result
|
||||
setattr(self, cache_key, result)
|
||||
return result
|
||||
|
||||
def get_screenshot_as_thumbnail(self, max_age=3200):
|
||||
"""Return path to a square thumbnail of the most recent screenshot.
|
||||
@@ -632,7 +862,7 @@ class model(watch_base):
|
||||
import os
|
||||
import time
|
||||
|
||||
thumbnail_path = os.path.join(self.watch_data_dir, "thumbnail.jpeg")
|
||||
thumbnail_path = os.path.join(self.data_dir, "thumbnail.jpeg")
|
||||
top_trim = 500 # Pixels from top of screenshot to use
|
||||
|
||||
screenshot_path = self.get_screenshot()
|
||||
@@ -683,7 +913,7 @@ class model(watch_base):
|
||||
return None
|
||||
|
||||
def __get_file_ctime(self, filename):
|
||||
fname = os.path.join(self.watch_data_dir, filename)
|
||||
fname = os.path.join(self.data_dir, filename)
|
||||
if os.path.isfile(fname):
|
||||
return int(os.path.getmtime(fname))
|
||||
return False
|
||||
@@ -708,14 +938,9 @@ class model(watch_base):
|
||||
def snapshot_error_screenshot_ctime(self):
|
||||
return self.__get_file_ctime('last-error-screenshot.png')
|
||||
|
||||
@property
|
||||
def watch_data_dir(self):
|
||||
# The base dir of the watch data
|
||||
return os.path.join(self.__datastore_path, self['uuid']) if self.__datastore_path else None
|
||||
|
||||
def get_error_text(self):
|
||||
"""Return the text saved from a previous request that resulted in a non-200 error"""
|
||||
fname = os.path.join(self.watch_data_dir, "last-error.txt")
|
||||
fname = os.path.join(self.data_dir, "last-error.txt")
|
||||
if os.path.isfile(fname):
|
||||
with open(fname, 'r', encoding='utf-8') as f:
|
||||
return f.read()
|
||||
@@ -723,7 +948,7 @@ class model(watch_base):
|
||||
|
||||
def get_error_snapshot(self):
|
||||
"""Return path to the screenshot that resulted in a non-200 error"""
|
||||
fname = os.path.join(self.watch_data_dir, "last-error-screenshot.png")
|
||||
fname = os.path.join(self.data_dir, "last-error-screenshot.png")
|
||||
if os.path.isfile(fname):
|
||||
return fname
|
||||
return False
|
||||
@@ -747,6 +972,37 @@ class model(watch_base):
|
||||
def toggle_mute(self):
|
||||
self['notification_muted'] ^= True
|
||||
|
||||
def _get_commit_data(self):
|
||||
"""
|
||||
Prepare watch data for commit.
|
||||
|
||||
Excludes processor_config_* keys (stored in separate files).
|
||||
Normalizes browser_steps to empty list if no meaningful steps.
|
||||
"""
|
||||
import copy
|
||||
|
||||
# Get base snapshot with lock
|
||||
lock = self._datastore.lock if self._datastore and hasattr(self._datastore, 'lock') else None
|
||||
|
||||
if lock:
|
||||
with lock:
|
||||
snapshot = dict(self)
|
||||
else:
|
||||
snapshot = dict(self)
|
||||
|
||||
# Exclude processor config keys (stored separately)
|
||||
watch_dict = {k: copy.deepcopy(v) for k, v in snapshot.items() if not k.startswith('processor_config_')}
|
||||
|
||||
# Normalize browser_steps: if no meaningful steps, save as empty list
|
||||
if not self.has_browser_steps:
|
||||
watch_dict['browser_steps'] = []
|
||||
|
||||
return watch_dict
|
||||
|
||||
# _save_to_disk() method provided by EntityPersistenceMixin
|
||||
# commit() method inherited from watch_base
|
||||
|
||||
|
||||
def extra_notification_token_values(self):
|
||||
# Used for providing extra tokens
|
||||
# return {'widget': 555}
|
||||
@@ -776,7 +1032,7 @@ class model(watch_base):
|
||||
if not csv_writer:
|
||||
# A file on the disk can be transferred much faster via flask than a string reply
|
||||
csv_output_filename = f"report-{self.get('uuid')}.csv"
|
||||
f = open(os.path.join(self.watch_data_dir, csv_output_filename), 'w')
|
||||
f = open(os.path.join(self.data_dir, csv_output_filename), 'w')
|
||||
# @todo some headers in the future
|
||||
#fieldnames = ['Epoch seconds', 'Date']
|
||||
csv_writer = csv.writer(f,
|
||||
@@ -818,7 +1074,7 @@ class model(watch_base):
|
||||
|
||||
def save_error_text(self, contents):
|
||||
self.ensure_data_dir_exists()
|
||||
target_path = os.path.join(self.watch_data_dir, "last-error.txt")
|
||||
target_path = os.path.join(self.data_dir, "last-error.txt")
|
||||
with open(target_path, 'w', encoding='utf-8') as f:
|
||||
f.write(contents)
|
||||
|
||||
@@ -827,9 +1083,9 @@ class model(watch_base):
|
||||
import zlib
|
||||
|
||||
if as_error:
|
||||
target_path = os.path.join(str(self.watch_data_dir), "elements-error.deflate")
|
||||
target_path = os.path.join(str(self.data_dir), "elements-error.deflate")
|
||||
else:
|
||||
target_path = os.path.join(str(self.watch_data_dir), "elements.deflate")
|
||||
target_path = os.path.join(str(self.data_dir), "elements.deflate")
|
||||
|
||||
self.ensure_data_dir_exists()
|
||||
|
||||
@@ -844,9 +1100,9 @@ class model(watch_base):
|
||||
def save_screenshot(self, screenshot: bytes, as_error=False):
|
||||
|
||||
if as_error:
|
||||
target_path = os.path.join(self.watch_data_dir, "last-error-screenshot.png")
|
||||
target_path = os.path.join(self.data_dir, "last-error-screenshot.png")
|
||||
else:
|
||||
target_path = os.path.join(self.watch_data_dir, "last-screenshot.png")
|
||||
target_path = os.path.join(self.data_dir, "last-screenshot.png")
|
||||
|
||||
self.ensure_data_dir_exists()
|
||||
|
||||
@@ -857,7 +1113,7 @@ class model(watch_base):
|
||||
|
||||
def get_last_fetched_text_before_filters(self):
|
||||
import brotli
|
||||
filepath = os.path.join(self.watch_data_dir, 'last-fetched.br')
|
||||
filepath = os.path.join(self.data_dir, 'last-fetched.br')
|
||||
|
||||
if not os.path.isfile(filepath) or os.path.getsize(filepath) == 0:
|
||||
# If a previous attempt doesnt yet exist, just snarf the previous snapshot instead
|
||||
@@ -872,13 +1128,13 @@ class model(watch_base):
|
||||
|
||||
def save_last_text_fetched_before_filters(self, contents):
|
||||
import brotli
|
||||
filepath = os.path.join(self.watch_data_dir, 'last-fetched.br')
|
||||
filepath = os.path.join(self.data_dir, 'last-fetched.br')
|
||||
_brotli_save(contents, filepath, mode=brotli.MODE_TEXT, fallback_uncompressed=False)
|
||||
|
||||
def save_last_fetched_html(self, timestamp, contents):
|
||||
self.ensure_data_dir_exists()
|
||||
snapshot_fname = f"{timestamp}.html.br"
|
||||
filepath = os.path.join(self.watch_data_dir, snapshot_fname)
|
||||
filepath = os.path.join(self.data_dir, snapshot_fname)
|
||||
_brotli_save(contents, filepath, mode=None, fallback_uncompressed=True)
|
||||
self._prune_last_fetched_html_snapshots()
|
||||
|
||||
@@ -886,7 +1142,7 @@ class model(watch_base):
|
||||
import brotli
|
||||
|
||||
snapshot_fname = f"{timestamp}.html.br"
|
||||
filepath = os.path.join(self.watch_data_dir, snapshot_fname)
|
||||
filepath = os.path.join(self.data_dir, snapshot_fname)
|
||||
if os.path.isfile(filepath):
|
||||
with open(filepath, 'rb') as f:
|
||||
return (brotli.decompress(f.read()).decode('utf-8'))
|
||||
@@ -901,7 +1157,7 @@ class model(watch_base):
|
||||
|
||||
for index, timestamp in enumerate(dates):
|
||||
snapshot_fname = f"{timestamp}.html.br"
|
||||
filepath = os.path.join(self.watch_data_dir, snapshot_fname)
|
||||
filepath = os.path.join(self.data_dir, snapshot_fname)
|
||||
|
||||
# Keep only the first 2
|
||||
if index > 1 and os.path.isfile(filepath):
|
||||
@@ -912,7 +1168,7 @@ class model(watch_base):
|
||||
def get_browsersteps_available_screenshots(self):
|
||||
"For knowing which screenshots are available to show the user in BrowserSteps UI"
|
||||
available = []
|
||||
for f in Path(self.watch_data_dir).glob('step_before-*.jpeg'):
|
||||
for f in Path(self.data_dir).glob('step_before-*.jpeg'):
|
||||
step_n=re.search(r'step_before-(\d+)', f.name)
|
||||
if step_n:
|
||||
available.append(step_n.group(1))
|
||||
|
||||
@@ -2,12 +2,169 @@ import os
|
||||
import uuid
|
||||
|
||||
from changedetectionio import strtobool
|
||||
from .persistence import EntityPersistenceMixin, _determine_entity_type
|
||||
|
||||
__all__ = ['EntityPersistenceMixin', 'watch_base']
|
||||
|
||||
USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH = 'System default'
|
||||
CONDITIONS_MATCH_LOGIC_DEFAULT = 'ALL'
|
||||
|
||||
|
||||
class watch_base(dict):
|
||||
"""
|
||||
Base watch domain model (inherits from dict for backward compatibility).
|
||||
|
||||
WARNING: This class inherits from dict, which violates proper encapsulation.
|
||||
Dict inheritance is legacy technical debt that should be refactored to a proper
|
||||
domain model (e.g., Pydantic BaseModel) for better type safety and validation.
|
||||
|
||||
TODO: Migrate to Pydantic BaseModel for:
|
||||
- Type safety and IDE autocomplete
|
||||
- Automatic validation
|
||||
- Clear separation between domain model and serialization
|
||||
- Database backend abstraction (file → postgres → mongodb)
|
||||
- Configuration override chain resolution (Watch → Tag → Global)
|
||||
- Immutability options
|
||||
- Better testing
|
||||
|
||||
CHAIN RESOLUTION ARCHITECTURE:
|
||||
The dream is a 3-level override hierarchy:
|
||||
Watch settings → Tag/Group settings → Global settings
|
||||
|
||||
Current implementation: MANUAL resolution scattered across codebase
|
||||
- Processors manually check watch.get('field')
|
||||
- Loop through tags to find overrides_watch=True
|
||||
- Fall back to datastore['settings']['application']['field']
|
||||
|
||||
Pydantic implementation: AUTOMATIC resolution via @computed_field
|
||||
- Single source of truth for each setting's resolution logic
|
||||
- Type-safe, testable, self-documenting
|
||||
- Example: watch.resolved_fetch_backend (instead of nested dict navigation)
|
||||
|
||||
See: Watch.py model docstring for detailed Pydantic architecture plan
|
||||
See: Tag.py model docstring for tag override explanation
|
||||
See: processors/restock_diff/processor.py:184-192 for current manual example
|
||||
|
||||
Core Fields:
|
||||
uuid (str): Unique identifier for this watch (auto-generated)
|
||||
url (str): Target URL to monitor for changes
|
||||
title (str|None): Custom display name (overrides page_title if set)
|
||||
page_title (str|None): Title extracted from <title> tag of monitored page
|
||||
tags (List[str]): List of tag UUIDs for categorization
|
||||
tag (str): DEPRECATED - Old single-tag system, use tags instead
|
||||
|
||||
Check Configuration:
|
||||
processor (str): Processor type ('text_json_diff', 'restock_diff', etc.)
|
||||
fetch_backend (str): Fetcher to use ('system', 'html_requests', 'playwright', etc.)
|
||||
method (str): HTTP method ('GET', 'POST', etc.)
|
||||
headers (dict): Custom HTTP headers to send
|
||||
proxy (str|None): Preferred proxy server
|
||||
paused (bool): Whether change detection is paused
|
||||
|
||||
Scheduling:
|
||||
time_between_check (dict): Check interval {'weeks': int, 'days': int, 'hours': int, 'minutes': int, 'seconds': int}
|
||||
time_between_check_use_default (bool): Use global default interval if True
|
||||
time_schedule_limit (dict): Weekly schedule limiting when checks can run
|
||||
Structure: {
|
||||
'enabled': bool,
|
||||
'monday/tuesday/.../sunday': {
|
||||
'enabled': bool,
|
||||
'start_time': str ('HH:MM'),
|
||||
'duration': {'hours': str, 'minutes': str}
|
||||
}
|
||||
}
|
||||
|
||||
Content Filtering:
|
||||
include_filters (List[str]): CSS/XPath selectors to extract content
|
||||
subtractive_selectors (List[str]): Selectors to remove from content
|
||||
ignore_text (List[str]): Text patterns to ignore in change detection
|
||||
trigger_text (List[str]): Text/regex that must be present to trigger change
|
||||
text_should_not_be_present (List[str]): Text that should NOT be present
|
||||
extract_text (List[str]): Regex patterns to extract specific text after filtering
|
||||
|
||||
Text Processing:
|
||||
trim_text_whitespace (bool): Strip leading/trailing whitespace
|
||||
sort_text_alphabetically (bool): Sort lines alphabetically before comparison
|
||||
remove_duplicate_lines (bool): Remove duplicate lines
|
||||
check_unique_lines (bool): Compare against all history for unique lines
|
||||
strip_ignored_lines (bool|None): Remove lines matching ignore patterns
|
||||
|
||||
Change Detection Filters:
|
||||
filter_text_added (bool): Include added text in change detection
|
||||
filter_text_removed (bool): Include removed text in change detection
|
||||
filter_text_replaced (bool): Include replaced text in change detection
|
||||
|
||||
Browser Automation:
|
||||
browser_steps (List[dict]): Browser automation steps for JS-heavy sites
|
||||
browser_steps_last_error_step (int|None): Last step that caused error
|
||||
webdriver_delay (int|None): Seconds to wait after page load
|
||||
webdriver_js_execute_code (str|None): JavaScript to execute before extraction
|
||||
|
||||
Restock Detection:
|
||||
in_stock_only (bool): Only trigger on in-stock transitions
|
||||
follow_price_changes (bool): Monitor price changes
|
||||
has_ldjson_price_data (bool|None): Whether page has LD-JSON price data
|
||||
track_ldjson_price_data (str|None): Track LD-JSON price data ('ACCEPT', 'REJECT', None)
|
||||
price_change_threshold_percent (float|None): Minimum price change % to trigger
|
||||
|
||||
Notifications:
|
||||
notification_urls (List[str]): Apprise URLs for notifications
|
||||
notification_title (str|None): Custom notification title template
|
||||
notification_body (str|None): Custom notification body template
|
||||
notification_format (str): Notification format (e.g., 'System default', 'Text', 'HTML')
|
||||
notification_muted (bool): Disable notifications for this watch
|
||||
notification_screenshot (bool): Include screenshot in notifications
|
||||
notification_alert_count (int): Number of notifications sent
|
||||
last_notification_error (str|None): Last notification error message
|
||||
body (str|None): DEPRECATED? Legacy notification body field
|
||||
filter_failure_notification_send (bool): Send notification on filter failures
|
||||
|
||||
History & State:
|
||||
date_created (int|None): Unix timestamp of watch creation
|
||||
last_checked (int): Unix timestamp of last check
|
||||
last_viewed (int): History snapshot key of last user view
|
||||
last_error (str|bool): Last error message or False if no error
|
||||
check_count (int): Total number of checks performed
|
||||
fetch_time (float): Duration of last fetch in seconds
|
||||
consecutive_filter_failures (int): Counter for consecutive filter match failures
|
||||
previous_md5 (str|bool): MD5 hash of previous content
|
||||
previous_md5_before_filters (str|bool): MD5 hash before filters applied
|
||||
history_snapshot_max_length (int|None): Max history snapshots to keep (None = use global)
|
||||
|
||||
Conditions:
|
||||
conditions (dict): Custom conditions for change detection logic
|
||||
conditions_match_logic (str): Logic operator ('ALL', 'ANY') for conditions
|
||||
|
||||
Metadata:
|
||||
content-type (str|None): Content-Type from last fetch
|
||||
remote_server_reply (str|None): Server header from last response
|
||||
ignore_status_codes (List[int]|None): HTTP status codes to ignore
|
||||
use_page_title_in_list (bool|None): Display page title in watch list (None = use system default)
|
||||
|
||||
Instance Attributes (not serialized):
|
||||
__datastore: Reference to parent DataStore (set externally after creation)
|
||||
data_dir: Filesystem path for this watch's data directory
|
||||
|
||||
Notes:
|
||||
- Many fields default to None to distinguish "not set" from "set to default"
|
||||
- When field is None, system-level defaults are used
|
||||
- Processor-specific configs (e.g., processor_config_*) are NOT stored in watch.json
|
||||
They are stored in separate {processor_name}.json files
|
||||
- This class is used for both Watch and Tag objects (tags reuse the structure)
|
||||
"""
|
||||
|
||||
def __init__(self, *arg, **kw):
|
||||
# Store datastore reference (common to Watch and Tag)
|
||||
# Use single underscore to avoid name mangling issues in subclasses
|
||||
self._datastore = kw.get('__datastore')
|
||||
if kw.get('__datastore'):
|
||||
del kw['__datastore']
|
||||
|
||||
# Store datastore_path (common to Watch and Tag)
|
||||
self._datastore_path = kw.get('datastore_path')
|
||||
if kw.get('datastore_path'):
|
||||
del kw['datastore_path']
|
||||
|
||||
self.update({
|
||||
# Custom notification content
|
||||
# Re #110, so then if this is set to None, we know to use the default value instead
|
||||
@@ -32,6 +189,7 @@ class watch_base(dict):
|
||||
'filter_text_replaced': True,
|
||||
'follow_price_changes': True,
|
||||
'has_ldjson_price_data': None,
|
||||
'history_snapshot_max_length': None,
|
||||
'headers': {}, # Extra headers to send
|
||||
'ignore_text': [], # List of text to ignore when calculating the comparison checksum
|
||||
'ignore_status_codes': None,
|
||||
@@ -139,4 +297,223 @@ class watch_base(dict):
|
||||
super(watch_base, self).__init__(*arg, **kw)
|
||||
|
||||
if self.get('default'):
|
||||
del self['default']
|
||||
del self['default']
|
||||
|
||||
def __deepcopy__(self, memo):
|
||||
"""
|
||||
Custom deepcopy for all watch_base subclasses (Watch, Tag, etc.).
|
||||
|
||||
CRITICAL FIX: Prevents copying large reference objects like __datastore
|
||||
which would cause exponential memory growth when Watch objects are deepcopied.
|
||||
|
||||
This is called by:
|
||||
- api/Watch.py:76 (API endpoint)
|
||||
- api/Tags.py:28 (Tags API)
|
||||
- processors/base.py:26 (EVERY processor run)
|
||||
- store/__init__.py:544 (clone watch)
|
||||
- And other locations
|
||||
"""
|
||||
from copy import deepcopy
|
||||
|
||||
# Create new instance without calling __init__
|
||||
cls = self.__class__
|
||||
new_obj = cls.__new__(cls)
|
||||
memo[id(self)] = new_obj
|
||||
|
||||
# Copy the dict data (all the settings)
|
||||
for key, value in self.items():
|
||||
new_obj[key] = deepcopy(value, memo)
|
||||
|
||||
# Copy instance attributes dynamically
|
||||
# This handles Watch-specific attrs (like __datastore) and any future subclass attrs
|
||||
for attr_name in dir(self):
|
||||
# Skip methods, special attrs, and dict keys
|
||||
if attr_name.startswith('_') and not attr_name.startswith('__'):
|
||||
# This catches _model__datastore, _model__history_n, etc.
|
||||
try:
|
||||
attr_value = getattr(self, attr_name)
|
||||
|
||||
# Special handling: Share references to large objects instead of copying
|
||||
# Examples: _datastore, __datastore, __app_reference, __global_settings, etc.
|
||||
if (attr_name == '_datastore' or
|
||||
attr_name.endswith('__datastore') or
|
||||
attr_name.endswith('__app')):
|
||||
# Share the reference (don't copy!) to prevent memory leaks
|
||||
setattr(new_obj, attr_name, attr_value)
|
||||
# Skip cache attributes - let them regenerate on demand
|
||||
elif 'cache' in attr_name.lower():
|
||||
pass # Don't copy caches
|
||||
# Copy regular instance attributes
|
||||
elif not callable(attr_value):
|
||||
setattr(new_obj, attr_name, attr_value)
|
||||
except AttributeError:
|
||||
pass # Attribute doesn't exist in this instance
|
||||
|
||||
return new_obj
|
||||
|
||||
def __getstate__(self):
|
||||
"""
|
||||
Custom pickle serialization for all watch_base subclasses.
|
||||
|
||||
Excludes large reference objects (like __datastore) from serialization.
|
||||
"""
|
||||
# Get the dict data
|
||||
state = dict(self)
|
||||
|
||||
# Collect instance attributes (excluding methods and large references)
|
||||
instance_attrs = {}
|
||||
for attr_name in dir(self):
|
||||
if attr_name.startswith('_') and not attr_name.startswith('__'):
|
||||
try:
|
||||
attr_value = getattr(self, attr_name)
|
||||
# Exclude large reference objects and caches from serialization
|
||||
if not (attr_name == '_datastore' or
|
||||
attr_name.endswith('__datastore') or
|
||||
attr_name.endswith('__app') or
|
||||
'cache' in attr_name.lower() or
|
||||
callable(attr_value)):
|
||||
instance_attrs[attr_name] = attr_value
|
||||
except AttributeError:
|
||||
pass
|
||||
|
||||
if instance_attrs:
|
||||
state['__instance_metadata__'] = instance_attrs
|
||||
|
||||
return state
|
||||
|
||||
def __setstate__(self, state):
|
||||
"""
|
||||
Custom pickle deserialization for all watch_base subclasses.
|
||||
|
||||
WARNING: Large reference objects (like __datastore) are NOT restored!
|
||||
Caller must restore these references after unpickling if needed.
|
||||
"""
|
||||
# Extract metadata
|
||||
metadata = state.pop('__instance_metadata__', {})
|
||||
|
||||
# Restore dict data
|
||||
self.update(state)
|
||||
|
||||
# Restore instance attributes
|
||||
for attr_name, attr_value in metadata.items():
|
||||
setattr(self, attr_name, attr_value)
|
||||
|
||||
@property
|
||||
def data_dir(self):
|
||||
"""
|
||||
The base directory for this watch/tag data (property, computed from UUID).
|
||||
|
||||
Common property for both Watch and Tag objects.
|
||||
Returns path like: /datastore/{uuid}/
|
||||
"""
|
||||
return os.path.join(self._datastore_path, self['uuid']) if self._datastore_path else None
|
||||
|
||||
def ensure_data_dir_exists(self):
|
||||
"""
|
||||
Create the data directory if it doesn't exist.
|
||||
|
||||
Common method for both Watch and Tag objects.
|
||||
"""
|
||||
from loguru import logger
|
||||
if not os.path.isdir(self.data_dir):
|
||||
logger.debug(f"> Creating data dir {self.data_dir}")
|
||||
os.mkdir(self.data_dir)
|
||||
|
||||
def get_global_setting(self, *path):
|
||||
"""
|
||||
Get a setting from the global datastore configuration.
|
||||
|
||||
Args:
|
||||
*path: Path to the setting (e.g., 'application', 'history_snapshot_max_length')
|
||||
|
||||
Returns:
|
||||
The setting value, or None if not found
|
||||
|
||||
Example:
|
||||
maxlen = self.get_global_setting('application', 'history_snapshot_max_length')
|
||||
"""
|
||||
if not self._datastore:
|
||||
return None
|
||||
|
||||
try:
|
||||
value = self._datastore['settings']
|
||||
for key in path:
|
||||
value = value[key]
|
||||
return value
|
||||
except (KeyError, TypeError):
|
||||
return None
|
||||
|
||||
def _get_commit_data(self):
|
||||
"""
|
||||
Prepare data for commit (can be overridden by subclasses).
|
||||
|
||||
Returns:
|
||||
dict: Data to serialize (filtered as needed by subclass)
|
||||
"""
|
||||
import copy
|
||||
|
||||
# Acquire datastore lock to prevent concurrent modifications during copy
|
||||
lock = self._datastore.lock if self._datastore and hasattr(self._datastore, 'lock') else None
|
||||
|
||||
if lock:
|
||||
with lock:
|
||||
snapshot = dict(self)
|
||||
else:
|
||||
snapshot = dict(self)
|
||||
|
||||
# Deep copy snapshot (slower, but done outside lock to minimize contention)
|
||||
# Subclasses can override to filter keys (e.g., Watch excludes processor_config_*)
|
||||
return {k: copy.deepcopy(v) for k, v in snapshot.items()}
|
||||
|
||||
def _save_to_disk(self, data_dict, uuid):
|
||||
"""
|
||||
Save data to disk (must be implemented by subclasses).
|
||||
|
||||
Args:
|
||||
data_dict: Dictionary to save
|
||||
uuid: UUID for logging
|
||||
|
||||
Raises:
|
||||
NotImplementedError: If subclass doesn't implement
|
||||
"""
|
||||
raise NotImplementedError("Subclass must implement _save_to_disk()")
|
||||
|
||||
def commit(self):
|
||||
"""
|
||||
Save this watch/tag immediately to disk using atomic write.
|
||||
|
||||
Common commit logic for Watch and Tag objects.
|
||||
Subclasses override _get_commit_data() and _save_to_disk() for specifics.
|
||||
|
||||
Fire-and-forget: Logs errors but does not raise exceptions.
|
||||
Data remains in memory even if save fails, so next commit will retry.
|
||||
"""
|
||||
from loguru import logger
|
||||
|
||||
if not self.data_dir:
|
||||
entity_type = self.__class__.__name__
|
||||
logger.error(f"Cannot commit {entity_type} {self.get('uuid')} without datastore_path")
|
||||
return
|
||||
|
||||
uuid = self.get('uuid')
|
||||
if not uuid:
|
||||
entity_type = self.__class__.__name__
|
||||
logger.error(f"Cannot commit {entity_type} without UUID")
|
||||
return
|
||||
|
||||
# Get data from subclass (may filter keys)
|
||||
try:
|
||||
data_dict = self._get_commit_data()
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to prepare commit data for {uuid}: {e}")
|
||||
return
|
||||
|
||||
# Save to disk via subclass implementation
|
||||
try:
|
||||
# Determine entity type from module name (Watch.py -> watch, Tag.py -> tag)
|
||||
entity_type = _determine_entity_type(self.__class__)
|
||||
filename = f"{entity_type}.json"
|
||||
self._save_to_disk(data_dict, uuid)
|
||||
logger.debug(f"Committed {entity_type} {uuid} to {uuid}/{filename}")
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to commit {uuid}: {e}")
|
||||
@@ -0,0 +1,84 @@
|
||||
"""
|
||||
Entity persistence mixin for Watch and Tag models.
|
||||
|
||||
Provides file-based persistence using atomic writes.
|
||||
"""
|
||||
|
||||
import functools
|
||||
import inspect
|
||||
|
||||
|
||||
@functools.lru_cache(maxsize=None)
|
||||
def _determine_entity_type(cls):
|
||||
"""
|
||||
Determine entity type from class hierarchy (cached at class level).
|
||||
|
||||
Args:
|
||||
cls: The class to inspect
|
||||
|
||||
Returns:
|
||||
str: Entity type ('watch', 'tag', etc.)
|
||||
|
||||
Raises:
|
||||
ValueError: If entity type cannot be determined
|
||||
"""
|
||||
for base_class in inspect.getmro(cls):
|
||||
module_name = base_class.__module__
|
||||
if module_name.startswith('changedetectionio.model.'):
|
||||
# Get last part after dot: "changedetectionio.model.Watch" -> "watch"
|
||||
return module_name.split('.')[-1].lower()
|
||||
|
||||
raise ValueError(
|
||||
f"Cannot determine entity type for {cls.__module__}.{cls.__name__}. "
|
||||
f"Entity must inherit from a class in changedetectionio.model (Watch or Tag)."
|
||||
)
|
||||
|
||||
|
||||
class EntityPersistenceMixin:
|
||||
"""
|
||||
Mixin providing file persistence for watch_base subclasses (Watch, Tag, etc.).
|
||||
|
||||
This mixin provides the _save_to_disk() method required by watch_base.commit().
|
||||
It automatically determines the correct filename and size limits based on class hierarchy.
|
||||
|
||||
Usage:
|
||||
class model(EntityPersistenceMixin, watch_base): # in Watch.py
|
||||
pass
|
||||
|
||||
class model(EntityPersistenceMixin, watch_base): # in Tag.py
|
||||
pass
|
||||
"""
|
||||
|
||||
def _save_to_disk(self, data_dict, uuid):
|
||||
"""
|
||||
Save entity to disk using atomic write.
|
||||
|
||||
Implements the abstract method required by watch_base.commit().
|
||||
Automatically determines filename and size limits from class hierarchy.
|
||||
|
||||
Args:
|
||||
data_dict: Dictionary to save
|
||||
uuid: UUID for logging
|
||||
|
||||
Raises:
|
||||
ValueError: If entity type cannot be determined from class hierarchy
|
||||
"""
|
||||
# Import here to avoid circular dependency
|
||||
from changedetectionio.store.file_saving_datastore import save_entity_atomic
|
||||
|
||||
# Determine entity type (cached at class level, not instance level)
|
||||
entity_type = _determine_entity_type(self.__class__)
|
||||
|
||||
# Set filename and size limits based on entity type
|
||||
filename = f'{entity_type}.json'
|
||||
max_size_mb = 10 if entity_type == 'watch' else 1
|
||||
|
||||
# Save using generic function
|
||||
save_entity_atomic(
|
||||
self.data_dir,
|
||||
uuid,
|
||||
data_dict,
|
||||
filename=filename,
|
||||
entity_type=entity_type,
|
||||
max_size_mb=max_size_mb
|
||||
)
|
||||
@@ -105,6 +105,30 @@ class ChangeDetectionSpec:
|
||||
"""
|
||||
pass
|
||||
|
||||
@hookspec
|
||||
def register_processor(self):
|
||||
"""Register an external processor plugin.
|
||||
|
||||
External packages can implement this hook to register custom processors
|
||||
that will be discovered alongside built-in processors.
|
||||
|
||||
Returns:
|
||||
dict or None: Dictionary with processor information:
|
||||
{
|
||||
'processor_name': str, # Machine name (e.g., 'osint_recon')
|
||||
'processor_module': module, # Module containing processor.py
|
||||
'processor_class': class, # The perform_site_check class
|
||||
'metadata': { # Optional metadata
|
||||
'name': str, # Display name
|
||||
'description': str, # Description
|
||||
'processor_weight': int,# Sort weight (lower = higher priority)
|
||||
'list_badge_text': str, # Badge text for UI
|
||||
}
|
||||
}
|
||||
Return None if this plugin doesn't provide a processor
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
# Set up Plugin Manager
|
||||
plugin_manager = pluggy.PluginManager(PLUGIN_NAMESPACE)
|
||||
|
||||
@@ -17,9 +17,11 @@ def find_sub_packages(package_name):
|
||||
return [name for _, name, is_pkg in pkgutil.iter_modules(package.__path__) if is_pkg]
|
||||
|
||||
|
||||
@lru_cache(maxsize=1)
|
||||
def find_processors():
|
||||
"""
|
||||
Find all subclasses of DifferenceDetectionProcessor in the specified package.
|
||||
Results are cached to avoid repeated discovery.
|
||||
|
||||
:param package_name: The name of the package to scan for processor modules.
|
||||
:return: A list of (module, class) tuples.
|
||||
@@ -46,6 +48,23 @@ def find_processors():
|
||||
except (ModuleNotFoundError, ImportError) as e:
|
||||
logger.warning(f"Failed to import module {module_name}: {e} (find_processors())")
|
||||
|
||||
# Discover plugin processors via pluggy
|
||||
try:
|
||||
from changedetectionio.pluggy_interface import plugin_manager
|
||||
plugin_results = plugin_manager.hook.register_processor()
|
||||
|
||||
for result in plugin_results:
|
||||
if result and isinstance(result, dict):
|
||||
processor_module = result.get('processor_module')
|
||||
processor_name = result.get('processor_name')
|
||||
|
||||
if processor_module and processor_name:
|
||||
processors.append((processor_module, processor_name))
|
||||
plugin_path = getattr(processor_module, '__file__', 'unknown location')
|
||||
logger.info(f"Registered plugin processor: {processor_name} from {plugin_path}")
|
||||
except Exception as e:
|
||||
logger.warning(f"Error loading plugin processors: {e}")
|
||||
|
||||
return processors
|
||||
|
||||
|
||||
@@ -97,54 +116,137 @@ def find_processor_module(processor_name):
|
||||
return None
|
||||
|
||||
|
||||
def get_processor_module(processor_name):
|
||||
"""
|
||||
Get the actual processor module (with perform_site_check class) by name.
|
||||
Works for both built-in and plugin processors.
|
||||
|
||||
Args:
|
||||
processor_name: Processor machine name (e.g., 'text_json_diff', 'osint_recon')
|
||||
|
||||
Returns:
|
||||
module: The processor module containing perform_site_check, or None if not found
|
||||
"""
|
||||
processor_classes = find_processors()
|
||||
processor_tuple = next((tpl for tpl in processor_classes if tpl[1] == processor_name), None)
|
||||
|
||||
if processor_tuple:
|
||||
# Return the actual processor module (first element of tuple)
|
||||
return processor_tuple[0]
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def get_processor_submodule(processor_name, submodule_name):
|
||||
"""
|
||||
Get an optional submodule from a processor (e.g., 'difference', 'extract', 'preview').
|
||||
Works for both built-in and plugin processors.
|
||||
|
||||
Args:
|
||||
processor_name: Processor machine name (e.g., 'text_json_diff', 'osint_recon')
|
||||
submodule_name: Name of the submodule (e.g., 'difference', 'extract', 'preview')
|
||||
|
||||
Returns:
|
||||
module: The submodule if it exists, or None if not found
|
||||
"""
|
||||
processor_classes = find_processors()
|
||||
processor_tuple = next((tpl for tpl in processor_classes if tpl[1] == processor_name), None)
|
||||
|
||||
if not processor_tuple:
|
||||
return None
|
||||
|
||||
processor_module = processor_tuple[0]
|
||||
parent_module = get_parent_module(processor_module)
|
||||
|
||||
if not parent_module:
|
||||
return None
|
||||
|
||||
# Try to import the submodule
|
||||
try:
|
||||
# For built-in processors: changedetectionio.processors.text_json_diff.difference
|
||||
# For plugin processors: changedetectionio_osint.difference
|
||||
parent_module_name = parent_module.__name__
|
||||
submodule_full_name = f"{parent_module_name}.{submodule_name}"
|
||||
return importlib.import_module(submodule_full_name)
|
||||
except (ModuleNotFoundError, ImportError):
|
||||
return None
|
||||
|
||||
|
||||
@lru_cache(maxsize=1)
|
||||
def get_plugin_processor_metadata():
|
||||
"""Get metadata from plugin processors."""
|
||||
metadata = {}
|
||||
try:
|
||||
from changedetectionio.pluggy_interface import plugin_manager
|
||||
plugin_results = plugin_manager.hook.register_processor()
|
||||
|
||||
for result in plugin_results:
|
||||
if result and isinstance(result, dict):
|
||||
processor_name = result.get('processor_name')
|
||||
meta = result.get('metadata', {})
|
||||
if processor_name:
|
||||
metadata[processor_name] = meta
|
||||
except Exception as e:
|
||||
logger.warning(f"Error getting plugin processor metadata: {e}")
|
||||
return metadata
|
||||
|
||||
|
||||
def available_processors():
|
||||
"""
|
||||
Get a list of processors by name and description for the UI elements.
|
||||
Can be filtered via ALLOWED_PROCESSORS environment variable (comma-separated list).
|
||||
Can be filtered via DISABLED_PROCESSORS environment variable (comma-separated list).
|
||||
:return: A list :)
|
||||
"""
|
||||
|
||||
processor_classes = find_processors()
|
||||
|
||||
# Check if ALLOWED_PROCESSORS env var is set
|
||||
# For now we disable it, need to make a deploy with lots of new code and this will be an overload
|
||||
allowed_processors_env = os.getenv('ALLOWED_PROCESSORS', 'text_json_diff, restock_diff').strip()
|
||||
allowed_processors = None
|
||||
if allowed_processors_env:
|
||||
# Check if DISABLED_PROCESSORS env var is set
|
||||
disabled_processors_env = os.getenv('DISABLED_PROCESSORS', 'image_ssim_diff').strip()
|
||||
disabled_processors = []
|
||||
if disabled_processors_env:
|
||||
# Parse comma-separated list and strip whitespace
|
||||
allowed_processors = [p.strip() for p in allowed_processors_env.split(',') if p.strip()]
|
||||
logger.info(f"ALLOWED_PROCESSORS set, filtering to: {allowed_processors}")
|
||||
disabled_processors = [p.strip() for p in disabled_processors_env.split(',') if p.strip()]
|
||||
logger.info(f"DISABLED_PROCESSORS set, disabling: {disabled_processors}")
|
||||
|
||||
available = []
|
||||
plugin_metadata = get_plugin_processor_metadata()
|
||||
|
||||
for module, sub_package_name in processor_classes:
|
||||
# Filter by allowed processors if set
|
||||
if allowed_processors and sub_package_name not in allowed_processors:
|
||||
logger.debug(f"Skipping processor '{sub_package_name}' (not in ALLOWED_PROCESSORS)")
|
||||
# Skip disabled processors
|
||||
if sub_package_name in disabled_processors:
|
||||
logger.debug(f"Skipping processor '{sub_package_name}' (in DISABLED_PROCESSORS)")
|
||||
continue
|
||||
|
||||
# Try to get the 'name' attribute from the processor module first
|
||||
if hasattr(module, 'name'):
|
||||
description = gettext(module.name)
|
||||
# Check if this is a plugin processor
|
||||
if sub_package_name in plugin_metadata:
|
||||
meta = plugin_metadata[sub_package_name]
|
||||
description = gettext(meta.get('name', sub_package_name))
|
||||
# Plugin processors start from weight 10 to separate them from built-in processors
|
||||
weight = 100 + meta.get('processor_weight', 0)
|
||||
else:
|
||||
# Fall back to processor_description from parent module's __init__.py
|
||||
parent_module = get_parent_module(module)
|
||||
if parent_module and hasattr(parent_module, 'processor_description'):
|
||||
description = gettext(parent_module.processor_description)
|
||||
# Try to get the 'name' attribute from the processor module first
|
||||
if hasattr(module, 'name'):
|
||||
description = gettext(module.name)
|
||||
else:
|
||||
# Final fallback to a readable name
|
||||
description = sub_package_name.replace('_', ' ').title()
|
||||
# Fall back to processor_description from parent module's __init__.py
|
||||
parent_module = get_parent_module(module)
|
||||
if parent_module and hasattr(parent_module, 'processor_description'):
|
||||
description = gettext(parent_module.processor_description)
|
||||
else:
|
||||
# Final fallback to a readable name
|
||||
description = sub_package_name.replace('_', ' ').title()
|
||||
|
||||
# Get weight for sorting (lower weight = higher in list)
|
||||
weight = 0 # Default weight for processors without explicit weight
|
||||
# Get weight for sorting (lower weight = higher in list)
|
||||
weight = 0 # Default weight for processors without explicit weight
|
||||
|
||||
# Check processor module itself first
|
||||
if hasattr(module, 'processor_weight'):
|
||||
weight = module.processor_weight
|
||||
else:
|
||||
# Fall back to parent module (package __init__.py)
|
||||
parent_module = get_parent_module(module)
|
||||
if parent_module and hasattr(parent_module, 'processor_weight'):
|
||||
weight = parent_module.processor_weight
|
||||
# Check processor module itself first
|
||||
if hasattr(module, 'processor_weight'):
|
||||
weight = module.processor_weight
|
||||
else:
|
||||
# Fall back to parent module (package __init__.py)
|
||||
parent_module = get_parent_module(module)
|
||||
if parent_module and hasattr(parent_module, 'processor_weight'):
|
||||
weight = parent_module.processor_weight
|
||||
|
||||
available.append((sub_package_name, description, weight))
|
||||
|
||||
@@ -155,6 +257,20 @@ def available_processors():
|
||||
return [(name, desc) for name, desc, weight in available]
|
||||
|
||||
|
||||
def get_default_processor():
|
||||
"""
|
||||
Get the default processor to use when none is specified.
|
||||
Returns the first available processor based on weight (lowest weight = highest priority).
|
||||
This ensures forms auto-select a valid processor even when DISABLED_PROCESSORS filters the list.
|
||||
|
||||
:return: The processor name string (e.g., 'text_json_diff')
|
||||
"""
|
||||
available = available_processors()
|
||||
if available:
|
||||
return available[0][0] # Return the processor name from first tuple
|
||||
return 'text_json_diff' # Fallback if somehow no processors are available
|
||||
|
||||
|
||||
def get_processor_badge_texts():
|
||||
"""
|
||||
Get a dictionary mapping processor names to their list_badge_text values.
|
||||
@@ -279,3 +395,76 @@ def get_processor_badge_css():
|
||||
|
||||
return '\n\n'.join(css_rules)
|
||||
|
||||
|
||||
def save_processor_config(datastore, watch_uuid, config_data):
|
||||
"""
|
||||
Save processor-specific configuration to JSON file.
|
||||
|
||||
This is a shared helper function used by both the UI edit form and API endpoints
|
||||
to consistently handle processor configuration storage.
|
||||
|
||||
Args:
|
||||
datastore: The application datastore instance
|
||||
watch_uuid: UUID of the watch
|
||||
config_data: Dictionary of configuration data to save (with processor_config_* prefix removed)
|
||||
|
||||
Returns:
|
||||
bool: True if saved successfully, False otherwise
|
||||
"""
|
||||
if not config_data:
|
||||
return True
|
||||
|
||||
try:
|
||||
from changedetectionio.processors.base import difference_detection_processor
|
||||
|
||||
# Get processor name from watch
|
||||
watch = datastore.data['watching'].get(watch_uuid)
|
||||
if not watch:
|
||||
logger.error(f"Cannot save processor config: watch {watch_uuid} not found")
|
||||
return False
|
||||
|
||||
processor_name = watch.get('processor', 'text_json_diff')
|
||||
|
||||
# Create a processor instance to access config methods
|
||||
processor_instance = difference_detection_processor(datastore, watch_uuid)
|
||||
|
||||
# Use processor name as filename so each processor keeps its own config
|
||||
config_filename = f'{processor_name}.json'
|
||||
processor_instance.update_extra_watch_config(config_filename, config_data)
|
||||
|
||||
logger.debug(f"Saved processor config to {config_filename}: {config_data}")
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to save processor config: {e}")
|
||||
return False
|
||||
|
||||
|
||||
def extract_processor_config_from_form_data(form_data):
|
||||
"""
|
||||
Extract processor_config_* fields from form data and return separate dicts.
|
||||
|
||||
This is a shared helper function used by both the UI edit form and API endpoints
|
||||
to consistently handle processor configuration extraction.
|
||||
|
||||
IMPORTANT: This function modifies form_data in-place by removing processor_config_* fields.
|
||||
|
||||
Args:
|
||||
form_data: Dictionary of form data (will be modified in-place)
|
||||
|
||||
Returns:
|
||||
dict: Dictionary of processor config data (with processor_config_* prefix removed)
|
||||
"""
|
||||
processor_config_data = {}
|
||||
|
||||
# Use list() to create a copy of keys since we're modifying the dict
|
||||
for field_name in list(form_data.keys()):
|
||||
if field_name.startswith('processor_config_'):
|
||||
config_key = field_name.replace('processor_config_', '')
|
||||
# Save all values (including empty strings) to allow explicit clearing of settings
|
||||
processor_config_data[config_key] = form_data[field_name]
|
||||
# Remove from form_data to prevent it from reaching datastore
|
||||
del form_data[field_name]
|
||||
|
||||
return processor_config_data
|
||||
|
||||
|
||||
@@ -23,7 +23,14 @@ class difference_detection_processor():
|
||||
def __init__(self, datastore, watch_uuid):
|
||||
self.datastore = datastore
|
||||
self.watch_uuid = watch_uuid
|
||||
|
||||
# Create a stable snapshot of the watch for processing
|
||||
# Why deepcopy?
|
||||
# 1. Prevents "dict changed during iteration" errors if watch is modified during processing
|
||||
# 2. Preserves Watch object with properties (.link, .is_pdf, etc.) - can't use dict()
|
||||
# 3. Safe now: Watch.__deepcopy__() shares datastore ref (no memory leak) but copies dict data
|
||||
self.watch = deepcopy(self.datastore.data['watching'].get(watch_uuid))
|
||||
|
||||
# Generic fetcher that should be extended (requests, playwright etc)
|
||||
self.fetcher = Fetcher()
|
||||
|
||||
@@ -186,12 +193,12 @@ class difference_detection_processor():
|
||||
import os
|
||||
|
||||
watch = self.datastore.data['watching'].get(self.watch_uuid)
|
||||
watch_data_dir = watch.watch_data_dir
|
||||
data_dir = watch.data_dir
|
||||
|
||||
if not watch_data_dir:
|
||||
if not data_dir:
|
||||
return {}
|
||||
|
||||
filepath = os.path.join(watch_data_dir, filename)
|
||||
filepath = os.path.join(data_dir, filename)
|
||||
|
||||
if not os.path.isfile(filepath):
|
||||
return {}
|
||||
@@ -216,16 +223,16 @@ class difference_detection_processor():
|
||||
import os
|
||||
|
||||
watch = self.datastore.data['watching'].get(self.watch_uuid)
|
||||
watch_data_dir = watch.watch_data_dir
|
||||
data_dir = watch.data_dir
|
||||
|
||||
if not watch_data_dir:
|
||||
logger.warning(f"Cannot save extra watch config {filename}: no watch_data_dir")
|
||||
if not data_dir:
|
||||
logger.warning(f"Cannot save extra watch config {filename}: no data_dir")
|
||||
return
|
||||
|
||||
# Ensure directory exists
|
||||
watch.ensure_data_dir_exists()
|
||||
|
||||
filepath = os.path.join(watch_data_dir, filename)
|
||||
filepath = os.path.join(data_dir, filename)
|
||||
|
||||
try:
|
||||
# If merge is enabled, read existing data first
|
||||
|
||||
@@ -12,6 +12,13 @@ processor_description = "Visual/Screenshot change detection (Fast)"
|
||||
processor_name = "image_ssim_diff"
|
||||
processor_weight = 2 # Lower weight = appears at top, heavier weight = appears lower (bottom)
|
||||
|
||||
# Processor capabilities
|
||||
supports_visual_selector = True
|
||||
supports_browser_steps = True
|
||||
supports_text_filters_and_triggers = False
|
||||
supports_text_filters_and_triggers_elements = False
|
||||
supports_request_type = True
|
||||
|
||||
PROCESSOR_CONFIG_NAME = f"{Path(__file__).parent.name}.json"
|
||||
|
||||
# Subprocess timeout settings
|
||||
|
||||
@@ -414,7 +414,7 @@ def render(watch, datastore, request, url_for, render_template, flash, redirect)
|
||||
|
||||
# Load historical data if available (for charts/visualization)
|
||||
comparison_data = {}
|
||||
comparison_config_path = os.path.join(watch.watch_data_dir, "visual_comparison_data.json")
|
||||
comparison_config_path = os.path.join(watch.data_dir, "visual_comparison_data.json")
|
||||
if os.path.isfile(comparison_config_path):
|
||||
try:
|
||||
with open(comparison_config_path, 'r') as f:
|
||||
|
||||
@@ -90,7 +90,7 @@ def on_config_save(watch, processor_config, datastore):
|
||||
processor_config['auto_track_region'] = False
|
||||
|
||||
# Remove old template file if exists
|
||||
template_path = os.path.join(watch.watch_data_dir, CROPPED_IMAGE_TEMPLATE_FILENAME)
|
||||
template_path = os.path.join(watch.data_dir, CROPPED_IMAGE_TEMPLATE_FILENAME)
|
||||
if os.path.exists(template_path):
|
||||
os.remove(template_path)
|
||||
logger.debug(f"Removed old template file: {template_path}")
|
||||
|
||||
@@ -4,6 +4,13 @@ from changedetectionio.model.Watch import model as BaseWatch
|
||||
from typing import Union
|
||||
import re
|
||||
|
||||
# Processor capabilities
|
||||
supports_visual_selector = True
|
||||
supports_browser_steps = True
|
||||
supports_text_filters_and_triggers = True
|
||||
supports_text_filters_and_triggers_elements = True
|
||||
supports_request_type = True
|
||||
|
||||
class Restock(dict):
|
||||
|
||||
def parse_currency(self, raw_value: str) -> Union[float, None]:
|
||||
|
||||
@@ -193,18 +193,17 @@ class perform_site_check(difference_detection_processor):
|
||||
|
||||
|
||||
itemprop_availability = {}
|
||||
multiple_prices_found = False
|
||||
|
||||
# Try built-in extraction first, this will scan metadata in the HTML
|
||||
try:
|
||||
itemprop_availability = get_itemprop_availability(self.fetcher.content)
|
||||
except MoreThanOnePriceFound as e:
|
||||
# Add the real data
|
||||
raise ProcessorException(message="Cannot run, more than one price detected, this plugin is only for product pages with ONE product, try the content-change detection mode.",
|
||||
url=watch.get('url'),
|
||||
status_code=self.fetcher.get_last_status_code(),
|
||||
screenshot=self.fetcher.screenshot,
|
||||
xpath_data=self.fetcher.xpath_data
|
||||
)
|
||||
# Don't raise immediately - let plugins try to handle this case
|
||||
# Plugins might be able to determine which price is correct
|
||||
logger.warning(f"Built-in detection found multiple prices on {watch.get('url')}, will try plugin override")
|
||||
multiple_prices_found = True
|
||||
itemprop_availability = {}
|
||||
|
||||
# If built-in extraction didn't get both price AND availability, try plugin override
|
||||
# Only check plugin if this watch is using a fetcher that might provide better data
|
||||
@@ -216,9 +215,21 @@ class perform_site_check(difference_detection_processor):
|
||||
from changedetectionio.pluggy_interface import get_itemprop_availability_from_plugin
|
||||
fetcher_name = watch.get('fetch_backend', 'html_requests')
|
||||
|
||||
# Only try plugin override if not using system default (which might be anything)
|
||||
if fetcher_name and fetcher_name != 'system':
|
||||
logger.debug("Calling extra plugins for getting item price/availability")
|
||||
# Resolve 'system' to the actual fetcher being used
|
||||
# This allows plugins to work even when watch uses "system settings default"
|
||||
if fetcher_name == 'system':
|
||||
# Get the actual fetcher that was used (from self.fetcher)
|
||||
# Fetcher class name gives us the actual backend (e.g., 'html_requests', 'html_webdriver')
|
||||
actual_fetcher = type(self.fetcher).__name__
|
||||
if 'html_requests' in actual_fetcher.lower():
|
||||
fetcher_name = 'html_requests'
|
||||
elif 'webdriver' in actual_fetcher.lower() or 'playwright' in actual_fetcher.lower():
|
||||
fetcher_name = 'html_webdriver'
|
||||
logger.debug(f"Resolved 'system' fetcher to actual fetcher: {fetcher_name}")
|
||||
|
||||
# Try plugin override - plugins can decide if they support this fetcher
|
||||
if fetcher_name:
|
||||
logger.debug(f"Calling extra plugins for getting item price/availability (fetcher: {fetcher_name})")
|
||||
plugin_availability = get_itemprop_availability_from_plugin(self.fetcher.content, fetcher_name, self.fetcher, watch.link)
|
||||
|
||||
if plugin_availability:
|
||||
@@ -233,6 +244,16 @@ class perform_site_check(difference_detection_processor):
|
||||
if not plugin_availability:
|
||||
logger.debug("No item price/availability from plugins")
|
||||
|
||||
# If we had multiple prices and plugins also failed, NOW raise the exception
|
||||
if multiple_prices_found and not itemprop_availability.get('price'):
|
||||
raise ProcessorException(
|
||||
message="Cannot run, more than one price detected, this plugin is only for product pages with ONE product, try the content-change detection mode.",
|
||||
url=watch.get('url'),
|
||||
status_code=self.fetcher.get_last_status_code(),
|
||||
screenshot=self.fetcher.screenshot,
|
||||
xpath_data=self.fetcher.xpath_data
|
||||
)
|
||||
|
||||
# Something valid in get_itemprop_availability() by scraping metadata ?
|
||||
if itemprop_availability.get('price') or itemprop_availability.get('availability'):
|
||||
# Store for other usage
|
||||
|
||||
@@ -1,6 +1,12 @@
|
||||
|
||||
from loguru import logger
|
||||
|
||||
# Processor capabilities
|
||||
supports_visual_selector = True
|
||||
supports_browser_steps = True
|
||||
supports_text_filters_and_triggers = True
|
||||
supports_text_filters_and_triggers_elements = True
|
||||
supports_request_type = True
|
||||
|
||||
|
||||
|
||||
def _task(watch, update_handler):
|
||||
@@ -49,7 +55,7 @@ def prepare_filter_prevew(datastore, watch_uuid, form_data):
|
||||
|
||||
tmp_watch = deepcopy(datastore.data['watching'].get(watch_uuid))
|
||||
|
||||
if tmp_watch and tmp_watch.history and os.path.isdir(tmp_watch.watch_data_dir):
|
||||
if tmp_watch and tmp_watch.history and os.path.isdir(tmp_watch.data_dir):
|
||||
# Splice in the temporary stuff from the form
|
||||
form = forms.processor_text_json_diff_form(formdata=form_data if request.method == 'POST' else None,
|
||||
data=form_data
|
||||
@@ -58,11 +64,11 @@ def prepare_filter_prevew(datastore, watch_uuid, form_data):
|
||||
# Only update vars that came in via the AJAX post
|
||||
p = {k: v for k, v in form.data.items() if k in form_data.keys()}
|
||||
tmp_watch.update(p)
|
||||
blank_watch_no_filters = watch_model()
|
||||
blank_watch_no_filters = watch_model(datastore_path=datastore.datastore_path, __datastore=datastore.data)
|
||||
blank_watch_no_filters['url'] = tmp_watch.get('url')
|
||||
|
||||
latest_filename = next(reversed(tmp_watch.history))
|
||||
html_fname = os.path.join(tmp_watch.watch_data_dir, f"{latest_filename}.html.br")
|
||||
html_fname = os.path.join(tmp_watch.data_dir, f"{latest_filename}.html.br")
|
||||
with open(html_fname, 'rb') as f:
|
||||
decompressed_data = brotli.decompress(f.read()).decode('utf-8') if html_fname.endswith('.br') else f.read().decode('utf-8')
|
||||
|
||||
|
||||
+218
-131
@@ -5,51 +5,57 @@ import heapq
|
||||
import queue
|
||||
import threading
|
||||
|
||||
try:
|
||||
import janus
|
||||
except ImportError:
|
||||
logger.critical(f"CRITICAL: janus library is required. Install with: pip install janus")
|
||||
raise
|
||||
# Janus is no longer required - we use pure threading.Queue for multi-loop support
|
||||
# try:
|
||||
# import janus
|
||||
# except ImportError:
|
||||
# pass # Not needed anymore
|
||||
|
||||
|
||||
class RecheckPriorityQueue:
|
||||
"""
|
||||
Ultra-reliable priority queue using janus for async/sync bridging.
|
||||
|
||||
CRITICAL DESIGN NOTE: Both sync_q and async_q are required because:
|
||||
- sync_q: Used by Flask routes, ticker threads, and other synchronous code
|
||||
- async_q: Used by async workers (the actual fetchers/processors) and coroutines
|
||||
|
||||
DO NOT REMOVE EITHER INTERFACE - they bridge different execution contexts:
|
||||
- Synchronous code (Flask, threads) cannot use async methods without blocking
|
||||
- Async code cannot use sync methods without blocking the event loop
|
||||
- janus provides the only safe bridge between these two worlds
|
||||
|
||||
Attempting to unify to async-only would require:
|
||||
- Converting all Flask routes to async (major breaking change)
|
||||
- Using asyncio.run() in sync contexts (causes deadlocks)
|
||||
- Thread-pool wrapping (adds complexity and overhead)
|
||||
|
||||
Minimal implementation focused on reliability:
|
||||
- Pure janus for sync/async bridge
|
||||
- Thread-safe priority ordering
|
||||
- Bulletproof error handling with critical logging
|
||||
Thread-safe priority queue supporting multiple async event loops.
|
||||
|
||||
ARCHITECTURE:
|
||||
- Multiple async workers, each with its own event loop in its own thread
|
||||
- Hybrid sync/async design for maximum scalability
|
||||
- Sync interface for ticker thread (threading.Queue)
|
||||
- Async interface for workers (asyncio.Event - NO executor threads!)
|
||||
|
||||
SCALABILITY:
|
||||
- Scales to 100-200+ workers without executor thread exhaustion
|
||||
- Async workers wait on asyncio.Event (pure coroutines, no threads)
|
||||
- Sync callers use threading.Queue (backward compatible)
|
||||
|
||||
WHY NOT JANUS:
|
||||
- Janus binds to ONE event loop at creation time
|
||||
- Our architecture has 15+ workers, each with separate event loops
|
||||
- Workers in different threads/loops cannot share janus async interface
|
||||
|
||||
WHY NOT RUN_IN_EXECUTOR:
|
||||
- With 200 workers, run_in_executor() would block 200 threads
|
||||
- Exhausts ThreadPoolExecutor, starves Flask HTTP handlers
|
||||
- Pure async approach uses 0 threads while waiting
|
||||
"""
|
||||
|
||||
|
||||
def __init__(self, maxsize: int = 0):
|
||||
try:
|
||||
self._janus_queue = janus.Queue(maxsize=maxsize)
|
||||
# BOTH interfaces required - see class docstring for why
|
||||
self.sync_q = self._janus_queue.sync_q # Flask routes, ticker thread
|
||||
self.async_q = self._janus_queue.async_q # Async workers
|
||||
|
||||
import asyncio
|
||||
|
||||
# Sync interface: threading.Queue for ticker thread and Flask routes
|
||||
self._notification_queue = queue.Queue(maxsize=maxsize if maxsize > 0 else 0)
|
||||
|
||||
# Priority storage - thread-safe
|
||||
self._priority_items = []
|
||||
self._lock = threading.RLock()
|
||||
|
||||
|
||||
# No event signaling needed - pure polling approach
|
||||
# Workers check queue every 50ms (latency acceptable: 0-500ms)
|
||||
# Scales to 1000+ workers: each sleeping worker = ~4KB coroutine, not thread
|
||||
|
||||
# Signals for UI updates
|
||||
self.queue_length_signal = signal('queue_length')
|
||||
|
||||
|
||||
logger.debug("RecheckPriorityQueue initialized successfully")
|
||||
except Exception as e:
|
||||
logger.critical(f"CRITICAL: Failed to initialize RecheckPriorityQueue: {str(e)}")
|
||||
@@ -58,38 +64,48 @@ class RecheckPriorityQueue:
|
||||
# SYNC INTERFACE (for ticker thread)
|
||||
def put(self, item, block: bool = True, timeout: Optional[float] = None):
|
||||
"""Thread-safe sync put with priority ordering"""
|
||||
logger.trace(f"RecheckQueue.put() called for item: {self._get_item_uuid(item)}, block={block}, timeout={timeout}")
|
||||
try:
|
||||
# Add to priority storage
|
||||
# CRITICAL: Add to both priority storage AND notification queue atomically
|
||||
# to prevent desynchronization where item exists but no notification
|
||||
with self._lock:
|
||||
heapq.heappush(self._priority_items, item)
|
||||
|
||||
# Notify via janus sync queue
|
||||
self.sync_q.put(True, block=block, timeout=timeout)
|
||||
|
||||
# Emit signals
|
||||
self._emit_put_signals(item)
|
||||
|
||||
|
||||
# Add notification - use blocking with timeout for safety
|
||||
# Notification queue is unlimited size, so should never block in practice
|
||||
# but timeout ensures we detect any unexpected issues (deadlock, etc)
|
||||
try:
|
||||
self._notification_queue.put(True, block=True, timeout=5.0)
|
||||
except Exception as notif_e:
|
||||
# Notification failed - MUST remove from priority_items to keep in sync
|
||||
# This prevents "Priority queue inconsistency" errors in get()
|
||||
logger.critical(f"CRITICAL: Notification queue put failed, removing from priority_items: {notif_e}")
|
||||
self._priority_items.remove(item)
|
||||
heapq.heapify(self._priority_items)
|
||||
raise # Re-raise to be caught by outer exception handler
|
||||
|
||||
# Signal emission after successful queue - log but don't fail the operation
|
||||
# Item is already safely queued, so signal failure shouldn't affect queue state
|
||||
try:
|
||||
self._emit_put_signals(item)
|
||||
except Exception as signal_e:
|
||||
logger.error(f"Failed to emit put signals but item queued successfully: {signal_e}")
|
||||
|
||||
logger.trace(f"Successfully queued item: {self._get_item_uuid(item)}")
|
||||
return True
|
||||
|
||||
|
||||
except Exception as e:
|
||||
logger.critical(f"CRITICAL: Failed to put item {self._get_item_uuid(item)}: {str(e)}")
|
||||
# Remove from priority storage if janus put failed
|
||||
try:
|
||||
with self._lock:
|
||||
if item in self._priority_items:
|
||||
self._priority_items.remove(item)
|
||||
heapq.heapify(self._priority_items)
|
||||
except Exception as cleanup_e:
|
||||
logger.critical(f"CRITICAL: Failed to cleanup after put failure: {str(e)}")
|
||||
logger.critical(f"CRITICAL: Failed to put item {self._get_item_uuid(item)}: {type(e).__name__}: {str(e)}")
|
||||
# Item should have been cleaned up in the inner try/except if notification failed
|
||||
return False
|
||||
|
||||
def get(self, block: bool = True, timeout: Optional[float] = None):
|
||||
"""Thread-safe sync get with priority ordering"""
|
||||
import queue
|
||||
logger.trace(f"RecheckQueue.get() called, block={block}, timeout={timeout}")
|
||||
import queue as queue_module
|
||||
try:
|
||||
# Wait for notification
|
||||
self.sync_q.get(block=block, timeout=timeout)
|
||||
# Wait for notification (this doesn't return the actual item, just signals availability)
|
||||
self._notification_queue.get(block=block, timeout=timeout)
|
||||
|
||||
# Get highest priority item
|
||||
with self._lock:
|
||||
@@ -98,69 +114,91 @@ class RecheckPriorityQueue:
|
||||
raise Exception("Priority queue inconsistency")
|
||||
item = heapq.heappop(self._priority_items)
|
||||
|
||||
# Emit signals
|
||||
self._emit_get_signals()
|
||||
# Signal emission after successful retrieval - log but don't lose the item
|
||||
# Item is already retrieved, so signal failure shouldn't affect queue state
|
||||
try:
|
||||
self._emit_get_signals()
|
||||
except Exception as signal_e:
|
||||
logger.error(f"Failed to emit get signals but item retrieved successfully: {signal_e}")
|
||||
|
||||
logger.debug(f"Successfully retrieved item: {self._get_item_uuid(item)}")
|
||||
logger.trace(f"RecheckQueue.get() successfully retrieved item: {self._get_item_uuid(item)}")
|
||||
return item
|
||||
|
||||
except queue.Empty:
|
||||
# Queue is empty with timeout - expected behavior, re-raise without logging
|
||||
raise
|
||||
except queue_module.Empty:
|
||||
# Queue is empty with timeout - expected behavior
|
||||
logger.trace(f"RecheckQueue.get() timed out - queue is empty (timeout={timeout})")
|
||||
raise # noqa
|
||||
except Exception as e:
|
||||
# Re-raise without logging - caller (worker) will handle and log appropriately
|
||||
logger.trace(f"RecheckQueue.get() failed with exception: {type(e).__name__}: {str(e)}")
|
||||
raise
|
||||
|
||||
# ASYNC INTERFACE (for workers)
|
||||
async def async_put(self, item):
|
||||
"""Pure async put with priority ordering"""
|
||||
async def async_put(self, item, executor=None):
|
||||
"""Async put with priority ordering - uses thread pool to avoid blocking
|
||||
|
||||
Args:
|
||||
item: Item to add to queue
|
||||
executor: Optional ThreadPoolExecutor. If None, uses default pool.
|
||||
"""
|
||||
logger.trace(f"RecheckQueue.async_put() called for item: {self._get_item_uuid(item)}, executor={executor}")
|
||||
import asyncio
|
||||
try:
|
||||
# Add to priority storage
|
||||
with self._lock:
|
||||
heapq.heappush(self._priority_items, item)
|
||||
|
||||
# Notify via janus async queue
|
||||
await self.async_q.put(True)
|
||||
|
||||
# Emit signals
|
||||
self._emit_put_signals(item)
|
||||
|
||||
logger.debug(f"Successfully async queued item: {self._get_item_uuid(item)}")
|
||||
return True
|
||||
|
||||
# Use run_in_executor to call sync put without blocking event loop
|
||||
loop = asyncio.get_event_loop()
|
||||
result = await loop.run_in_executor(
|
||||
executor, # Use provided executor or default
|
||||
lambda: self.put(item, block=True, timeout=5.0)
|
||||
)
|
||||
|
||||
logger.trace(f"RecheckQueue.async_put() successfully queued item: {self._get_item_uuid(item)}")
|
||||
return result
|
||||
|
||||
except Exception as e:
|
||||
logger.critical(f"CRITICAL: Failed to async put item {self._get_item_uuid(item)}: {str(e)}")
|
||||
# Remove from priority storage if janus put failed
|
||||
try:
|
||||
with self._lock:
|
||||
if item in self._priority_items:
|
||||
self._priority_items.remove(item)
|
||||
heapq.heapify(self._priority_items)
|
||||
except Exception as cleanup_e:
|
||||
logger.critical(f"CRITICAL: Failed to cleanup after async put failure: {str(e)}")
|
||||
return False
|
||||
|
||||
async def async_get(self):
|
||||
"""Pure async get with priority ordering"""
|
||||
|
||||
async def async_get(self, executor=None, timeout=1.0):
|
||||
"""
|
||||
Efficient async get using executor for blocking call.
|
||||
|
||||
HYBRID APPROACH: Best of both worlds
|
||||
- Uses run_in_executor for efficient blocking (no polling overhead)
|
||||
- Single timeout (no double-timeout race condition)
|
||||
- Scales well: executor sized to match worker count
|
||||
|
||||
With FETCH_WORKERS=10: 10 threads blocked max (acceptable)
|
||||
With FETCH_WORKERS=200: Need executor with 200+ threads (see worker_pool.py)
|
||||
|
||||
Args:
|
||||
executor: ThreadPoolExecutor (sized to match worker count)
|
||||
timeout: Maximum time to wait in seconds
|
||||
|
||||
Returns:
|
||||
Item from queue
|
||||
|
||||
Raises:
|
||||
queue.Empty: If timeout expires with no item available
|
||||
"""
|
||||
logger.trace(f"RecheckQueue.async_get() called, timeout={timeout}")
|
||||
import asyncio
|
||||
try:
|
||||
# Wait for notification
|
||||
await self.async_q.get()
|
||||
# Use run_in_executor to call sync get efficiently
|
||||
# No outer asyncio.wait_for wrapper = no double timeout issue!
|
||||
loop = asyncio.get_event_loop()
|
||||
item = await loop.run_in_executor(
|
||||
executor,
|
||||
lambda: self.get(block=True, timeout=timeout)
|
||||
)
|
||||
|
||||
# Get highest priority item
|
||||
with self._lock:
|
||||
if not self._priority_items:
|
||||
logger.critical(f"CRITICAL: Async queue notification received but no priority items available")
|
||||
raise Exception("Priority queue inconsistency")
|
||||
item = heapq.heappop(self._priority_items)
|
||||
|
||||
# Emit signals
|
||||
self._emit_get_signals()
|
||||
|
||||
logger.debug(f"Successfully async retrieved item: {self._get_item_uuid(item)}")
|
||||
logger.trace(f"RecheckQueue.async_get() successfully retrieved item: {self._get_item_uuid(item)}")
|
||||
return item
|
||||
|
||||
except queue.Empty:
|
||||
logger.trace(f"RecheckQueue.async_get() timed out - queue is empty")
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.critical(f"CRITICAL: Failed to async get item from queue: {str(e)}")
|
||||
logger.critical(f"CRITICAL: Failed to async get item from queue: {type(e).__name__}: {str(e)}")
|
||||
raise
|
||||
|
||||
# UTILITY METHODS
|
||||
@@ -186,10 +224,35 @@ class RecheckPriorityQueue:
|
||||
logger.critical(f"CRITICAL: Failed to get queued UUIDs: {str(e)}")
|
||||
return []
|
||||
|
||||
def close(self):
|
||||
"""Close the janus queue"""
|
||||
def clear(self):
|
||||
"""Clear all items from both priority storage and notification queue"""
|
||||
try:
|
||||
self._janus_queue.close()
|
||||
with self._lock:
|
||||
# Clear priority items
|
||||
self._priority_items.clear()
|
||||
|
||||
# Drain all notifications to prevent stale notifications
|
||||
# This is critical for test cleanup to prevent queue desynchronization
|
||||
drained = 0
|
||||
while not self._notification_queue.empty():
|
||||
try:
|
||||
self._notification_queue.get_nowait()
|
||||
drained += 1
|
||||
except queue.Empty:
|
||||
break
|
||||
|
||||
if drained > 0:
|
||||
logger.debug(f"Cleared queue: removed {drained} notifications")
|
||||
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.critical(f"CRITICAL: Failed to clear queue: {str(e)}")
|
||||
return False
|
||||
|
||||
def close(self):
|
||||
"""Close the queue"""
|
||||
try:
|
||||
# Nothing to close for threading.Queue
|
||||
logger.debug("RecheckPriorityQueue closed successfully")
|
||||
except Exception as e:
|
||||
logger.critical(f"CRITICAL: Failed to close RecheckPriorityQueue: {str(e)}")
|
||||
@@ -321,7 +384,7 @@ class RecheckPriorityQueue:
|
||||
except Exception:
|
||||
pass
|
||||
return 'unknown'
|
||||
|
||||
|
||||
def _emit_put_signals(self, item):
|
||||
"""Emit signals when item is added"""
|
||||
try:
|
||||
@@ -330,14 +393,14 @@ class RecheckPriorityQueue:
|
||||
watch_check_update = signal('watch_check_update')
|
||||
if watch_check_update:
|
||||
watch_check_update.send(watch_uuid=item.item['uuid'])
|
||||
|
||||
|
||||
# Queue length signal
|
||||
if self.queue_length_signal:
|
||||
self.queue_length_signal.send(length=self.qsize())
|
||||
|
||||
|
||||
except Exception as e:
|
||||
logger.critical(f"CRITICAL: Failed to emit put signals: {str(e)}")
|
||||
|
||||
|
||||
def _emit_get_signals(self):
|
||||
"""Emit signals when item is removed"""
|
||||
try:
|
||||
@@ -363,12 +426,11 @@ class NotificationQueue:
|
||||
|
||||
def __init__(self, maxsize: int = 0, datastore=None):
|
||||
try:
|
||||
self._janus_queue = janus.Queue(maxsize=maxsize)
|
||||
# BOTH interfaces required - see class docstring for why
|
||||
self.sync_q = self._janus_queue.sync_q # Flask routes, threads
|
||||
self.async_q = self._janus_queue.async_q # Async workers
|
||||
# Use pure threading.Queue to avoid event loop binding issues
|
||||
self._notification_queue = queue.Queue(maxsize=maxsize if maxsize > 0 else 0)
|
||||
self.notification_event_signal = signal('notification_event')
|
||||
self.datastore = datastore # For checking all_muted setting
|
||||
self._lock = threading.RLock()
|
||||
logger.debug("NotificationQueue initialized successfully")
|
||||
except Exception as e:
|
||||
logger.critical(f"CRITICAL: Failed to initialize NotificationQueue: {str(e)}")
|
||||
@@ -380,72 +442,97 @@ class NotificationQueue:
|
||||
|
||||
def put(self, item: Dict[str, Any], block: bool = True, timeout: Optional[float] = None):
|
||||
"""Thread-safe sync put with signal emission"""
|
||||
logger.trace(f"NotificationQueue.put() called for item: {item.get('uuid', 'unknown')}, block={block}, timeout={timeout}")
|
||||
try:
|
||||
# Check if all notifications are muted
|
||||
if self.datastore and self.datastore.data['settings']['application'].get('all_muted', False):
|
||||
logger.debug(f"Notification blocked - all notifications are muted: {item.get('uuid', 'unknown')}")
|
||||
return False
|
||||
|
||||
self.sync_q.put(item, block=block, timeout=timeout)
|
||||
with self._lock:
|
||||
self._notification_queue.put(item, block=block, timeout=timeout)
|
||||
self._emit_notification_signal(item)
|
||||
logger.debug(f"Successfully queued notification: {item.get('uuid', 'unknown')}")
|
||||
logger.trace(f"NotificationQueue.put() successfully queued notification: {item.get('uuid', 'unknown')}")
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.critical(f"CRITICAL: Failed to put notification {item.get('uuid', 'unknown')}: {str(e)}")
|
||||
return False
|
||||
|
||||
async def async_put(self, item: Dict[str, Any]):
|
||||
"""Pure async put with signal emission"""
|
||||
async def async_put(self, item: Dict[str, Any], executor=None):
|
||||
"""Async put with signal emission - uses thread pool
|
||||
|
||||
Args:
|
||||
item: Notification item to queue
|
||||
executor: Optional ThreadPoolExecutor
|
||||
"""
|
||||
logger.trace(f"NotificationQueue.async_put() called for item: {item.get('uuid', 'unknown')}, executor={executor}")
|
||||
import asyncio
|
||||
try:
|
||||
# Check if all notifications are muted
|
||||
if self.datastore and self.datastore.data['settings']['application'].get('all_muted', False):
|
||||
logger.debug(f"Notification blocked - all notifications are muted: {item.get('uuid', 'unknown')}")
|
||||
return False
|
||||
|
||||
await self.async_q.put(item)
|
||||
self._emit_notification_signal(item)
|
||||
logger.debug(f"Successfully async queued notification: {item.get('uuid', 'unknown')}")
|
||||
loop = asyncio.get_event_loop()
|
||||
await loop.run_in_executor(executor, lambda: self.put(item, block=True, timeout=5.0))
|
||||
logger.trace(f"NotificationQueue.async_put() successfully queued notification: {item.get('uuid', 'unknown')}")
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.critical(f"CRITICAL: Failed to async put notification {item.get('uuid', 'unknown')}: {str(e)}")
|
||||
return False
|
||||
|
||||
|
||||
def get(self, block: bool = True, timeout: Optional[float] = None):
|
||||
"""Thread-safe sync get"""
|
||||
logger.trace(f"NotificationQueue.get() called, block={block}, timeout={timeout}")
|
||||
try:
|
||||
return self.sync_q.get(block=block, timeout=timeout)
|
||||
with self._lock:
|
||||
item = self._notification_queue.get(block=block, timeout=timeout)
|
||||
logger.trace(f"NotificationQueue.get() retrieved item: {item.get('uuid', 'unknown') if isinstance(item, dict) else 'unknown'}")
|
||||
return item
|
||||
except queue.Empty as e:
|
||||
logger.trace(f"NotificationQueue.get() timed out - queue is empty (timeout={timeout})")
|
||||
raise e
|
||||
except Exception as e:
|
||||
logger.critical(f"CRITICAL: Failed to get notification: {str(e)}")
|
||||
logger.critical(f"CRITICAL: Failed to get notification: {type(e).__name__}: {str(e)}")
|
||||
raise e
|
||||
|
||||
async def async_get(self):
|
||||
"""Pure async get"""
|
||||
|
||||
async def async_get(self, executor=None):
|
||||
"""Async get - uses thread pool
|
||||
|
||||
Args:
|
||||
executor: Optional ThreadPoolExecutor
|
||||
"""
|
||||
logger.trace(f"NotificationQueue.async_get() called, executor={executor}")
|
||||
import asyncio
|
||||
try:
|
||||
return await self.async_q.get()
|
||||
loop = asyncio.get_event_loop()
|
||||
item = await loop.run_in_executor(executor, lambda: self.get(block=True, timeout=1.0))
|
||||
logger.trace(f"NotificationQueue.async_get() retrieved item: {item.get('uuid', 'unknown') if isinstance(item, dict) else 'unknown'}")
|
||||
return item
|
||||
except queue.Empty as e:
|
||||
logger.trace(f"NotificationQueue.async_get() timed out - queue is empty")
|
||||
raise e
|
||||
except Exception as e:
|
||||
logger.critical(f"CRITICAL: Failed to async get notification: {str(e)}")
|
||||
logger.critical(f"CRITICAL: Failed to async get notification: {type(e).__name__}: {str(e)}")
|
||||
raise e
|
||||
|
||||
def qsize(self) -> int:
|
||||
"""Get current queue size"""
|
||||
try:
|
||||
return self.sync_q.qsize()
|
||||
with self._lock:
|
||||
return self._notification_queue.qsize()
|
||||
except Exception as e:
|
||||
logger.critical(f"CRITICAL: Failed to get notification queue size: {str(e)}")
|
||||
return 0
|
||||
|
||||
|
||||
def empty(self) -> bool:
|
||||
"""Check if queue is empty"""
|
||||
return self.qsize() == 0
|
||||
|
||||
|
||||
def close(self):
|
||||
"""Close the janus queue"""
|
||||
"""Close the queue"""
|
||||
try:
|
||||
self._janus_queue.close()
|
||||
# Nothing to close for threading.Queue
|
||||
logger.debug("NotificationQueue closed successfully")
|
||||
except Exception as e:
|
||||
logger.critical(f"CRITICAL: Failed to close NotificationQueue: {str(e)}")
|
||||
|
||||
@@ -37,9 +37,9 @@ def register_watch_operation_handlers(socketio, datastore):
|
||||
# Import here to avoid circular imports
|
||||
from changedetectionio.flask_app import update_q
|
||||
from changedetectionio import queuedWatchMetaData
|
||||
from changedetectionio import worker_handler
|
||||
from changedetectionio import worker_pool
|
||||
|
||||
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
worker_pool.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
logger.info(f"Socket.IO: Queued recheck for watch {uuid}")
|
||||
else:
|
||||
emit('operation_result', {'success': False, 'error': f'Unknown operation: {op}'})
|
||||
|
||||
@@ -145,10 +145,10 @@ def handle_watch_update(socketio, **kwargs):
|
||||
# Emit the watch update to all connected clients
|
||||
from changedetectionio.flask_app import update_q
|
||||
from changedetectionio.flask_app import _jinja2_filter_datetime
|
||||
from changedetectionio import worker_handler
|
||||
from changedetectionio import worker_pool
|
||||
|
||||
# Get list of watches that are currently running
|
||||
running_uuids = worker_handler.get_running_uuids()
|
||||
running_uuids = worker_pool.get_running_uuids()
|
||||
|
||||
# Get list of watches in the queue (efficient single-lock method)
|
||||
queue_list = update_q.get_queued_uuids()
|
||||
@@ -252,7 +252,7 @@ def init_socketio(app, datastore):
|
||||
def event_checkbox_operations(data):
|
||||
from changedetectionio.blueprint.ui import _handle_operations
|
||||
from changedetectionio import queuedWatchMetaData
|
||||
from changedetectionio import worker_handler
|
||||
from changedetectionio import worker_pool
|
||||
from changedetectionio.flask_app import update_q, watch_check_update
|
||||
import threading
|
||||
|
||||
@@ -268,7 +268,7 @@ def init_socketio(app, datastore):
|
||||
uuids=data.get('uuids'),
|
||||
datastore=datastore,
|
||||
extra_data=data.get('extra_data'),
|
||||
worker_handler=worker_handler,
|
||||
worker_pool=worker_pool,
|
||||
update_q=update_q,
|
||||
queuedWatchMetaData=queuedWatchMetaData,
|
||||
watch_check_update=watch_check_update,
|
||||
|
||||
@@ -10,6 +10,7 @@
|
||||
set -e
|
||||
|
||||
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
|
||||
rm tests/logs/* -f
|
||||
|
||||
# Since theres no curl installed lets roll with python3
|
||||
check_sanity() {
|
||||
@@ -64,18 +65,21 @@ data_sanity_test
|
||||
echo "-------------------- Running rest of tests in parallel -------------------------------"
|
||||
|
||||
# REMOVE_REQUESTS_OLD_SCREENSHOTS disabled so that we can write a screenshot and send it in test_notifications.py without a real browser
|
||||
REMOVE_REQUESTS_OLD_SCREENSHOTS=false \
|
||||
FETCH_WORKERS=2 REMOVE_REQUESTS_OLD_SCREENSHOTS=false \
|
||||
pytest tests/test_*.py \
|
||||
-n 30 \
|
||||
-n 8 \
|
||||
--dist=load \
|
||||
-vvv \
|
||||
-s \
|
||||
--capture=no \
|
||||
-k "not test_queue_system" \
|
||||
--log-cli-level=DEBUG \
|
||||
--log-cli-format="%(asctime)s [%(process)d] [%(levelname)s] %(name)s: %(message)s"
|
||||
|
||||
echo "---------------------------- DONE parallel test ---------------------------------------"
|
||||
|
||||
FETCH_WORKERS=20 pytest -vvv -s tests/test_queue_handler.py
|
||||
|
||||
echo "RUNNING WITH BASE_URL SET"
|
||||
|
||||
# Now re-run some tests with BASE_URL enabled
|
||||
|
||||
@@ -1,19 +1,25 @@
|
||||
{
|
||||
"name": "",
|
||||
"short_name": "",
|
||||
"name": "ChangeDetection.io",
|
||||
"short_name": "ChangeDetect",
|
||||
"description": "Self-hosted website change detection and monitoring",
|
||||
"icons": [
|
||||
{
|
||||
"src": "android-chrome-192x192.png",
|
||||
"sizes": "192x192",
|
||||
"type": "image/png"
|
||||
"type": "image/png",
|
||||
"purpose": "any maskable"
|
||||
},
|
||||
{
|
||||
"src": "android-chrome-256x256.png",
|
||||
"sizes": "256x256",
|
||||
"type": "image/png"
|
||||
"type": "image/png",
|
||||
"purpose": "any maskable"
|
||||
}
|
||||
],
|
||||
"theme_color": "#ffffff",
|
||||
"start_url": "/",
|
||||
"theme_color": "#5bbad5",
|
||||
"background_color": "#ffffff",
|
||||
"display": "standalone"
|
||||
"display": "standalone",
|
||||
"categories": ["utilities", "productivity"],
|
||||
"orientation": "any"
|
||||
}
|
||||
|
||||
@@ -184,7 +184,8 @@ $(document).ready(function() {
|
||||
}
|
||||
// If it's a button in a form, submit the form
|
||||
else if ($element.is('button')) {
|
||||
$element.closest('form').submit();
|
||||
// Use requestSubmit() to include the button's name/value in the form data
|
||||
$element.closest('form')[0].requestSubmit($element[0]);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
@@ -222,6 +222,19 @@ code {
|
||||
color: var(--color-white);
|
||||
background: var(--color-text-watch-tag-list);
|
||||
@extend .inline-tag;
|
||||
|
||||
/* Remove default anchor styling when used as links */
|
||||
text-decoration: none;
|
||||
|
||||
&:hover {
|
||||
text-decoration: none;
|
||||
opacity: 0.8;
|
||||
cursor: pointer;
|
||||
}
|
||||
|
||||
&:visited {
|
||||
color: var(--color-white);
|
||||
}
|
||||
}
|
||||
|
||||
@media (min-width: 768px) {
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,83 @@
|
||||
"""
|
||||
Base classes for the datastore.
|
||||
|
||||
This module defines the abstract interfaces that all datastore implementations must follow.
|
||||
"""
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from threading import Lock
|
||||
from loguru import logger
|
||||
|
||||
|
||||
class DataStore(ABC):
|
||||
"""
|
||||
Abstract base class for all datastore implementations.
|
||||
|
||||
Defines the core interface that all datastores must implement for:
|
||||
- Loading and saving data
|
||||
- Managing watches
|
||||
- Handling settings
|
||||
- Providing data access
|
||||
"""
|
||||
|
||||
lock = Lock()
|
||||
datastore_path = None
|
||||
|
||||
@abstractmethod
|
||||
def reload_state(self, datastore_path, include_default_watches, version_tag):
|
||||
"""
|
||||
Load data from persistent storage.
|
||||
|
||||
Args:
|
||||
datastore_path: Path to the datastore directory
|
||||
include_default_watches: Whether to create default watches if none exist
|
||||
version_tag: Application version string
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def add_watch(self, url, **kwargs):
|
||||
"""
|
||||
Add a new watch.
|
||||
|
||||
Args:
|
||||
url: URL to watch
|
||||
**kwargs: Additional watch parameters
|
||||
|
||||
Returns:
|
||||
UUID of the created watch
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def update_watch(self, uuid, update_obj):
|
||||
"""
|
||||
Update an existing watch.
|
||||
|
||||
Args:
|
||||
uuid: Watch UUID
|
||||
update_obj: Dictionary of fields to update
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def delete(self, uuid):
|
||||
"""
|
||||
Delete a watch.
|
||||
|
||||
Args:
|
||||
uuid: Watch UUID to delete
|
||||
"""
|
||||
pass
|
||||
|
||||
@property
|
||||
@abstractmethod
|
||||
def data(self):
|
||||
"""
|
||||
Access to the underlying data structure.
|
||||
|
||||
Returns:
|
||||
Dictionary containing all datastore data
|
||||
"""
|
||||
pass
|
||||
|
||||
@@ -0,0 +1,509 @@
|
||||
"""
|
||||
File-based datastore with individual watch persistence and immediate commits.
|
||||
|
||||
This module provides the FileSavingDataStore abstract class that implements:
|
||||
- Individual watch.json file persistence
|
||||
- Immediate commit-based persistence (watch.commit(), datastore.commit())
|
||||
- Atomic file writes safe for NFS/NAS
|
||||
"""
|
||||
|
||||
import glob
|
||||
import json
|
||||
import os
|
||||
import tempfile
|
||||
import time
|
||||
from loguru import logger
|
||||
|
||||
from .base import DataStore
|
||||
from .. import strtobool
|
||||
|
||||
# Try to import orjson for faster JSON serialization
|
||||
try:
|
||||
import orjson
|
||||
HAS_ORJSON = True
|
||||
except ImportError:
|
||||
HAS_ORJSON = False
|
||||
|
||||
# Fsync configuration: Force file data to disk for crash safety
|
||||
# Default False to match legacy behavior (write-and-rename without fsync)
|
||||
# Set to True for mission-critical deployments requiring crash consistency
|
||||
FORCE_FSYNC_DATA_IS_CRITICAL = bool(strtobool(os.getenv('FORCE_FSYNC_DATA_IS_CRITICAL', 'False')))
|
||||
|
||||
# ============================================================================
|
||||
# Helper Functions for Atomic File Operations
|
||||
# ============================================================================
|
||||
|
||||
def save_json_atomic(file_path, data_dict, label="file", max_size_mb=10):
|
||||
"""
|
||||
Save JSON data to disk using atomic write pattern.
|
||||
|
||||
Generic helper for saving any JSON data (settings, watches, etc.) with:
|
||||
- Atomic write (temp file + rename)
|
||||
- Directory fsync for crash consistency (only for new files)
|
||||
- Size validation
|
||||
- Proper error handling
|
||||
|
||||
Thread safety: Caller must hold datastore.lock to prevent concurrent modifications.
|
||||
Multi-process safety: Not supported - run only one app instance per datastore.
|
||||
|
||||
Args:
|
||||
file_path: Full path to target JSON file
|
||||
data_dict: Dictionary to serialize
|
||||
label: Human-readable label for error messages (e.g., "watch", "settings")
|
||||
max_size_mb: Maximum allowed file size in MB
|
||||
|
||||
Raises:
|
||||
ValueError: If serialized data exceeds max_size_mb
|
||||
OSError: If disk is full (ENOSPC) or other I/O error
|
||||
"""
|
||||
# Check if file already exists (before we start writing)
|
||||
# Directory fsync only needed for NEW files to persist the filename
|
||||
file_exists = os.path.exists(file_path)
|
||||
|
||||
# Ensure parent directory exists
|
||||
parent_dir = os.path.dirname(file_path)
|
||||
os.makedirs(parent_dir, exist_ok=True)
|
||||
|
||||
# Create temp file in same directory (required for NFS atomicity)
|
||||
fd, temp_path = tempfile.mkstemp(
|
||||
suffix='.tmp',
|
||||
prefix='json-',
|
||||
dir=parent_dir,
|
||||
text=False
|
||||
)
|
||||
|
||||
fd_closed = False
|
||||
try:
|
||||
# Serialize data
|
||||
t0 = time.time()
|
||||
if HAS_ORJSON:
|
||||
data = orjson.dumps(data_dict, option=orjson.OPT_INDENT_2)
|
||||
else:
|
||||
data = json.dumps(data_dict, indent=2, ensure_ascii=False).encode('utf-8')
|
||||
serialize_ms = (time.time() - t0) * 1000
|
||||
|
||||
# Safety check: validate size
|
||||
MAX_SIZE = max_size_mb * 1024 * 1024
|
||||
data_size = len(data)
|
||||
if data_size > MAX_SIZE:
|
||||
raise ValueError(
|
||||
f"{label.capitalize()} data is unexpectedly large: {data_size / 1024 / 1024:.2f}MB "
|
||||
f"(max: {max_size_mb}MB). This indicates a bug or data corruption."
|
||||
)
|
||||
|
||||
# Write to temp file
|
||||
t1 = time.time()
|
||||
os.write(fd, data)
|
||||
write_ms = (time.time() - t1) * 1000
|
||||
|
||||
# Optional fsync: Force file data to disk for crash safety
|
||||
# Only if FORCE_FSYNC_DATA_IS_CRITICAL=True (default: False, matches legacy behavior)
|
||||
t2 = time.time()
|
||||
if FORCE_FSYNC_DATA_IS_CRITICAL:
|
||||
os.fsync(fd)
|
||||
file_fsync_ms = (time.time() - t2) * 1000
|
||||
|
||||
os.close(fd)
|
||||
fd_closed = True
|
||||
|
||||
# Atomic rename
|
||||
t3 = time.time()
|
||||
os.replace(temp_path, file_path)
|
||||
rename_ms = (time.time() - t3) * 1000
|
||||
|
||||
# Sync directory to ensure filename metadata is durable
|
||||
# OPTIMIZATION: Only needed for NEW files. Existing files already have
|
||||
# directory entry persisted, so we only need file fsync for data durability.
|
||||
dir_fsync_ms = 0
|
||||
if not file_exists:
|
||||
try:
|
||||
dir_fd = os.open(parent_dir, os.O_RDONLY)
|
||||
try:
|
||||
t4 = time.time()
|
||||
os.fsync(dir_fd)
|
||||
dir_fsync_ms = (time.time() - t4) * 1000
|
||||
finally:
|
||||
os.close(dir_fd)
|
||||
except (OSError, AttributeError):
|
||||
# Windows doesn't support fsync on directories
|
||||
pass
|
||||
|
||||
# Log timing breakdown for slow saves
|
||||
# total_ms = serialize_ms + write_ms + file_fsync_ms + rename_ms + dir_fsync_ms
|
||||
# if total_ms: # Log if save took more than 10ms
|
||||
# file_status = "new" if not file_exists else "update"
|
||||
# logger.trace(
|
||||
# f"Save timing breakdown ({total_ms:.1f}ms total, {file_status}): "
|
||||
# f"serialize={serialize_ms:.1f}ms, write={write_ms:.1f}ms, "
|
||||
# f"file_fsync={file_fsync_ms:.1f}ms, rename={rename_ms:.1f}ms, "
|
||||
# f"dir_fsync={dir_fsync_ms:.1f}ms, using_orjson={HAS_ORJSON}"
|
||||
# )
|
||||
|
||||
except OSError as e:
|
||||
# Cleanup temp file
|
||||
if not fd_closed:
|
||||
try:
|
||||
os.close(fd)
|
||||
except:
|
||||
pass
|
||||
if os.path.exists(temp_path):
|
||||
try:
|
||||
os.unlink(temp_path)
|
||||
except:
|
||||
pass
|
||||
|
||||
# Provide helpful error messages
|
||||
if e.errno == 28: # ENOSPC
|
||||
raise OSError(f"Disk full: Cannot save {label}") from e
|
||||
elif e.errno == 122: # EDQUOT
|
||||
raise OSError(f"Disk quota exceeded: Cannot save {label}") from e
|
||||
else:
|
||||
raise OSError(f"I/O error saving {label}: {e}") from e
|
||||
|
||||
except Exception as e:
|
||||
# Cleanup temp file
|
||||
if not fd_closed:
|
||||
try:
|
||||
os.close(fd)
|
||||
except:
|
||||
pass
|
||||
if os.path.exists(temp_path):
|
||||
try:
|
||||
os.unlink(temp_path)
|
||||
except:
|
||||
pass
|
||||
raise e
|
||||
|
||||
|
||||
def save_entity_atomic(entity_dir, uuid, entity_dict, filename, entity_type, max_size_mb):
|
||||
"""
|
||||
Save an entity (watch/tag) to disk using atomic write pattern.
|
||||
|
||||
Generic function for saving any watch_base subclass (Watch, Tag, etc.).
|
||||
|
||||
Args:
|
||||
entity_dir: Directory for this entity (e.g., /datastore/{uuid})
|
||||
uuid: Entity UUID (for logging)
|
||||
entity_dict: Dictionary representation of the entity
|
||||
filename: JSON filename (e.g., 'watch.json', 'tag.json')
|
||||
entity_type: Type label for logging (e.g., 'watch', 'tag')
|
||||
max_size_mb: Maximum allowed file size in MB
|
||||
|
||||
Raises:
|
||||
ValueError: If serialized data exceeds max_size_mb
|
||||
OSError: If disk is full (ENOSPC) or other I/O error
|
||||
"""
|
||||
entity_json = os.path.join(entity_dir, filename)
|
||||
save_json_atomic(entity_json, entity_dict, label=f"{entity_type} {uuid}", max_size_mb=max_size_mb)
|
||||
|
||||
|
||||
def save_watch_atomic(watch_dir, uuid, watch_dict):
|
||||
"""
|
||||
Save a watch to disk using atomic write pattern.
|
||||
|
||||
Convenience wrapper around save_entity_atomic for watches.
|
||||
Kept for backwards compatibility.
|
||||
"""
|
||||
save_entity_atomic(watch_dir, uuid, watch_dict, "watch.json", "watch", max_size_mb=10)
|
||||
|
||||
|
||||
|
||||
def load_watch_from_file(watch_json, uuid, rehydrate_entity_func):
|
||||
"""
|
||||
Load a watch from its JSON file.
|
||||
|
||||
Args:
|
||||
watch_json: Path to the watch.json file
|
||||
uuid: Watch UUID
|
||||
rehydrate_entity_func: Function to convert dict to Watch object
|
||||
|
||||
Returns:
|
||||
Watch object or None if failed
|
||||
"""
|
||||
try:
|
||||
# Check file size before reading
|
||||
file_size = os.path.getsize(watch_json)
|
||||
MAX_WATCH_SIZE = 10 * 1024 * 1024 # 10MB
|
||||
if file_size > MAX_WATCH_SIZE:
|
||||
logger.critical(
|
||||
f"CORRUPTED WATCH DATA: Watch {uuid} file is unexpectedly large: "
|
||||
f"{file_size / 1024 / 1024:.2f}MB (max: {MAX_WATCH_SIZE / 1024 / 1024}MB). "
|
||||
f"File: {watch_json}. This indicates a bug or data corruption. "
|
||||
f"Watch will be skipped."
|
||||
)
|
||||
return None
|
||||
|
||||
if HAS_ORJSON:
|
||||
with open(watch_json, 'rb') as f:
|
||||
watch_data = orjson.loads(f.read())
|
||||
else:
|
||||
with open(watch_json, 'r', encoding='utf-8') as f:
|
||||
watch_data = json.load(f)
|
||||
|
||||
# Rehydrate and return watch object
|
||||
watch_obj = rehydrate_entity_func(uuid, watch_data)
|
||||
return watch_obj
|
||||
|
||||
except json.JSONDecodeError as e:
|
||||
logger.critical(
|
||||
f"CORRUPTED WATCH DATA: Failed to parse JSON for watch {uuid}. "
|
||||
f"File: {watch_json}. Error: {e}. "
|
||||
f"Watch will be skipped and may need manual recovery from backup."
|
||||
)
|
||||
return None
|
||||
except ValueError as e:
|
||||
# orjson raises ValueError for invalid JSON
|
||||
if "invalid json" in str(e).lower() or HAS_ORJSON:
|
||||
logger.critical(
|
||||
f"CORRUPTED WATCH DATA: Failed to parse JSON for watch {uuid}. "
|
||||
f"File: {watch_json}. Error: {e}. "
|
||||
f"Watch will be skipped and may need manual recovery from backup."
|
||||
)
|
||||
return None
|
||||
# Re-raise if it's not a JSON parsing error
|
||||
raise
|
||||
except FileNotFoundError:
|
||||
logger.error(f"Watch file not found: {watch_json} for watch {uuid}")
|
||||
return None
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to load watch {uuid} from {watch_json}: {e}")
|
||||
return None
|
||||
|
||||
|
||||
def load_all_watches(datastore_path, rehydrate_entity_func):
|
||||
"""
|
||||
Load all watches from individual watch.json files.
|
||||
|
||||
SYNCHRONOUS loading: Blocks until all watches are loaded.
|
||||
This ensures data consistency - web server won't accept requests
|
||||
until all watches are available. Progress logged every 100 watches.
|
||||
|
||||
Args:
|
||||
datastore_path: Path to the datastore directory
|
||||
rehydrate_entity_func: Function to convert dict to Watch object
|
||||
|
||||
Returns:
|
||||
Dictionary of uuid -> Watch object
|
||||
"""
|
||||
start_time = time.time()
|
||||
logger.info("Loading watches from individual watch.json files...")
|
||||
|
||||
watching = {}
|
||||
|
||||
if not os.path.exists(datastore_path):
|
||||
return watching
|
||||
|
||||
# Find all watch.json files using glob (faster than manual directory traversal)
|
||||
glob_start = time.time()
|
||||
watch_files = glob.glob(os.path.join(datastore_path, "*", "watch.json"))
|
||||
glob_time = time.time() - glob_start
|
||||
|
||||
total = len(watch_files)
|
||||
logger.debug(f"Found {total} watch.json files in {glob_time:.3f}s")
|
||||
|
||||
loaded = 0
|
||||
failed = 0
|
||||
|
||||
for watch_json in watch_files:
|
||||
# Extract UUID from path: /datastore/{uuid}/watch.json
|
||||
uuid_dir = os.path.basename(os.path.dirname(watch_json))
|
||||
watch = load_watch_from_file(watch_json, uuid_dir, rehydrate_entity_func)
|
||||
if watch:
|
||||
watching[uuid_dir] = watch
|
||||
loaded += 1
|
||||
|
||||
if loaded % 100 == 0:
|
||||
logger.info(f"Loaded {loaded}/{total} watches...")
|
||||
else:
|
||||
# load_watch_from_file already logged the specific error
|
||||
failed += 1
|
||||
|
||||
elapsed = time.time() - start_time
|
||||
|
||||
if failed > 0:
|
||||
logger.critical(
|
||||
f"LOAD COMPLETE: {loaded} watches loaded successfully, "
|
||||
f"{failed} watches FAILED to load (corrupted or invalid) "
|
||||
f"in {elapsed:.2f}s ({loaded/elapsed:.0f} watches/sec)"
|
||||
)
|
||||
else:
|
||||
logger.info(f"Loaded {loaded} watches from disk in {elapsed:.2f}s ({loaded/elapsed:.0f} watches/sec)")
|
||||
|
||||
return watching
|
||||
|
||||
|
||||
def load_tag_from_file(tag_json, uuid, rehydrate_entity_func):
|
||||
"""
|
||||
Load a tag from its JSON file.
|
||||
|
||||
Args:
|
||||
tag_json: Path to the tag.json file
|
||||
uuid: Tag UUID
|
||||
rehydrate_entity_func: Function to convert dict to Tag object
|
||||
|
||||
Returns:
|
||||
Tag object or None if failed
|
||||
"""
|
||||
try:
|
||||
# Check file size before reading
|
||||
file_size = os.path.getsize(tag_json)
|
||||
MAX_TAG_SIZE = 1 * 1024 * 1024 # 1MB
|
||||
if file_size > MAX_TAG_SIZE:
|
||||
logger.critical(
|
||||
f"CORRUPTED TAG DATA: Tag {uuid} file is unexpectedly large: "
|
||||
f"{file_size / 1024 / 1024:.2f}MB (max: {MAX_TAG_SIZE / 1024 / 1024}MB). "
|
||||
f"File: {tag_json}. This indicates a bug or data corruption. "
|
||||
f"Tag will be skipped."
|
||||
)
|
||||
return None
|
||||
|
||||
if HAS_ORJSON:
|
||||
with open(tag_json, 'rb') as f:
|
||||
tag_data = orjson.loads(f.read())
|
||||
else:
|
||||
with open(tag_json, 'r', encoding='utf-8') as f:
|
||||
tag_data = json.load(f)
|
||||
|
||||
tag_data['processor'] = 'restock_diff'
|
||||
# Rehydrate tag (convert dict to Tag object)
|
||||
# processor_override is set inside the rehydration function
|
||||
tag_obj = rehydrate_entity_func(uuid, tag_data)
|
||||
return tag_obj
|
||||
|
||||
except json.JSONDecodeError as e:
|
||||
logger.critical(
|
||||
f"CORRUPTED TAG DATA: Failed to parse JSON for tag {uuid}. "
|
||||
f"File: {tag_json}. Error: {e}. "
|
||||
f"Tag will be skipped and may need manual recovery from backup."
|
||||
)
|
||||
return None
|
||||
except ValueError as e:
|
||||
# orjson raises ValueError for invalid JSON
|
||||
if "invalid json" in str(e).lower() or HAS_ORJSON:
|
||||
logger.critical(
|
||||
f"CORRUPTED TAG DATA: Failed to parse JSON for tag {uuid}. "
|
||||
f"File: {tag_json}. Error: {e}. "
|
||||
f"Tag will be skipped and may need manual recovery from backup."
|
||||
)
|
||||
return None
|
||||
# Re-raise if it's not a JSON parsing error
|
||||
raise
|
||||
except FileNotFoundError:
|
||||
logger.debug(f"Tag file not found: {tag_json} for tag {uuid}")
|
||||
return None
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to load tag {uuid} from {tag_json}: {e}")
|
||||
return None
|
||||
|
||||
|
||||
def load_all_tags(datastore_path, rehydrate_entity_func):
|
||||
"""
|
||||
Load all tags from individual tag.json files.
|
||||
|
||||
Tags are stored separately from settings in {uuid}/tag.json files.
|
||||
|
||||
Args:
|
||||
datastore_path: Path to the datastore directory
|
||||
rehydrate_entity_func: Function to convert dict to Tag object
|
||||
|
||||
Returns:
|
||||
Dictionary of uuid -> Tag object
|
||||
"""
|
||||
logger.info("Loading tags from individual tag.json files...")
|
||||
|
||||
tags = {}
|
||||
|
||||
if not os.path.exists(datastore_path):
|
||||
return tags
|
||||
|
||||
# Find all tag.json files using glob
|
||||
tag_files = glob.glob(os.path.join(datastore_path, "*", "tag.json"))
|
||||
|
||||
total = len(tag_files)
|
||||
if total == 0:
|
||||
logger.debug("No tag.json files found")
|
||||
return tags
|
||||
|
||||
logger.debug(f"Found {total} tag.json files")
|
||||
|
||||
loaded = 0
|
||||
failed = 0
|
||||
|
||||
for tag_json in tag_files:
|
||||
# Extract UUID from path: /datastore/{uuid}/tag.json
|
||||
uuid_dir = os.path.basename(os.path.dirname(tag_json))
|
||||
tag = load_tag_from_file(tag_json, uuid_dir, rehydrate_entity_func)
|
||||
if tag:
|
||||
tags[uuid_dir] = tag
|
||||
loaded += 1
|
||||
else:
|
||||
# load_tag_from_file already logged the specific error
|
||||
failed += 1
|
||||
|
||||
if failed > 0:
|
||||
logger.warning(f"Loaded {loaded} tags, {failed} tags FAILED to load")
|
||||
else:
|
||||
logger.info(f"Loaded {loaded} tags from disk")
|
||||
|
||||
return tags
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# FileSavingDataStore Class
|
||||
# ============================================================================
|
||||
|
||||
class FileSavingDataStore(DataStore):
|
||||
"""
|
||||
Abstract datastore that provides file persistence with immediate commits.
|
||||
|
||||
Features:
|
||||
- Individual watch.json files (one per watch)
|
||||
- Immediate persistence via watch.commit() and datastore.commit()
|
||||
- Atomic file writes for crash safety
|
||||
|
||||
Subclasses must implement:
|
||||
- rehydrate_entity(): Convert dict to Watch object
|
||||
- Access to internal __data structure for watch management
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
|
||||
def _save_settings(self):
|
||||
"""
|
||||
Save settings to storage (polymorphic).
|
||||
|
||||
Subclasses must implement for their backend.
|
||||
- File: changedetection.json
|
||||
- Redis: SET settings
|
||||
- SQL: UPDATE settings table
|
||||
"""
|
||||
raise NotImplementedError("Subclass must implement _save_settings")
|
||||
|
||||
|
||||
def _load_watches(self):
|
||||
"""
|
||||
Load all watches from storage (polymorphic).
|
||||
|
||||
Subclasses must implement for their backend.
|
||||
- File: Read individual watch.json files
|
||||
- Redis: SCAN watch:* keys
|
||||
- SQL: SELECT * FROM watches
|
||||
"""
|
||||
raise NotImplementedError("Subclass must implement _load_watches")
|
||||
|
||||
def _delete_watch(self, uuid):
|
||||
"""
|
||||
Delete a watch from storage (polymorphic).
|
||||
|
||||
Subclasses must implement for their backend.
|
||||
- File: Delete {uuid}/ directory recursively
|
||||
- Redis: DEL watch:{uuid}
|
||||
- SQL: DELETE FROM watches WHERE uuid=?
|
||||
|
||||
Args:
|
||||
uuid: Watch UUID to delete
|
||||
"""
|
||||
raise NotImplementedError("Subclass must implement _delete_watch")
|
||||
|
||||
|
||||
@@ -0,0 +1,725 @@
|
||||
"""
|
||||
Schema update migrations for the datastore.
|
||||
|
||||
This module contains all schema version upgrade methods (update_1 through update_N).
|
||||
These are mixed into ChangeDetectionStore to keep the main store file focused.
|
||||
|
||||
IMPORTANT: Each update could be run even when they have a new install and the schema is correct.
|
||||
Therefore - each `update_n` should be very careful about checking if it needs to actually run.
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import tarfile
|
||||
import time
|
||||
from loguru import logger
|
||||
from copy import deepcopy
|
||||
|
||||
|
||||
# Try to import orjson for faster JSON serialization
|
||||
try:
|
||||
import orjson
|
||||
HAS_ORJSON = True
|
||||
except ImportError:
|
||||
HAS_ORJSON = False
|
||||
|
||||
from ..html_tools import TRANSLATE_WHITESPACE_TABLE
|
||||
from ..processors.restock_diff import Restock
|
||||
from ..blueprint.rss import RSS_CONTENT_FORMAT_DEFAULT
|
||||
from ..model import USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH
|
||||
|
||||
def create_backup_tarball(datastore_path, update_number):
|
||||
"""
|
||||
Create a tarball backup of the entire datastore structure before running an update.
|
||||
|
||||
Includes:
|
||||
- All {uuid}/watch.json files
|
||||
- All {uuid}/tag.json files
|
||||
- changedetection.json (settings, if it exists)
|
||||
- url-watches.json (legacy format, if it exists)
|
||||
- Directory structure preserved
|
||||
|
||||
Args:
|
||||
datastore_path: Path to datastore directory
|
||||
update_number: Update number being applied
|
||||
|
||||
Returns:
|
||||
str: Path to created tarball, or None if backup failed
|
||||
|
||||
Restoration:
|
||||
To restore from a backup:
|
||||
cd /path/to/datastore
|
||||
tar -xzf before-update-N-timestamp.tar.gz
|
||||
This will restore all watch.json and tag.json files and settings to their pre-update state.
|
||||
"""
|
||||
timestamp = int(time.time())
|
||||
backup_filename = f"before-update-{update_number}-{timestamp}.tar.gz"
|
||||
backup_path = os.path.join(datastore_path, backup_filename)
|
||||
|
||||
try:
|
||||
logger.info(f"Creating backup tarball: {backup_filename}")
|
||||
|
||||
with tarfile.open(backup_path, "w:gz") as tar:
|
||||
# Backup changedetection.json if it exists (new format)
|
||||
changedetection_json = os.path.join(datastore_path, "changedetection.json")
|
||||
if os.path.isfile(changedetection_json):
|
||||
tar.add(changedetection_json, arcname="changedetection.json")
|
||||
logger.debug("Added changedetection.json to backup")
|
||||
|
||||
# Backup url-watches.json if it exists (legacy format)
|
||||
url_watches_json = os.path.join(datastore_path, "url-watches.json")
|
||||
if os.path.isfile(url_watches_json):
|
||||
tar.add(url_watches_json, arcname="url-watches.json")
|
||||
logger.debug("Added url-watches.json to backup")
|
||||
|
||||
# Backup all watch/tag directories with their JSON files
|
||||
# This preserves the UUID directory structure
|
||||
watch_count = 0
|
||||
tag_count = 0
|
||||
for entry in os.listdir(datastore_path):
|
||||
entry_path = os.path.join(datastore_path, entry)
|
||||
|
||||
# Skip if not a directory
|
||||
if not os.path.isdir(entry_path):
|
||||
continue
|
||||
|
||||
# Skip hidden directories and backup directories
|
||||
if entry.startswith('.') or entry.startswith('before-update-'):
|
||||
continue
|
||||
|
||||
# Backup watch.json if exists
|
||||
watch_json = os.path.join(entry_path, "watch.json")
|
||||
if os.path.isfile(watch_json):
|
||||
tar.add(watch_json, arcname=f"{entry}/watch.json")
|
||||
watch_count += 1
|
||||
|
||||
if watch_count % 100 == 0:
|
||||
logger.debug(f"Backed up {watch_count} watch.json files...")
|
||||
|
||||
# Backup tag.json if exists
|
||||
tag_json = os.path.join(entry_path, "tag.json")
|
||||
if os.path.isfile(tag_json):
|
||||
tar.add(tag_json, arcname=f"{entry}/tag.json")
|
||||
tag_count += 1
|
||||
|
||||
logger.success(f"Backup created: {backup_filename} ({watch_count} watches from disk, {tag_count} tags from disk)")
|
||||
return backup_path
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to create backup tarball: {e}")
|
||||
# Try to clean up partial backup
|
||||
if os.path.exists(backup_path):
|
||||
try:
|
||||
os.unlink(backup_path)
|
||||
except:
|
||||
pass
|
||||
return None
|
||||
|
||||
|
||||
class DatastoreUpdatesMixin:
|
||||
"""
|
||||
Mixin class containing all schema update methods.
|
||||
|
||||
This class is inherited by ChangeDetectionStore to provide schema migration functionality.
|
||||
Each update_N method upgrades the schema from version N-1 to version N.
|
||||
"""
|
||||
|
||||
def get_updates_available(self):
|
||||
"""
|
||||
Discover all available update methods.
|
||||
|
||||
Returns:
|
||||
list: Sorted list of update version numbers (e.g., [1, 2, 3, ..., 26])
|
||||
"""
|
||||
import inspect
|
||||
updates_available = []
|
||||
for i, o in inspect.getmembers(self, predicate=inspect.ismethod):
|
||||
m = re.search(r'update_(\d+)$', i)
|
||||
if m:
|
||||
updates_available.append(int(m.group(1)))
|
||||
updates_available.sort()
|
||||
|
||||
return updates_available
|
||||
|
||||
def run_updates(self, current_schema_version=None):
|
||||
import sys
|
||||
"""
|
||||
Run all pending schema updates sequentially.
|
||||
|
||||
Args:
|
||||
current_schema_version: Optional current schema version. If provided, only run updates
|
||||
greater than this version. If None, uses the schema version from
|
||||
the datastore. If no schema version exists in datastore and it appears
|
||||
to be a fresh install, sets to latest update number (no updates needed).
|
||||
|
||||
IMPORTANT: Each update could be run even when they have a new install and the schema is correct.
|
||||
Therefore - each `update_n` should be very careful about checking if it needs to actually run.
|
||||
|
||||
Process:
|
||||
1. Get list of available updates
|
||||
2. For each update > current schema version:
|
||||
- Create backup of datastore
|
||||
- Run update method
|
||||
- Update schema version and commit settings
|
||||
- Commit all watches and tags
|
||||
3. If any update fails, stop processing
|
||||
4. All changes saved via individual .commit() calls
|
||||
"""
|
||||
updates_available = self.get_updates_available()
|
||||
if self.data.get('watching'):
|
||||
test_watch = self.data['watching'].get(next(iter(self.data.get('watching', {}))))
|
||||
from ..model.Watch import model
|
||||
|
||||
if not isinstance(test_watch, model):
|
||||
import sys
|
||||
logger.critical("Cannot run updates! Watch structure must be re-hydrated back to a Watch model object!")
|
||||
sys.exit(1)
|
||||
|
||||
if self.data['settings']['application'].get('tags',{}):
|
||||
test_tag = self.data['settings']['application'].get('tags',{}).get(next(iter(self.data['settings']['application'].get('tags',{}))))
|
||||
from ..model.Tag import model as tag_model
|
||||
|
||||
if not isinstance(test_tag, tag_model):
|
||||
import sys
|
||||
logger.critical("Cannot run updates! Watch tag/group structure must be re-hydrated back to a Tag model object!")
|
||||
sys.exit(1)
|
||||
|
||||
# Determine current schema version
|
||||
if current_schema_version is None:
|
||||
# Check if schema_version exists in datastore
|
||||
current_schema_version = self.data['settings']['application'].get('schema_version')
|
||||
|
||||
if current_schema_version is None:
|
||||
# No schema version found - could be a fresh install or very old datastore
|
||||
# If this is a fresh/new config with no watches, assume it's up-to-date
|
||||
# and set to latest update number (no updates needed)
|
||||
if len(self.data['watching']) == 0:
|
||||
# Get the highest update number from available update methods
|
||||
latest_update = updates_available[-1] if updates_available else 0
|
||||
logger.info(f"No schema version found and no watches exist - assuming fresh install, setting schema_version to {latest_update}")
|
||||
self.data['settings']['application']['schema_version'] = latest_update
|
||||
self.commit()
|
||||
return # No updates needed for fresh install
|
||||
else:
|
||||
# Has watches but no schema version - likely old datastore, run all updates
|
||||
logger.warning("No schema version found but watches exist - running all updates from version 0")
|
||||
current_schema_version = 0
|
||||
|
||||
logger.info(f"Current schema version: {current_schema_version}")
|
||||
|
||||
updates_ran = []
|
||||
|
||||
for update_n in updates_available:
|
||||
if update_n > current_schema_version:
|
||||
logger.critical(f"Applying update_{update_n}")
|
||||
|
||||
# Create tarball backup of entire datastore structure
|
||||
# This includes all watch.json files, settings, and preserves directory structure
|
||||
backup_path = create_backup_tarball(self.datastore_path, update_n)
|
||||
if backup_path:
|
||||
logger.info(f"Backup created at: {backup_path}")
|
||||
else:
|
||||
logger.warning("Backup creation failed, but continuing with update")
|
||||
|
||||
try:
|
||||
update_method = getattr(self, f"update_{update_n}")()
|
||||
except Exception as e:
|
||||
logger.critical(f"Error while trying update_{update_n}")
|
||||
logger.exception(e)
|
||||
sys.exit(1)
|
||||
else:
|
||||
# Bump the version
|
||||
self.data['settings']['application']['schema_version'] = update_n
|
||||
self.commit()
|
||||
|
||||
logger.success(f"Update {update_n} completed")
|
||||
|
||||
# Track which updates ran
|
||||
updates_ran.append(update_n)
|
||||
|
||||
# ============================================================================
|
||||
# Individual Update Methods
|
||||
# ============================================================================
|
||||
|
||||
def update_1(self):
|
||||
"""Convert minutes to seconds on settings and each watch."""
|
||||
if self.data['settings']['requests'].get('minutes_between_check'):
|
||||
self.data['settings']['requests']['time_between_check']['minutes'] = self.data['settings']['requests']['minutes_between_check']
|
||||
# Remove the default 'hours' that is set from the model
|
||||
self.data['settings']['requests']['time_between_check']['hours'] = None
|
||||
|
||||
for uuid, watch in self.data['watching'].items():
|
||||
if 'minutes_between_check' in watch:
|
||||
# Only upgrade individual watch time if it was set
|
||||
if watch.get('minutes_between_check', False):
|
||||
self.data['watching'][uuid]['time_between_check']['minutes'] = watch['minutes_between_check']
|
||||
|
||||
def update_2(self):
|
||||
"""
|
||||
Move the history list to a flat text file index.
|
||||
Better than SQLite because this list is only appended to, and works across NAS / NFS type setups.
|
||||
"""
|
||||
# @todo test running this on a newly updated one (when this already ran)
|
||||
for uuid, watch in self.data['watching'].items():
|
||||
history = []
|
||||
|
||||
if watch.get('history', False):
|
||||
for d, p in watch['history'].items():
|
||||
d = int(d) # Used to be keyed as str, we'll fix this now too
|
||||
history.append("{},{}\n".format(d, p))
|
||||
|
||||
if len(history):
|
||||
target_path = os.path.join(self.datastore_path, uuid)
|
||||
if os.path.exists(target_path):
|
||||
with open(os.path.join(target_path, "history.txt"), "w") as f:
|
||||
f.writelines(history)
|
||||
else:
|
||||
logger.warning(f"Datastore history directory {target_path} does not exist, skipping history import.")
|
||||
|
||||
# No longer needed, dynamically pulled from the disk when needed.
|
||||
# But we should set it back to a empty dict so we don't break if this schema runs on an earlier version.
|
||||
# In the distant future we can remove this entirely
|
||||
self.data['watching'][uuid]['history'] = {}
|
||||
|
||||
def update_3(self):
|
||||
"""We incorrectly stored last_changed when there was not a change, and then confused the output list table."""
|
||||
# see https://github.com/dgtlmoon/changedetection.io/pull/835
|
||||
return
|
||||
|
||||
def update_4(self):
|
||||
"""`last_changed` not needed, we pull that information from the history.txt index."""
|
||||
for uuid, watch in self.data['watching'].items():
|
||||
try:
|
||||
# Remove it from the struct
|
||||
del(watch['last_changed'])
|
||||
except:
|
||||
continue
|
||||
return
|
||||
|
||||
def update_5(self):
|
||||
"""
|
||||
If the watch notification body, title look the same as the global one, unset it, so the watch defaults back to using the main settings.
|
||||
In other words - the watch notification_title and notification_body are not needed if they are the same as the default one.
|
||||
"""
|
||||
current_system_body = self.data['settings']['application']['notification_body'].translate(TRANSLATE_WHITESPACE_TABLE)
|
||||
current_system_title = self.data['settings']['application']['notification_body'].translate(TRANSLATE_WHITESPACE_TABLE)
|
||||
for uuid, watch in self.data['watching'].items():
|
||||
try:
|
||||
watch_body = watch.get('notification_body', '')
|
||||
if watch_body and watch_body.translate(TRANSLATE_WHITESPACE_TABLE) == current_system_body:
|
||||
# Looks the same as the default one, so unset it
|
||||
watch['notification_body'] = None
|
||||
|
||||
watch_title = watch.get('notification_title', '')
|
||||
if watch_title and watch_title.translate(TRANSLATE_WHITESPACE_TABLE) == current_system_title:
|
||||
# Looks the same as the default one, so unset it
|
||||
watch['notification_title'] = None
|
||||
except Exception as e:
|
||||
continue
|
||||
return
|
||||
|
||||
def update_7(self):
|
||||
"""
|
||||
We incorrectly used common header overrides that should only apply to Requests.
|
||||
These are now handled in content_fetcher::html_requests and shouldnt be passed to Playwright/Selenium.
|
||||
"""
|
||||
# These were hard-coded in early versions
|
||||
for v in ['User-Agent', 'Accept', 'Accept-Encoding', 'Accept-Language']:
|
||||
if self.data['settings']['headers'].get(v):
|
||||
del self.data['settings']['headers'][v]
|
||||
|
||||
def update_8(self):
|
||||
"""Convert filters to a list of filters css_filter -> include_filters."""
|
||||
for uuid, watch in self.data['watching'].items():
|
||||
try:
|
||||
existing_filter = watch.get('css_filter', '')
|
||||
if existing_filter:
|
||||
watch['include_filters'] = [existing_filter]
|
||||
except:
|
||||
continue
|
||||
return
|
||||
|
||||
def update_9(self):
|
||||
"""Convert old static notification tokens to jinja2 tokens."""
|
||||
# Each watch
|
||||
# only { } not {{ or }}
|
||||
r = r'(?<!{){(?!{)(\w+)(?<!})}(?!})'
|
||||
for uuid, watch in self.data['watching'].items():
|
||||
try:
|
||||
n_body = watch.get('notification_body', '')
|
||||
if n_body:
|
||||
watch['notification_body'] = re.sub(r, r'{{\1}}', n_body)
|
||||
|
||||
n_title = watch.get('notification_title')
|
||||
if n_title:
|
||||
watch['notification_title'] = re.sub(r, r'{{\1}}', n_title)
|
||||
|
||||
n_urls = watch.get('notification_urls')
|
||||
if n_urls:
|
||||
for i, url in enumerate(n_urls):
|
||||
watch['notification_urls'][i] = re.sub(r, r'{{\1}}', url)
|
||||
|
||||
except:
|
||||
continue
|
||||
|
||||
# System wide
|
||||
n_body = self.data['settings']['application'].get('notification_body')
|
||||
if n_body:
|
||||
self.data['settings']['application']['notification_body'] = re.sub(r, r'{{\1}}', n_body)
|
||||
|
||||
n_title = self.data['settings']['application'].get('notification_title')
|
||||
if n_body:
|
||||
self.data['settings']['application']['notification_title'] = re.sub(r, r'{{\1}}', n_title)
|
||||
|
||||
n_urls = self.data['settings']['application'].get('notification_urls')
|
||||
if n_urls:
|
||||
for i, url in enumerate(n_urls):
|
||||
self.data['settings']['application']['notification_urls'][i] = re.sub(r, r'{{\1}}', url)
|
||||
|
||||
return
|
||||
|
||||
def update_10(self):
|
||||
"""Some setups may have missed the correct default, so it shows the wrong config in the UI, although it will default to system-wide."""
|
||||
for uuid, watch in self.data['watching'].items():
|
||||
try:
|
||||
if not watch.get('fetch_backend', ''):
|
||||
watch['fetch_backend'] = 'system'
|
||||
except:
|
||||
continue
|
||||
return
|
||||
|
||||
def update_12(self):
|
||||
"""Create tag objects and their references from existing tag text."""
|
||||
i = 0
|
||||
for uuid, watch in self.data['watching'].items():
|
||||
# Split out and convert old tag string
|
||||
tag = watch.get('tag')
|
||||
if tag:
|
||||
tag_uuids = []
|
||||
for t in tag.split(','):
|
||||
tag_uuids.append(self.add_tag(title=t))
|
||||
|
||||
self.data['watching'][uuid]['tags'] = tag_uuids
|
||||
|
||||
def update_13(self):
|
||||
"""#1775 - Update 11 did not update the records correctly when adding 'date_created' values for sorting."""
|
||||
i = 0
|
||||
for uuid, watch in self.data['watching'].items():
|
||||
if not watch.get('date_created'):
|
||||
self.data['watching'][uuid]['date_created'] = i
|
||||
i += 1
|
||||
return
|
||||
|
||||
def update_14(self):
|
||||
"""#1774 - protect xpath1 against migration."""
|
||||
for awatch in self.data["watching"]:
|
||||
if self.data["watching"][awatch]['include_filters']:
|
||||
for num, selector in enumerate(self.data["watching"][awatch]['include_filters']):
|
||||
if selector.startswith('/'):
|
||||
self.data["watching"][awatch]['include_filters'][num] = 'xpath1:' + selector
|
||||
if selector.startswith('xpath:'):
|
||||
self.data["watching"][awatch]['include_filters'][num] = selector.replace('xpath:', 'xpath1:', 1)
|
||||
|
||||
def update_15(self):
|
||||
"""Use more obvious default time setting."""
|
||||
for uuid in self.data["watching"]:
|
||||
if self.data["watching"][uuid]['time_between_check'] == self.data['settings']['requests']['time_between_check']:
|
||||
# What the old logic was, which was pretty confusing
|
||||
self.data["watching"][uuid]['time_between_check_use_default'] = True
|
||||
elif all(value is None or value == 0 for value in self.data["watching"][uuid]['time_between_check'].values()):
|
||||
self.data["watching"][uuid]['time_between_check_use_default'] = True
|
||||
else:
|
||||
# Something custom here
|
||||
self.data["watching"][uuid]['time_between_check_use_default'] = False
|
||||
|
||||
def update_16(self):
|
||||
"""Correctly set datatype for older installs where 'tag' was string and update_12 did not catch it."""
|
||||
for uuid, watch in self.data['watching'].items():
|
||||
if isinstance(watch.get('tags'), str):
|
||||
self.data['watching'][uuid]['tags'] = []
|
||||
|
||||
def update_17(self):
|
||||
"""Migrate old 'in_stock' values to the new Restock."""
|
||||
for uuid, watch in self.data['watching'].items():
|
||||
if 'in_stock' in watch:
|
||||
watch['restock'] = Restock({'in_stock': watch.get('in_stock')})
|
||||
del watch['in_stock']
|
||||
|
||||
def update_18(self):
|
||||
"""Migrate old restock settings."""
|
||||
for uuid, watch in self.data['watching'].items():
|
||||
if not watch.get('restock_settings'):
|
||||
# So we enable price following by default
|
||||
self.data['watching'][uuid]['restock_settings'] = {'follow_price_changes': True}
|
||||
|
||||
# Migrate and cleanoff old value
|
||||
self.data['watching'][uuid]['restock_settings']['in_stock_processing'] = 'in_stock_only' if watch.get(
|
||||
'in_stock_only') else 'all_changes'
|
||||
|
||||
if self.data['watching'][uuid].get('in_stock_only'):
|
||||
del (self.data['watching'][uuid]['in_stock_only'])
|
||||
|
||||
def update_19(self):
|
||||
"""Compress old elements.json to elements.deflate, saving disk, this compression is pretty fast."""
|
||||
import zlib
|
||||
|
||||
for uuid, watch in self.data['watching'].items():
|
||||
json_path = os.path.join(self.datastore_path, uuid, "elements.json")
|
||||
deflate_path = os.path.join(self.datastore_path, uuid, "elements.deflate")
|
||||
|
||||
if os.path.exists(json_path):
|
||||
with open(json_path, "rb") as f_j:
|
||||
with open(deflate_path, "wb") as f_d:
|
||||
logger.debug(f"Compressing {str(json_path)} to {str(deflate_path)}..")
|
||||
f_d.write(zlib.compress(f_j.read()))
|
||||
os.unlink(json_path)
|
||||
|
||||
def update_20(self):
|
||||
"""Migrate extract_title_as_title to use_page_title_in_list."""
|
||||
for uuid, watch in self.data['watching'].items():
|
||||
if self.data['watching'][uuid].get('extract_title_as_title'):
|
||||
self.data['watching'][uuid]['use_page_title_in_list'] = self.data['watching'][uuid].get('extract_title_as_title')
|
||||
del self.data['watching'][uuid]['extract_title_as_title']
|
||||
|
||||
if self.data['settings']['application'].get('extract_title_as_title'):
|
||||
# Ensure 'ui' key exists (defensive for edge cases where base_config merge didn't happen)
|
||||
if 'ui' not in self.data['settings']['application']:
|
||||
self.data['settings']['application']['ui'] = {
|
||||
'use_page_title_in_list': True,
|
||||
'open_diff_in_new_tab': True,
|
||||
'socket_io_enabled': True,
|
||||
'favicons_enabled': True
|
||||
}
|
||||
self.data['settings']['application']['ui']['use_page_title_in_list'] = self.data['settings']['application'].get('extract_title_as_title')
|
||||
|
||||
def update_21(self):
|
||||
"""Migrate timezone to scheduler_timezone_default."""
|
||||
if self.data['settings']['application'].get('timezone'):
|
||||
self.data['settings']['application']['scheduler_timezone_default'] = self.data['settings']['application'].get('timezone')
|
||||
del self.data['settings']['application']['timezone']
|
||||
|
||||
def update_23(self):
|
||||
"""Some notification formats got the wrong name type."""
|
||||
|
||||
def re_run(formats):
|
||||
sys_n_format = self.data['settings']['application'].get('notification_format')
|
||||
key_exists_as_value = next((k for k, v in formats.items() if v == sys_n_format), None)
|
||||
if key_exists_as_value: # key of "Plain text"
|
||||
logger.success(f"['settings']['application']['notification_format'] '{sys_n_format}' -> '{key_exists_as_value}'")
|
||||
self.data['settings']['application']['notification_format'] = key_exists_as_value
|
||||
|
||||
for uuid, watch in self.data['watching'].items():
|
||||
n_format = self.data['watching'][uuid].get('notification_format')
|
||||
key_exists_as_value = next((k for k, v in formats.items() if v == n_format), None)
|
||||
if key_exists_as_value and key_exists_as_value != USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH: # key of "Plain text"
|
||||
logger.success(f"['watching'][{uuid}]['notification_format'] '{n_format}' -> '{key_exists_as_value}'")
|
||||
self.data['watching'][uuid]['notification_format'] = key_exists_as_value # should be 'text' or whatever
|
||||
|
||||
for uuid, tag in self.data['settings']['application']['tags'].items():
|
||||
n_format = self.data['settings']['application']['tags'][uuid].get('notification_format')
|
||||
key_exists_as_value = next((k for k, v in formats.items() if v == n_format), None)
|
||||
if key_exists_as_value and key_exists_as_value != USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH: # key of "Plain text"
|
||||
logger.success(
|
||||
f"['settings']['application']['tags'][{uuid}]['notification_format'] '{n_format}' -> '{key_exists_as_value}'")
|
||||
self.data['settings']['application']['tags'][uuid][
|
||||
'notification_format'] = key_exists_as_value # should be 'text' or whatever
|
||||
|
||||
from ..notification import valid_notification_formats
|
||||
formats = deepcopy(valid_notification_formats)
|
||||
re_run(formats)
|
||||
# And in previous versions, it was "text" instead of Plain text, Markdown instead of "Markdown to HTML"
|
||||
formats['text'] = 'Text'
|
||||
formats['markdown'] = 'Markdown'
|
||||
re_run(formats)
|
||||
|
||||
def update_24(self):
|
||||
"""RSS types should be inline with the same names as notification types."""
|
||||
rss_format = self.data['settings']['application'].get('rss_content_format')
|
||||
if not rss_format or 'text' in rss_format:
|
||||
# might have been 'plaintext, 'plain text' or something
|
||||
self.data['settings']['application']['rss_content_format'] = RSS_CONTENT_FORMAT_DEFAULT
|
||||
elif 'html' in rss_format:
|
||||
self.data['settings']['application']['rss_content_format'] = 'htmlcolor'
|
||||
else:
|
||||
# safe fallback to text
|
||||
self.data['settings']['application']['rss_content_format'] = RSS_CONTENT_FORMAT_DEFAULT
|
||||
|
||||
def update_25(self):
|
||||
"""Different processors now hold their own history.txt."""
|
||||
for uuid, watch in self.data['watching'].items():
|
||||
processor = self.data['watching'][uuid].get('processor')
|
||||
if processor != 'text_json_diff':
|
||||
old_history_txt = os.path.join(self.datastore_path, "history.txt")
|
||||
target_history_name = f"history-{processor}.txt"
|
||||
if os.path.isfile(old_history_txt) and not os.path.isfile(target_history_name):
|
||||
new_history_txt = os.path.join(self.datastore_path, target_history_name)
|
||||
logger.debug(f"Renaming history index {old_history_txt} to {new_history_txt}...")
|
||||
shutil.move(old_history_txt, new_history_txt)
|
||||
|
||||
def migrate_legacy_db_format(self):
|
||||
"""
|
||||
Migration: Individual watch persistence (COPY-based, safe rollback).
|
||||
|
||||
Loads legacy url-watches.json format and migrates to:
|
||||
- {uuid}/watch.json (per watch)
|
||||
- changedetection.json (settings only)
|
||||
|
||||
IMPORTANT:
|
||||
- A tarball backup (before-update-26-timestamp.tar.gz) is created before migration
|
||||
- url-watches.json is LEFT INTACT for rollback safety
|
||||
- Users can roll back by simply downgrading to the previous version
|
||||
- Or restore from tarball: tar -xzf before-update-26-*.tar.gz
|
||||
|
||||
This is a dedicated migration release - users upgrade at their own pace.
|
||||
"""
|
||||
logger.critical("=" * 80)
|
||||
logger.critical("Running migration: Individual watch persistence (update_26)")
|
||||
logger.critical("COPY-based migration: url-watches.json will remain intact for rollback")
|
||||
logger.critical("=" * 80)
|
||||
|
||||
# Populate settings from legacy data
|
||||
logger.info("Populating settings from legacy data...")
|
||||
watch_count = len(self.data['watching'])
|
||||
logger.success(f"Loaded {watch_count} watches from legacy format")
|
||||
|
||||
# Phase 1: Save all watches to individual files
|
||||
logger.critical(f"Phase 1/4: Saving {watch_count} watches to individual watch.json files...")
|
||||
|
||||
saved_count = 0
|
||||
for uuid, watch in self.data['watching'].items():
|
||||
try:
|
||||
watch.commit()
|
||||
saved_count += 1
|
||||
|
||||
if saved_count % 100 == 0:
|
||||
logger.info(f" Progress: {saved_count}/{watch_count} watches migrated...")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to save watch {uuid}: {e}")
|
||||
raise Exception(
|
||||
f"Migration failed: Could not save watch {uuid}. "
|
||||
f"url-watches.json remains intact, safe to retry. Error: {e}"
|
||||
)
|
||||
|
||||
logger.critical(f"Phase 1 complete: Saved {saved_count} watches")
|
||||
|
||||
# Phase 2: Verify all files exist
|
||||
logger.critical("Phase 2/4: Verifying all watch.json files were created...")
|
||||
|
||||
missing = []
|
||||
for uuid in self.data['watching'].keys():
|
||||
watch_json = os.path.join(self.datastore_path, uuid, "watch.json")
|
||||
if not os.path.isfile(watch_json):
|
||||
missing.append(uuid)
|
||||
|
||||
if missing:
|
||||
raise Exception(
|
||||
f"Migration failed: {len(missing)} watch files missing: {missing[:5]}... "
|
||||
f"url-watches.json remains intact, safe to retry."
|
||||
)
|
||||
|
||||
logger.critical(f"Phase 2 complete: Verified {watch_count} watch files")
|
||||
|
||||
# Phase 3: Create new settings file
|
||||
logger.critical("Phase 3/4: Creating changedetection.json...")
|
||||
|
||||
try:
|
||||
self._save_settings()
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to create changedetection.json: {e}")
|
||||
raise Exception(
|
||||
f"Migration failed: Could not create changedetection.json. "
|
||||
f"url-watches.json remains intact, safe to retry. Error: {e}"
|
||||
)
|
||||
|
||||
# Phase 4: Verify settings file exists
|
||||
logger.critical("Phase 4/4: Verifying changedetection.json exists...")
|
||||
changedetection_json_new_schema=os.path.join(self.datastore_path, "changedetection.json")
|
||||
if not os.path.isfile(changedetection_json_new_schema):
|
||||
import sys
|
||||
logger.critical("Migration failed, changedetection.json not found after update ran!")
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
logger.critical("Phase 4 complete: Verified changedetection.json exists")
|
||||
|
||||
# Success! Now reload from new format
|
||||
logger.critical("Reloading datastore from new format...")
|
||||
# write it to disk, it will be saved without ['watching'] in the JSON db because we find it from disk glob
|
||||
self._save_settings()
|
||||
logger.success("Datastore reloaded from new format successfully")
|
||||
logger.critical("=" * 80)
|
||||
logger.critical("MIGRATION COMPLETED SUCCESSFULLY!")
|
||||
logger.critical("=" * 80)
|
||||
logger.info("")
|
||||
logger.info("New format:")
|
||||
logger.info(f" - {watch_count} individual watch.json files created")
|
||||
logger.info(f" - changedetection.json created (settings only)")
|
||||
logger.info("")
|
||||
logger.info("Rollback safety:")
|
||||
logger.info(" - url-watches.json preserved for rollback")
|
||||
logger.info(" - To rollback: downgrade to previous version and restart")
|
||||
logger.info(" - No manual file operations needed")
|
||||
logger.info("")
|
||||
logger.info("Optional cleanup (after testing new version):")
|
||||
logger.info(f" - rm {os.path.join(self.datastore_path, 'url-watches.json')}")
|
||||
logger.info("")
|
||||
|
||||
def update_26(self):
|
||||
self.migrate_legacy_db_format()
|
||||
|
||||
def update_28(self):
|
||||
"""
|
||||
Migrate tags to individual tag.json files.
|
||||
|
||||
Tags are currently saved only in changedetection.json (settings).
|
||||
This migration ALSO saves them to individual {uuid}/tag.json files,
|
||||
similar to how watches are stored (dual storage).
|
||||
|
||||
Benefits:
|
||||
- Allows atomic tag updates without rewriting entire settings
|
||||
- Enables independent tag versioning/backup
|
||||
- Maintains backwards compatibility (tags stay in settings too)
|
||||
"""
|
||||
# Force save as tag.json (not watch.json) even if object is corrupted
|
||||
|
||||
logger.critical("=" * 80)
|
||||
logger.critical("Running migration: Individual tag persistence (update_28)")
|
||||
logger.critical("Creating individual tag.json files")
|
||||
logger.critical("=" * 80)
|
||||
|
||||
tags = self.data['settings']['application'].get('tags', {})
|
||||
tag_count = len(tags)
|
||||
|
||||
if tag_count == 0:
|
||||
logger.info("No tags found, skipping migration")
|
||||
return
|
||||
|
||||
logger.info(f"Migrating {tag_count} tags to individual tag.json files...")
|
||||
|
||||
saved_count = 0
|
||||
failed_count = 0
|
||||
|
||||
for uuid, tag_data in tags.items():
|
||||
try:
|
||||
tag_data.commit()
|
||||
saved_count += 1
|
||||
if saved_count % 10 == 0:
|
||||
logger.info(f" Progress: {saved_count}/{tag_count} tags migrated...")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to save tag {uuid} ({tag_data.get('title', 'unknown')}): {e}")
|
||||
failed_count += 1
|
||||
|
||||
if failed_count > 0:
|
||||
logger.warning(f"Migration complete: {saved_count} tags saved, {failed_count} tags FAILED")
|
||||
else:
|
||||
logger.success(f"Migration complete: {saved_count} tags saved to individual tag.json files")
|
||||
|
||||
# Tags remain in settings for backwards compatibility AND easy access
|
||||
# On next load, _load_tags() will read from tag.json files and merge with settings
|
||||
logger.info("Tags saved to both settings AND individual tag.json files")
|
||||
logger.info("Future tag edits will update both locations (dual storage)")
|
||||
logger.critical("=" * 80)
|
||||
|
||||
@@ -27,7 +27,7 @@
|
||||
<link rel="apple-touch-icon" sizes="180x180" href="{{url_for('static_content', group='favicons', filename='apple-touch-icon.png')}}">
|
||||
<link rel="icon" type="image/png" sizes="32x32" href="{{url_for('static_content', group='favicons', filename='favicon-32x32.png')}}">
|
||||
<link rel="icon" type="image/png" sizes="16x16" href="{{url_for('static_content', group='favicons', filename='favicon-16x16.png')}}">
|
||||
<link rel="manifest" href="{{url_for('static_content', group='favicons', filename='site.webmanifest')}}">
|
||||
<link rel="manifest" href="{{url_for('static_content', group='favicons', filename='site.webmanifest')}}" crossorigin="use-credentials">
|
||||
<link rel="mask-icon" href="{{url_for('static_content', group='favicons', filename='safari-pinned-tab.svg')}}" color="#5bbad5">
|
||||
<link rel="shortcut icon" href="{{url_for('static_content', group='favicons', filename='favicon.ico')}}">
|
||||
<meta name="msapplication-TileColor" content="#da532c">
|
||||
@@ -265,6 +265,9 @@
|
||||
</a>
|
||||
{% endfor %}
|
||||
</div>
|
||||
<div>
|
||||
<a href="{{ url_for('ui.delete_locale_language_session_var_if_it_exists', redirect=request.path) }}" >{{ _('Auto-detect from browser') }}</a>
|
||||
</div>
|
||||
<div>
|
||||
{{ _('Language support is in beta, please help us improve by opening a PR on GitHub with any updates.') }}
|
||||
</div>
|
||||
|
||||
Executable
+265
@@ -0,0 +1,265 @@
|
||||
#!/bin/bash
|
||||
# Test script for CLI options - Parallel execution
|
||||
# Tests -u, -uN, -r, -b flags
|
||||
|
||||
set -u # Exit on undefined variables
|
||||
|
||||
# Color output
|
||||
RED='\033[0;31m'
|
||||
GREEN='\033[0;32m'
|
||||
YELLOW='\033[1;33m'
|
||||
NC='\033[0m' # No Color
|
||||
|
||||
# Test results directory (for parallel safety)
|
||||
TEST_RESULTS_DIR="/tmp/cli-test-results-$$"
|
||||
mkdir -p "$TEST_RESULTS_DIR"
|
||||
|
||||
# Cleanup function
|
||||
cleanup() {
|
||||
echo ""
|
||||
echo "=== Cleaning up test directories ==="
|
||||
rm -rf /tmp/cli-test-* 2>/dev/null || true
|
||||
rm -rf "$TEST_RESULTS_DIR" 2>/dev/null || true
|
||||
# Kill any hanging processes
|
||||
pkill -f "changedetection.py.*cli-test" 2>/dev/null || true
|
||||
}
|
||||
trap cleanup EXIT
|
||||
|
||||
# Helper to record test result
|
||||
record_result() {
|
||||
local test_num=$1
|
||||
local status=$2 # pass or fail
|
||||
local message=$3
|
||||
|
||||
echo "$status|$message" > "$TEST_RESULTS_DIR/test_${test_num}.result"
|
||||
}
|
||||
|
||||
# Run a test in background
|
||||
run_test() {
|
||||
local test_num=$1
|
||||
local test_name=$2
|
||||
local test_func=$3
|
||||
|
||||
(
|
||||
echo -e "${YELLOW}[Test $test_num]${NC} $test_name"
|
||||
if $test_func "$test_num"; then
|
||||
record_result "$test_num" "pass" "$test_name"
|
||||
echo -e "${GREEN}✓ PASS${NC}: $test_name"
|
||||
else
|
||||
record_result "$test_num" "fail" "$test_name"
|
||||
echo -e "${RED}✗ FAIL${NC}: $test_name"
|
||||
fi
|
||||
) &
|
||||
}
|
||||
|
||||
# =============================================================================
|
||||
# Test Functions (each runs independently)
|
||||
# =============================================================================
|
||||
|
||||
test_help_flag() {
|
||||
local test_id=$1
|
||||
timeout 3 python3 changedetection.py --help 2>&1 | grep -q "Add URLs on startup"
|
||||
}
|
||||
|
||||
test_version_flag() {
|
||||
local test_id=$1
|
||||
timeout 3 python3 changedetection.py --version 2>&1 | grep -qE "changedetection.io [0-9]+\.[0-9]+"
|
||||
}
|
||||
|
||||
test_single_url() {
|
||||
local test_id=$1
|
||||
local dir="/tmp/cli-test-single-${test_id}-$$"
|
||||
timeout 10 python3 changedetection.py -d "$dir" -C -u https://example.com -b &>/dev/null
|
||||
# Count watch directories (UUID directories containing watch.json)
|
||||
[ "$(find "$dir" -mindepth 2 -maxdepth 2 -name 'watch.json' | wc -l)" -eq 1 ]
|
||||
}
|
||||
|
||||
test_multiple_urls() {
|
||||
local test_id=$1
|
||||
local dir="/tmp/cli-test-multi-${test_id}-$$"
|
||||
timeout 12 python3 changedetection.py -d "$dir" -C \
|
||||
-u https://example.com \
|
||||
-u https://github.com \
|
||||
-u https://httpbin.org \
|
||||
-b &>/dev/null
|
||||
# Count watch directories (UUID directories containing watch.json)
|
||||
[ "$(find "$dir" -mindepth 2 -maxdepth 2 -name 'watch.json' | wc -l)" -eq 3 ]
|
||||
}
|
||||
|
||||
test_url_with_options() {
|
||||
local test_id=$1
|
||||
local dir="/tmp/cli-test-opts-${test_id}-$$"
|
||||
timeout 10 python3 changedetection.py -d "$dir" -C \
|
||||
-u https://example.com \
|
||||
-u0 '{"title":"Test Site","processor":"text_json_diff"}' \
|
||||
-b &>/dev/null
|
||||
# Check that at least one watch.json contains the title "Test Site"
|
||||
python3 -c "
|
||||
import json, glob, sys
|
||||
watch_files = glob.glob('$dir/*/watch.json')
|
||||
for wf in watch_files:
|
||||
with open(wf) as f:
|
||||
data = json.load(f)
|
||||
if data.get('title') == 'Test Site':
|
||||
sys.exit(0)
|
||||
sys.exit(1)
|
||||
"
|
||||
}
|
||||
|
||||
test_multiple_urls_with_options() {
|
||||
local test_id=$1
|
||||
local dir="/tmp/cli-test-multi-opts-${test_id}-$$"
|
||||
timeout 12 python3 changedetection.py -d "$dir" -C \
|
||||
-u https://example.com \
|
||||
-u0 '{"title":"Site One"}' \
|
||||
-u https://github.com \
|
||||
-u1 '{"title":"Site Two"}' \
|
||||
-b &>/dev/null
|
||||
# Check that we have 2 watches and both titles are present
|
||||
python3 -c "
|
||||
import json, glob, sys
|
||||
watch_files = glob.glob('$dir/*/watch.json')
|
||||
if len(watch_files) != 2:
|
||||
sys.exit(1)
|
||||
titles = []
|
||||
for wf in watch_files:
|
||||
with open(wf) as f:
|
||||
data = json.load(f)
|
||||
titles.append(data.get('title'))
|
||||
sys.exit(0 if 'Site One' in titles and 'Site Two' in titles else 1)
|
||||
"
|
||||
}
|
||||
|
||||
test_batch_mode_exit() {
|
||||
local test_id=$1
|
||||
local dir="/tmp/cli-test-batch-${test_id}-$$"
|
||||
local start=$(date +%s)
|
||||
timeout 15 python3 changedetection.py -d "$dir" -C \
|
||||
-u https://example.com \
|
||||
-b &>/dev/null
|
||||
local end=$(date +%s)
|
||||
local elapsed=$((end - start))
|
||||
[ $elapsed -lt 14 ]
|
||||
}
|
||||
|
||||
test_recheck_all() {
|
||||
local test_id=$1
|
||||
local dir="/tmp/cli-test-recheck-all-${test_id}-$$"
|
||||
# Create a watch using CLI, then recheck it
|
||||
timeout 10 python3 changedetection.py -d "$dir" -C -u https://example.com -b &>/dev/null
|
||||
# Now recheck all watches
|
||||
timeout 10 python3 changedetection.py -d "$dir" -r all -b 2>&1 | grep -q "Queuing"
|
||||
}
|
||||
|
||||
test_recheck_specific() {
|
||||
local test_id=$1
|
||||
local dir="/tmp/cli-test-recheck-uuid-${test_id}-$$"
|
||||
# Create 2 watches using CLI
|
||||
timeout 12 python3 changedetection.py -d "$dir" -C \
|
||||
-u https://example.com \
|
||||
-u https://github.com \
|
||||
-b &>/dev/null
|
||||
# Get the UUIDs that were created
|
||||
local uuids=$(find "$dir" -mindepth 2 -maxdepth 2 -name 'watch.json' -exec dirname {} \; | xargs -n1 basename | tr '\n' ',' | sed 's/,$//')
|
||||
# Now recheck specific UUIDs
|
||||
timeout 10 python3 changedetection.py -d "$dir" -r "$uuids" -b 2>&1 | grep -q "Queuing"
|
||||
}
|
||||
|
||||
test_combined_operations() {
|
||||
local test_id=$1
|
||||
local dir="/tmp/cli-test-combined-${test_id}-$$"
|
||||
timeout 12 python3 changedetection.py -d "$dir" -C \
|
||||
-u https://example.com \
|
||||
-u https://github.com \
|
||||
-r all \
|
||||
-b &>/dev/null
|
||||
# Count watch directories (UUID directories containing watch.json)
|
||||
[ "$(find "$dir" -mindepth 2 -maxdepth 2 -name 'watch.json' | wc -l)" -eq 2 ]
|
||||
}
|
||||
|
||||
test_invalid_json() {
|
||||
local test_id=$1
|
||||
local dir="/tmp/cli-test-invalid-${test_id}-$$"
|
||||
timeout 5 python3 changedetection.py -d "$dir" -C \
|
||||
-u https://example.com \
|
||||
-u0 'invalid json here' \
|
||||
2>&1 | grep -qi "invalid json\|json decode error"
|
||||
}
|
||||
|
||||
test_create_directory() {
|
||||
local test_id=$1
|
||||
local dir="/tmp/cli-test-create-${test_id}-$$/nested/path"
|
||||
timeout 10 python3 changedetection.py -d "$dir" -C \
|
||||
-u https://example.com \
|
||||
-b &>/dev/null
|
||||
[ -d "$dir" ]
|
||||
}
|
||||
|
||||
# =============================================================================
|
||||
# Main Test Execution
|
||||
# =============================================================================
|
||||
|
||||
echo "=========================================="
|
||||
echo " CLI Options Test Suite (Parallel)"
|
||||
echo "=========================================="
|
||||
echo ""
|
||||
|
||||
# Launch all tests in parallel
|
||||
run_test 1 "Help flag (--help) shows usage without initialization" test_help_flag
|
||||
run_test 2 "Version flag (--version) displays version" test_version_flag
|
||||
run_test 3 "Add single URL with -u flag" test_single_url
|
||||
run_test 4 "Add multiple URLs with multiple -u flags" test_multiple_urls
|
||||
run_test 5 "Add URL with JSON options using -u0" test_url_with_options
|
||||
run_test 6 "Add multiple URLs with different options (-u0, -u1)" test_multiple_urls_with_options
|
||||
run_test 7 "Batch mode (-b) exits automatically after processing" test_batch_mode_exit
|
||||
run_test 8 "Recheck all watches with -r all" test_recheck_all
|
||||
run_test 9 "Recheck specific watches with -r UUID" test_recheck_specific
|
||||
run_test 10 "Combined: Add URLs and recheck all with -u and -r all" test_combined_operations
|
||||
run_test 11 "Invalid JSON in -u0 option should show error" test_invalid_json
|
||||
run_test 12 "Create datastore directory with -C flag" test_create_directory
|
||||
|
||||
# Wait for all tests to complete
|
||||
echo ""
|
||||
echo "Waiting for all tests to complete..."
|
||||
wait
|
||||
|
||||
# Collect results
|
||||
echo ""
|
||||
echo "=========================================="
|
||||
echo " Test Summary"
|
||||
echo "=========================================="
|
||||
|
||||
TESTS_RUN=0
|
||||
TESTS_PASSED=0
|
||||
TESTS_FAILED=0
|
||||
|
||||
for result_file in "$TEST_RESULTS_DIR"/test_*.result; do
|
||||
if [ -f "$result_file" ]; then
|
||||
TESTS_RUN=$((TESTS_RUN + 1))
|
||||
status=$(cut -d'|' -f1 < "$result_file")
|
||||
if [ "$status" = "pass" ]; then
|
||||
TESTS_PASSED=$((TESTS_PASSED + 1))
|
||||
else
|
||||
TESTS_FAILED=$((TESTS_FAILED + 1))
|
||||
fi
|
||||
fi
|
||||
done
|
||||
|
||||
echo "Tests run: $TESTS_RUN"
|
||||
echo -e "${GREEN}Tests passed: $TESTS_PASSED${NC}"
|
||||
if [ $TESTS_FAILED -gt 0 ]; then
|
||||
echo -e "${RED}Tests failed: $TESTS_FAILED${NC}"
|
||||
else
|
||||
echo -e "${GREEN}Tests failed: $TESTS_FAILED${NC}"
|
||||
fi
|
||||
echo "=========================================="
|
||||
echo ""
|
||||
|
||||
# Exit with appropriate code
|
||||
if [ $TESTS_FAILED -gt 0 ]; then
|
||||
echo -e "${RED}Some tests failed!${NC}"
|
||||
exit 1
|
||||
else
|
||||
echo -e "${GREEN}All tests passed!${NC}"
|
||||
exit 0
|
||||
fi
|
||||
@@ -5,13 +5,16 @@ from threading import Thread
|
||||
|
||||
import pytest
|
||||
import arrow
|
||||
from changedetectionio import changedetection_app
|
||||
from changedetectionio import store
|
||||
import os
|
||||
import sys
|
||||
from loguru import logger
|
||||
|
||||
from changedetectionio.flask_app import init_app_secret
|
||||
# CRITICAL: Set short timeout for tests to prevent 45-second hangs
|
||||
# When test server is slow/unresponsive, workers fail fast instead of holding UUIDs for 45s
|
||||
# This prevents exponential priority growth from repeated deferrals (priority × 10 each defer)
|
||||
os.environ['DEFAULT_SETTINGS_REQUESTS_TIMEOUT'] = '5'
|
||||
|
||||
from changedetectionio.flask_app import init_app_secret, changedetection_app
|
||||
from changedetectionio.tests.util import live_server_setup, new_live_server_setup
|
||||
|
||||
# https://github.com/pallets/flask/blob/1.1.2/examples/tutorial/tests/test_auth.py
|
||||
@@ -31,6 +34,93 @@ def reportlog(pytestconfig):
|
||||
logger.remove(handler_id)
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def per_test_log_file(request):
|
||||
"""Create a separate log file for each test function with pytest output."""
|
||||
import re
|
||||
|
||||
# Create logs directory if it doesn't exist
|
||||
log_dir = os.path.join(os.path.dirname(__file__), "logs")
|
||||
os.makedirs(log_dir, exist_ok=True)
|
||||
|
||||
# Generate log filename from test name and worker ID (for parallel runs)
|
||||
test_name = request.node.name
|
||||
|
||||
# Sanitize test name - replace unsafe characters with underscores
|
||||
# Keep only alphanumeric, dash, underscore, and period
|
||||
safe_test_name = re.sub(r'[^\w\-.]', '_', test_name)
|
||||
|
||||
# Limit length to avoid filesystem issues (max 200 chars)
|
||||
if len(safe_test_name) > 200:
|
||||
# Keep first 150 chars + hash of full name + last 30 chars
|
||||
import hashlib
|
||||
name_hash = hashlib.md5(test_name.encode()).hexdigest()[:8]
|
||||
safe_test_name = f"{safe_test_name[:150]}_{name_hash}_{safe_test_name[-30:]}"
|
||||
|
||||
worker_id = os.environ.get('PYTEST_XDIST_WORKER', 'master')
|
||||
log_file = os.path.join(log_dir, f"{safe_test_name}_{worker_id}.log")
|
||||
|
||||
# Add file handler for this test with TRACE level
|
||||
handler_id = logger.add(
|
||||
log_file,
|
||||
format="{time:YYYY-MM-DD HH:mm:ss.SSS} | {level: <8} | {process} | {name}:{function}:{line} - {message}",
|
||||
level="TRACE",
|
||||
mode="w", # Overwrite if exists
|
||||
enqueue=True # Thread-safe
|
||||
)
|
||||
|
||||
logger.info(f"=== Starting test: {test_name} (worker: {worker_id}) ===")
|
||||
logger.info(f"Test location: {request.node.nodeid}")
|
||||
|
||||
yield
|
||||
|
||||
# Capture test outcome (PASSED/FAILED/SKIPPED/ERROR)
|
||||
outcome = "UNKNOWN"
|
||||
exc_info = None
|
||||
stdout = None
|
||||
stderr = None
|
||||
|
||||
if hasattr(request.node, 'rep_call'):
|
||||
outcome = request.node.rep_call.outcome.upper()
|
||||
if request.node.rep_call.failed:
|
||||
exc_info = request.node.rep_call.longreprtext
|
||||
# Capture stdout/stderr from call phase
|
||||
if hasattr(request.node.rep_call, 'sections'):
|
||||
for section_name, section_content in request.node.rep_call.sections:
|
||||
if 'stdout' in section_name.lower():
|
||||
stdout = section_content
|
||||
elif 'stderr' in section_name.lower():
|
||||
stderr = section_content
|
||||
elif hasattr(request.node, 'rep_setup'):
|
||||
if request.node.rep_setup.failed:
|
||||
outcome = "SETUP_FAILED"
|
||||
exc_info = request.node.rep_setup.longreprtext
|
||||
|
||||
logger.info(f"=== Test Result: {outcome} ===")
|
||||
|
||||
if exc_info:
|
||||
logger.error(f"=== Test Failure Details ===\n{exc_info}")
|
||||
|
||||
if stdout:
|
||||
logger.info(f"=== Captured stdout ===\n{stdout}")
|
||||
|
||||
if stderr:
|
||||
logger.warning(f"=== Captured stderr ===\n{stderr}")
|
||||
|
||||
logger.info(f"=== Finished test: {test_name} ===")
|
||||
logger.remove(handler_id)
|
||||
|
||||
|
||||
@pytest.hookimpl(tryfirst=True, hookwrapper=True)
|
||||
def pytest_runtest_makereport(item, call):
|
||||
"""Hook to capture test results and attach to the test node."""
|
||||
outcome = yield
|
||||
rep = outcome.get_result()
|
||||
|
||||
# Store report on the test node for access in fixtures
|
||||
setattr(item, f"rep_{rep.when}", rep)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def environment(mocker):
|
||||
"""Mock arrow.now() to return a fixed datetime for testing jinja2 time extension."""
|
||||
@@ -167,9 +257,56 @@ def prepare_test_function(live_server, datastore_path):
|
||||
except:
|
||||
break
|
||||
|
||||
# Prevent background thread from writing during cleanup/reload
|
||||
datastore.needs_write = False
|
||||
datastore.needs_write_urgent = False
|
||||
# Add test helper methods to the app for worker management
|
||||
def set_workers(count):
|
||||
"""Set the number of workers for testing - brutal shutdown, no delays"""
|
||||
from changedetectionio import worker_pool
|
||||
from changedetectionio.flask_app import update_q, notification_q
|
||||
|
||||
current_count = worker_pool.get_worker_count()
|
||||
|
||||
# Special case: Setting to 0 means shutdown all workers brutally
|
||||
if count == 0:
|
||||
logger.debug(f"Brutally shutting down all {current_count} workers")
|
||||
worker_pool.shutdown_workers()
|
||||
return {
|
||||
'status': 'success',
|
||||
'message': f'Shutdown all {current_count} workers',
|
||||
'previous_count': current_count,
|
||||
'current_count': 0
|
||||
}
|
||||
|
||||
# Adjust worker count (no delays, no verification)
|
||||
result = worker_pool.adjust_async_worker_count(
|
||||
count,
|
||||
update_q=update_q,
|
||||
notification_q=notification_q,
|
||||
app=live_server.app,
|
||||
datastore=datastore
|
||||
)
|
||||
|
||||
return result
|
||||
|
||||
def check_all_workers_alive(expected_count):
|
||||
"""Check that all expected workers are alive"""
|
||||
from changedetectionio import worker_pool
|
||||
from changedetectionio.flask_app import update_q, notification_q
|
||||
result = worker_pool.check_worker_health(
|
||||
expected_count,
|
||||
update_q=update_q,
|
||||
notification_q=notification_q,
|
||||
app=live_server.app,
|
||||
datastore=datastore
|
||||
)
|
||||
assert result['status'] == 'healthy', f"Workers not healthy: {result['message']}"
|
||||
return result
|
||||
|
||||
# Attach helper methods to app for easy test access
|
||||
live_server.app.set_workers = set_workers
|
||||
live_server.app.check_all_workers_alive = check_all_workers_alive
|
||||
|
||||
|
||||
|
||||
|
||||
# CRITICAL: Clean up any files from previous tests
|
||||
# This ensures a completely clean directory
|
||||
@@ -203,7 +340,6 @@ def prepare_test_function(live_server, datastore_path):
|
||||
break
|
||||
|
||||
datastore.data['watching'] = {}
|
||||
datastore.needs_write = True
|
||||
except Exception as e:
|
||||
logger.warning(f"Error during datastore cleanup: {e}")
|
||||
|
||||
@@ -264,8 +400,8 @@ def app(request, datastore_path):
|
||||
|
||||
# Shutdown workers gracefully before loguru cleanup
|
||||
try:
|
||||
from changedetectionio import worker_handler
|
||||
worker_handler.shutdown_workers()
|
||||
from changedetectionio import worker_pool
|
||||
worker_pool.shutdown_workers()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
@@ -313,4 +449,3 @@ def app(request, datastore_path):
|
||||
yield app
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -0,0 +1,41 @@
|
||||
import time
|
||||
|
||||
from flask import url_for
|
||||
|
||||
from changedetectionio.tests.util import wait_for_all_checks
|
||||
|
||||
|
||||
def test_check_plugin_processor(client, live_server, measure_memory_usage, datastore_path):
|
||||
# requires os-int intelligence plugin installed (first basic one we test with)
|
||||
|
||||
res = client.get(url_for("watchlist.index"))
|
||||
assert b'OSINT Reconnaissance' in res.data, "Must have the OSINT plugin installed at test time"
|
||||
assert b'<input checked id="processor-0" name="processor" type="radio" value="text_json_diff">' in res.data, "But the first text_json_diff processor should always be selected by default in quick watch form"
|
||||
|
||||
res = client.post(
|
||||
url_for("ui.ui_views.form_quick_watch_add"),
|
||||
data={"url": 'http://127.0.0.1', "tags": '', 'processor': 'osint_recon'},
|
||||
follow_redirects=True
|
||||
)
|
||||
assert b"Watch added" in res.data
|
||||
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
||||
|
||||
wait_for_all_checks(client)
|
||||
|
||||
res = client.get(
|
||||
url_for("ui.ui_preview.preview_page", uuid="first"),
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
assert b'Target: http://127.0.0.1' in res.data
|
||||
assert b'DNSKEY Records' in res.data
|
||||
wait_for_all_checks(client)
|
||||
|
||||
|
||||
# Now change it to something that doesnt exist
|
||||
uuid = next(iter(live_server.app.config['DATASTORE'].data['watching']))
|
||||
live_server.app.config['DATASTORE'].data['watching'][uuid]['processor'] = "now_missing"
|
||||
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
||||
wait_for_all_checks(client)
|
||||
res = client.get(url_for("watchlist.index"))
|
||||
assert b"Exception: Processor module" in res.data and b'now_missing' in res.data, f'Should register that the plugin is missing for {uuid}'
|
||||
@@ -465,7 +465,10 @@ def test_api_watch_PUT_update(client, live_server, measure_memory_usage, datasto
|
||||
|
||||
assert res.status_code == 400, "Should get error 400 when we give a field that doesnt exist"
|
||||
# Message will come from `flask_expects_json`
|
||||
assert b'Additional properties are not allowed' in res.data
|
||||
# With patternProperties for processor_config_*, the error message format changed slightly
|
||||
assert (b'Additional properties are not allowed' in res.data or
|
||||
b'does not match any of the regexes' in res.data), \
|
||||
"Should reject unknown fields with schema validation error"
|
||||
|
||||
|
||||
# Try a XSS URL
|
||||
@@ -486,6 +489,7 @@ def test_api_import(client, live_server, measure_memory_usage, datastore_path):
|
||||
|
||||
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
|
||||
|
||||
# Test 1: Basic import with tag
|
||||
res = client.post(
|
||||
url_for("import") + "?tag=import-test",
|
||||
data='https://website1.com\r\nhttps://website2.com',
|
||||
@@ -504,6 +508,209 @@ def test_api_import(client, live_server, measure_memory_usage, datastore_path):
|
||||
res = client.get(url_for('tags.tags_overview_page'))
|
||||
assert b'import-test' in res.data
|
||||
|
||||
# Test 2: Import with watch configuration fields (issue #3845)
|
||||
# Test string field (include_filters), boolean (paused), and processor
|
||||
import urllib.parse
|
||||
params = urllib.parse.urlencode({
|
||||
'tag': 'config-test',
|
||||
'include_filters': 'div.content',
|
||||
'paused': 'true',
|
||||
'processor': 'text_json_diff',
|
||||
'title': 'Imported with Config'
|
||||
})
|
||||
|
||||
res = client.post(
|
||||
url_for("import") + "?" + params,
|
||||
data='https://website3.com',
|
||||
headers={'x-api-key': api_key},
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
assert res.status_code == 200
|
||||
assert len(res.json) == 1
|
||||
uuid = res.json[0]
|
||||
|
||||
# Verify the configuration was applied
|
||||
watch = live_server.app.config['DATASTORE'].data['watching'][uuid]
|
||||
assert watch['include_filters'] == ['div.content'], "include_filters should be set as array"
|
||||
assert watch['paused'] == True, "paused should be True"
|
||||
assert watch['processor'] == 'text_json_diff', "processor should be set"
|
||||
assert watch['title'] == 'Imported with Config', "title should be set"
|
||||
|
||||
# Test 3: Import with array field (notification_urls) - using valid Apprise format
|
||||
params = urllib.parse.urlencode({
|
||||
'tag': 'notification-test',
|
||||
'notification_urls': 'mailto://test@example.com,mailto://admin@example.com'
|
||||
})
|
||||
|
||||
res = client.post(
|
||||
url_for("import") + "?" + params,
|
||||
data='https://website4.com',
|
||||
headers={'x-api-key': api_key},
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
assert res.status_code == 200
|
||||
uuid = res.json[0]
|
||||
watch = live_server.app.config['DATASTORE'].data['watching'][uuid]
|
||||
assert 'mailto://test@example.com' in watch['notification_urls'], "notification_urls should contain first email"
|
||||
assert 'mailto://admin@example.com' in watch['notification_urls'], "notification_urls should contain second email"
|
||||
|
||||
# Test 4: Import with object field (time_between_check)
|
||||
import json
|
||||
time_config = json.dumps({"hours": 2, "minutes": 30})
|
||||
params = urllib.parse.urlencode({
|
||||
'tag': 'schedule-test',
|
||||
'time_between_check': time_config
|
||||
})
|
||||
|
||||
res = client.post(
|
||||
url_for("import") + "?" + params,
|
||||
data='https://website5.com',
|
||||
headers={'x-api-key': api_key},
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
assert res.status_code == 200
|
||||
uuid = res.json[0]
|
||||
watch = live_server.app.config['DATASTORE'].data['watching'][uuid]
|
||||
assert watch['time_between_check']['hours'] == 2, "time_between_check hours should be 2"
|
||||
assert watch['time_between_check']['minutes'] == 30, "time_between_check minutes should be 30"
|
||||
|
||||
# Test 5: Import with invalid processor (should fail)
|
||||
res = client.post(
|
||||
url_for("import") + "?processor=invalid_processor",
|
||||
data='https://website6.com',
|
||||
headers={'x-api-key': api_key},
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
assert res.status_code == 400, "Should reject invalid processor"
|
||||
assert b"Invalid processor" in res.data, "Error message should mention invalid processor"
|
||||
|
||||
# Test 6: Import with invalid field (should fail)
|
||||
res = client.post(
|
||||
url_for("import") + "?unknown_field=value",
|
||||
data='https://website7.com',
|
||||
headers={'x-api-key': api_key},
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
assert res.status_code == 400, "Should reject unknown field"
|
||||
assert b"Unknown watch configuration parameter" in res.data, "Error message should mention unknown parameter"
|
||||
|
||||
|
||||
def test_api_import_small_synchronous(client, live_server, measure_memory_usage, datastore_path):
|
||||
"""Test that small imports (< threshold) are processed synchronously"""
|
||||
from changedetectionio.api.Import import IMPORT_SWITCH_TO_BACKGROUND_THRESHOLD
|
||||
|
||||
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
|
||||
|
||||
# Use local test endpoint to avoid network delays
|
||||
test_url_base = url_for('test_endpoint', _external=True)
|
||||
|
||||
# Create URLs: threshold - 1 to stay under limit
|
||||
num_urls = min(5, IMPORT_SWITCH_TO_BACKGROUND_THRESHOLD - 1) # Use small number for faster test
|
||||
urls = '\n'.join([f'{test_url_base}?id=small-{i}' for i in range(num_urls)])
|
||||
|
||||
# Import small batch
|
||||
res = client.post(
|
||||
url_for("import") + "?tag=small-test",
|
||||
data=urls,
|
||||
headers={'x-api-key': api_key},
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
# Should return 200 OK with UUID list (synchronous)
|
||||
assert res.status_code == 200, f"Should return 200 for small imports, got {res.status_code}"
|
||||
assert isinstance(res.json, list), "Response should be a list of UUIDs"
|
||||
assert len(res.json) == num_urls, f"Should return {num_urls} UUIDs, got {len(res.json)}"
|
||||
|
||||
# Verify all watches were created immediately
|
||||
for uuid in res.json:
|
||||
assert uuid in live_server.app.config['DATASTORE'].data['watching'], \
|
||||
f"Watch {uuid} should exist immediately after synchronous import"
|
||||
|
||||
print(f"\n✓ Successfully created {num_urls} watches synchronously")
|
||||
|
||||
|
||||
def test_api_import_large_background(client, live_server, measure_memory_usage, datastore_path):
|
||||
"""Test that large imports (>= threshold) are processed in background thread"""
|
||||
from changedetectionio.api.Import import IMPORT_SWITCH_TO_BACKGROUND_THRESHOLD
|
||||
import time
|
||||
|
||||
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
|
||||
|
||||
# Use local test endpoint to avoid network delays
|
||||
test_url_base = url_for('test_endpoint', _external=True)
|
||||
|
||||
# Create URLs: threshold + 10 to trigger background processing
|
||||
num_urls = IMPORT_SWITCH_TO_BACKGROUND_THRESHOLD + 10
|
||||
urls = '\n'.join([f'{test_url_base}?id=bulk-{i}' for i in range(num_urls)])
|
||||
|
||||
# Import large batch
|
||||
res = client.post(
|
||||
url_for("import") + "?tag=bulk-test",
|
||||
data=urls,
|
||||
headers={'x-api-key': api_key},
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
# Should return 202 Accepted (background processing)
|
||||
assert res.status_code == 202, f"Should return 202 for large imports, got {res.status_code}"
|
||||
assert b"background" in res.data.lower(), "Response should mention background processing"
|
||||
|
||||
# Extract expected count from response
|
||||
response_json = res.json
|
||||
assert 'count' in response_json, "Response should include count"
|
||||
assert response_json['count'] == num_urls, f"Count should be {num_urls}, got {response_json['count']}"
|
||||
|
||||
# Wait for background thread to complete (with timeout)
|
||||
max_wait = 10 # seconds
|
||||
wait_interval = 0.5
|
||||
elapsed = 0
|
||||
watches_created = 0
|
||||
|
||||
while elapsed < max_wait:
|
||||
time.sleep(wait_interval)
|
||||
elapsed += wait_interval
|
||||
|
||||
# Count how many watches have been created
|
||||
watches_created = len([
|
||||
uuid for uuid, watch in live_server.app.config['DATASTORE'].data['watching'].items()
|
||||
if 'id=bulk-' in watch['url']
|
||||
])
|
||||
|
||||
if watches_created == num_urls:
|
||||
break
|
||||
|
||||
# Verify all watches were created
|
||||
assert watches_created == num_urls, \
|
||||
f"Expected {num_urls} watches to be created, but found {watches_created} after {elapsed}s"
|
||||
|
||||
# Verify watches have correct configuration
|
||||
bulk_watches = [
|
||||
watch for watch in live_server.app.config['DATASTORE'].data['watching'].values()
|
||||
if 'id=bulk-' in watch['url']
|
||||
]
|
||||
|
||||
assert len(bulk_watches) == num_urls, "All bulk watches should exist"
|
||||
|
||||
# Check that they have the correct tag
|
||||
datastore = live_server.app.config['DATASTORE']
|
||||
# Get UUIDs of bulk watches by filtering the datastore keys
|
||||
bulk_watch_uuids = [
|
||||
uuid for uuid, watch in live_server.app.config['DATASTORE'].data['watching'].items()
|
||||
if 'id=bulk-' in watch['url']
|
||||
]
|
||||
for watch_uuid in bulk_watch_uuids:
|
||||
tags = datastore.get_all_tags_for_watch(uuid=watch_uuid)
|
||||
tag_names = [t['title'] for t in tags.values()]
|
||||
assert 'bulk-test' in tag_names, f"Watch {watch_uuid} should have 'bulk-test' tag"
|
||||
|
||||
print(f"\n✓ Successfully created {num_urls} watches in background (took {elapsed}s)")
|
||||
|
||||
|
||||
def test_api_conflict_UI_password(client, live_server, measure_memory_usage, datastore_path):
|
||||
|
||||
|
||||
|
||||
@@ -0,0 +1,166 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
"""
|
||||
Test notification_urls validation in Watch and Tag API endpoints.
|
||||
Ensures that invalid AppRise URLs are rejected when setting notification_urls.
|
||||
|
||||
Valid AppRise notification URLs use specific protocols like:
|
||||
- posts://example.com - POST to HTTP endpoint
|
||||
- gets://example.com - GET to HTTP endpoint
|
||||
- mailto://user@example.com - Email
|
||||
- slack://token/channel - Slack
|
||||
- discord://webhook_id/webhook_token - Discord
|
||||
- etc.
|
||||
|
||||
Invalid notification URLs:
|
||||
- https://example.com - Plain HTTPS is NOT a valid AppRise notification protocol
|
||||
- ftp://example.com - FTP is NOT a valid AppRise notification protocol
|
||||
- Plain URLs without proper AppRise protocol prefix
|
||||
"""
|
||||
|
||||
from flask import url_for
|
||||
import json
|
||||
|
||||
|
||||
def test_watch_notification_urls_validation(client, live_server, measure_memory_usage, datastore_path):
|
||||
"""Test that Watch PUT/POST endpoints validate notification_urls."""
|
||||
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
|
||||
|
||||
# Test 1: Create a watch with valid notification URLs
|
||||
valid_urls = ["posts://example.com/notify1", "posts://example.com/notify2"]
|
||||
res = client.post(
|
||||
url_for("createwatch"),
|
||||
data=json.dumps({
|
||||
"url": "https://example.com",
|
||||
"notification_urls": valid_urls
|
||||
}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key}
|
||||
)
|
||||
assert res.status_code == 201, "Should accept valid notification URLs on watch creation"
|
||||
watch_uuid = res.json['uuid']
|
||||
|
||||
# Verify the notification URLs were saved
|
||||
res = client.get(
|
||||
url_for("watch", uuid=watch_uuid),
|
||||
headers={'x-api-key': api_key}
|
||||
)
|
||||
assert res.status_code == 200
|
||||
assert set(res.json['notification_urls']) == set(valid_urls), "Valid notification URLs should be saved"
|
||||
|
||||
# Test 2: Try to create a watch with invalid notification URLs (https:// is not valid)
|
||||
invalid_urls = ["https://example.com/webhook"]
|
||||
res = client.post(
|
||||
url_for("createwatch"),
|
||||
data=json.dumps({
|
||||
"url": "https://example.com",
|
||||
"notification_urls": invalid_urls
|
||||
}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key}
|
||||
)
|
||||
assert res.status_code == 400, "Should reject https:// notification URLs (not a valid AppRise protocol)"
|
||||
assert b"is not a valid AppRise URL" in res.data, "Should provide AppRise validation error message"
|
||||
|
||||
# Test 2b: Also test other invalid protocols
|
||||
invalid_urls_ftp = ["ftp://not-apprise-url"]
|
||||
res = client.post(
|
||||
url_for("createwatch"),
|
||||
data=json.dumps({
|
||||
"url": "https://example.com",
|
||||
"notification_urls": invalid_urls_ftp
|
||||
}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key}
|
||||
)
|
||||
assert res.status_code == 400, "Should reject ftp:// notification URLs"
|
||||
assert b"is not a valid AppRise URL" in res.data, "Should provide AppRise validation error message"
|
||||
|
||||
# Test 3: Update watch with valid notification URLs
|
||||
new_valid_urls = ["posts://newserver.com"]
|
||||
res = client.put(
|
||||
url_for("watch", uuid=watch_uuid),
|
||||
data=json.dumps({"notification_urls": new_valid_urls}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key}
|
||||
)
|
||||
assert res.status_code == 200, "Should accept valid notification URLs on watch update"
|
||||
|
||||
# Verify the notification URLs were updated
|
||||
res = client.get(
|
||||
url_for("watch", uuid=watch_uuid),
|
||||
headers={'x-api-key': api_key}
|
||||
)
|
||||
assert res.status_code == 200
|
||||
assert res.json['notification_urls'] == new_valid_urls, "Valid notification URLs should be updated"
|
||||
|
||||
# Test 4: Try to update watch with invalid notification URLs (plain https:// not valid)
|
||||
invalid_https_url = ["https://example.com/webhook"]
|
||||
res = client.put(
|
||||
url_for("watch", uuid=watch_uuid),
|
||||
data=json.dumps({"notification_urls": invalid_https_url}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key}
|
||||
)
|
||||
assert res.status_code == 400, "Should reject https:// notification URLs on watch update"
|
||||
assert b"is not a valid AppRise URL" in res.data, "Should provide AppRise validation error message"
|
||||
|
||||
# Test 5: Update watch with non-list notification_urls (caught by OpenAPI schema validation)
|
||||
res = client.put(
|
||||
url_for("watch", uuid=watch_uuid),
|
||||
data=json.dumps({"notification_urls": "not-a-list"}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key}
|
||||
)
|
||||
assert res.status_code == 400, "Should reject non-list notification_urls"
|
||||
assert b"OpenAPI validation failed" in res.data or b"Request body validation error" in res.data
|
||||
|
||||
# Test 6: Verify original URLs are preserved after failed update
|
||||
res = client.get(
|
||||
url_for("watch", uuid=watch_uuid),
|
||||
headers={'x-api-key': api_key}
|
||||
)
|
||||
assert res.status_code == 200
|
||||
assert res.json['notification_urls'] == new_valid_urls, "URLs should remain unchanged after validation failure"
|
||||
|
||||
|
||||
def test_tag_notification_urls_validation(client, live_server, measure_memory_usage, datastore_path):
|
||||
"""Test that Tag PUT endpoint validates notification_urls."""
|
||||
from changedetectionio.model import Tag
|
||||
|
||||
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
|
||||
datastore = live_server.app.config['DATASTORE']
|
||||
|
||||
# Create a tag
|
||||
tag_uuid = datastore.add_tag(title="Test Tag")
|
||||
assert tag_uuid is not None
|
||||
|
||||
# Test 1: Update tag with valid notification URLs
|
||||
valid_urls = ["posts://example.com/tag-notify"]
|
||||
res = client.put(
|
||||
url_for("tag", uuid=tag_uuid),
|
||||
data=json.dumps({"notification_urls": valid_urls}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key}
|
||||
)
|
||||
assert res.status_code == 200, "Should accept valid notification URLs on tag update"
|
||||
|
||||
# Verify the notification URLs were saved
|
||||
tag = datastore.data['settings']['application']['tags'][tag_uuid]
|
||||
assert tag['notification_urls'] == valid_urls, "Valid notification URLs should be saved to tag"
|
||||
|
||||
# Test 2: Try to update tag with invalid notification URLs (https:// not valid)
|
||||
invalid_urls = ["https://example.com/webhook"]
|
||||
res = client.put(
|
||||
url_for("tag", uuid=tag_uuid),
|
||||
data=json.dumps({"notification_urls": invalid_urls}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key}
|
||||
)
|
||||
assert res.status_code == 400, "Should reject https:// notification URLs on tag update"
|
||||
assert b"is not a valid AppRise URL" in res.data, "Should provide AppRise validation error message"
|
||||
|
||||
# Test 3: Update tag with non-list notification_urls (caught by OpenAPI schema validation)
|
||||
res = client.put(
|
||||
url_for("tag", uuid=tag_uuid),
|
||||
data=json.dumps({"notification_urls": "not-a-list"}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key}
|
||||
)
|
||||
assert res.status_code == 400, "Should reject non-list notification_urls"
|
||||
assert b"OpenAPI validation failed" in res.data or b"Request body validation error" in res.data
|
||||
|
||||
# Test 4: Verify original URLs are preserved after failed update
|
||||
tag = datastore.data['settings']['application']['tags'][tag_uuid]
|
||||
assert tag['notification_urls'] == valid_urls, "URLs should remain unchanged after validation failure"
|
||||
@@ -80,7 +80,10 @@ def test_openapi_validation_invalid_field_in_request_body(client, live_server, m
|
||||
# Should get 400 error due to invalid field (this will be caught by internal validation)
|
||||
# Note: This tests the flow where OpenAPI validation passes but internal validation catches it
|
||||
assert res.status_code == 400, f"Expected 400 but got {res.status_code}"
|
||||
assert b"Additional properties are not allowed" in res.data, "Should contain validation error about additional properties"
|
||||
# With patternProperties for processor_config_*, the error message format changed slightly
|
||||
assert (b"Additional properties are not allowed" in res.data or
|
||||
b"does not match any of the regexes" in res.data), \
|
||||
"Should contain validation error about additional/invalid properties"
|
||||
|
||||
|
||||
def test_openapi_validation_import_wrong_content_type(client, live_server, measure_memory_usage, datastore_path):
|
||||
|
||||
@@ -18,7 +18,7 @@ def test_api_tags_listing(client, live_server, measure_memory_usage, datastore_p
|
||||
url_for("tags"),
|
||||
headers={'x-api-key': api_key}
|
||||
)
|
||||
assert res.text.strip() == "{}", "Should be empty list"
|
||||
assert res.get_data(as_text=True).strip() == "{}", "Should be empty list"
|
||||
assert res.status_code == 200
|
||||
|
||||
res = client.post(
|
||||
@@ -36,7 +36,7 @@ def test_api_tags_listing(client, live_server, measure_memory_usage, datastore_p
|
||||
headers={'x-api-key': api_key}
|
||||
)
|
||||
assert res.status_code == 200
|
||||
assert new_tag_uuid in res.text
|
||||
assert new_tag_uuid in res.get_data(as_text=True)
|
||||
assert res.json[new_tag_uuid]['title'] == tag_title
|
||||
assert res.json[new_tag_uuid]['notification_muted'] == False
|
||||
|
||||
@@ -118,6 +118,16 @@ def test_api_tags_listing(client, live_server, measure_memory_usage, datastore_p
|
||||
assert res.status_code == 200
|
||||
assert new_tag_uuid in res.json.get('tags', [])
|
||||
|
||||
# Test that tags are returned when listing ALL watches (issue #3854)
|
||||
res = client.get(
|
||||
url_for("createwatch"), # GET /api/v1/watch - list all watches
|
||||
headers={'x-api-key': api_key}
|
||||
)
|
||||
assert res.status_code == 200
|
||||
assert watch_uuid in res.json, "Watch should be in the list"
|
||||
assert 'tags' in res.json[watch_uuid], "Tags field should be present in watch list"
|
||||
assert new_tag_uuid in res.json[watch_uuid]['tags'], "Tag UUID should be in tags array"
|
||||
|
||||
# Check recheck by tag
|
||||
before_check_time = live_server.app.config['DATASTORE'].data['watching'][watch_uuid].get('last_checked')
|
||||
time.sleep(1)
|
||||
@@ -148,7 +158,7 @@ def test_api_tags_listing(client, live_server, measure_memory_usage, datastore_p
|
||||
headers={'x-api-key': api_key}
|
||||
)
|
||||
assert res.status_code == 200
|
||||
assert new_tag_uuid not in res.text
|
||||
assert new_tag_uuid not in res.get_data(as_text=True)
|
||||
|
||||
# Verify tag was removed from watch
|
||||
res = client.get(
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
import time
|
||||
from flask import url_for
|
||||
from .util import live_server_setup, extract_UUID_from_client, wait_for_all_checks
|
||||
from .util import live_server_setup, extract_UUID_from_client, wait_for_all_checks, delete_all_watches
|
||||
import os
|
||||
|
||||
|
||||
@@ -116,7 +116,7 @@ def test_check_ldjson_price_autodetect(client, live_server, measure_memory_usage
|
||||
# And not this cause its not the ld-json
|
||||
assert b"So let's see what happens" not in res.data
|
||||
|
||||
client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
|
||||
delete_all_watches(client)
|
||||
|
||||
##########################################################################################
|
||||
# And we shouldnt see the offer
|
||||
@@ -131,7 +131,7 @@ def test_check_ldjson_price_autodetect(client, live_server, measure_memory_usage
|
||||
assert b'ldjson-price-track-offer' not in res.data
|
||||
|
||||
##########################################################################################
|
||||
client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
|
||||
delete_all_watches(client)
|
||||
|
||||
|
||||
def _test_runner_check_bad_format_ignored(live_server, client, has_ldjson_price_data):
|
||||
@@ -147,7 +147,7 @@ def _test_runner_check_bad_format_ignored(live_server, client, has_ldjson_price_
|
||||
|
||||
|
||||
##########################################################################################
|
||||
client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
|
||||
delete_all_watches(client)
|
||||
|
||||
|
||||
def test_bad_ldjson_is_correctly_ignored(client, live_server, measure_memory_usage, datastore_path):
|
||||
|
||||
@@ -414,4 +414,4 @@ def test_plaintext_even_if_xml_content_and_can_apply_filters(client, live_server
|
||||
assert b'Abonnementen bijwerken' in res.data
|
||||
assert b'<foobar' not in res.data
|
||||
|
||||
res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
|
||||
res = delete_all_watches(client)
|
||||
|
||||
@@ -53,11 +53,21 @@ def test_backup(client, live_server, measure_memory_usage, datastore_path):
|
||||
|
||||
backup = ZipFile(io.BytesIO(res.data))
|
||||
l = backup.namelist()
|
||||
uuid4hex = re.compile('^[a-f0-9]{8}-?[a-f0-9]{4}-?4[a-f0-9]{3}-?[89ab][a-f0-9]{3}-?[a-f0-9]{12}.*txt', re.I)
|
||||
newlist = list(filter(uuid4hex.match, l)) # Read Note below
|
||||
|
||||
# Check for UUID-based txt files (history and snapshot)
|
||||
uuid4hex_txt = re.compile('^[a-f0-9]{8}-?[a-f0-9]{4}-?4[a-f0-9]{3}-?[89ab][a-f0-9]{3}-?[a-f0-9]{12}.*txt', re.I)
|
||||
txt_files = list(filter(uuid4hex_txt.match, l))
|
||||
# Should be two txt files in the archive (history and the snapshot)
|
||||
assert len(newlist) == 2
|
||||
assert len(txt_files) == 2
|
||||
|
||||
# Check for watch.json files (new format)
|
||||
uuid4hex_json = re.compile('^[a-f0-9]{8}-?[a-f0-9]{4}-?4[a-f0-9]{3}-?[89ab][a-f0-9]{3}-?[a-f0-9]{12}/watch\.json$', re.I)
|
||||
json_files = list(filter(uuid4hex_json.match, l))
|
||||
# Should be one watch.json file in the archive (the imported watch)
|
||||
assert len(json_files) == 1, f"Expected 1 watch.json file, found {len(json_files)}: {json_files}"
|
||||
|
||||
# Check for changedetection.json (settings file)
|
||||
assert 'changedetection.json' in l, "changedetection.json should be in backup"
|
||||
|
||||
# Get the latest one
|
||||
res = client.get(
|
||||
|
||||
@@ -6,7 +6,7 @@ from .util import (
|
||||
set_original_response,
|
||||
set_modified_response,
|
||||
live_server_setup,
|
||||
wait_for_all_checks
|
||||
wait_for_all_checks, delete_all_watches
|
||||
)
|
||||
from loguru import logger
|
||||
|
||||
@@ -104,7 +104,7 @@ def run_socketio_watch_update_test(client, live_server, password_mode="", datast
|
||||
assert watch.has_unviewed, "The watch was not marked as unviewed after content change"
|
||||
|
||||
# Clean up
|
||||
client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
|
||||
delete_all_watches(client)
|
||||
|
||||
def test_everything(live_server, client, measure_memory_usage, datastore_path):
|
||||
|
||||
|
||||
@@ -0,0 +1,661 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Tests for immediate commit-based persistence system.
|
||||
|
||||
Tests cover:
|
||||
- Watch.commit() persistence to disk
|
||||
- Concurrent commit safety (race conditions)
|
||||
- Processor config separation
|
||||
- Data loss prevention (settings, tags, watch modifications)
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import threading
|
||||
import time
|
||||
from flask import url_for
|
||||
from .util import wait_for_all_checks
|
||||
|
||||
|
||||
# ==============================================================================
|
||||
# 2. Commit() Persistence Tests
|
||||
# ==============================================================================
|
||||
|
||||
def test_watch_commit_persists_to_disk(client, live_server):
|
||||
"""Test that watch.commit() actually writes to watch.json immediately"""
|
||||
datastore = client.application.config.get('DATASTORE')
|
||||
|
||||
# Create a watch
|
||||
uuid = datastore.add_watch(url='http://example.com', extras={'title': 'Original Title'})
|
||||
watch = datastore.data['watching'][uuid]
|
||||
|
||||
# Modify and commit
|
||||
watch['title'] = 'Modified Title'
|
||||
watch['paused'] = True
|
||||
watch.commit()
|
||||
|
||||
# Read directly from disk (bypass datastore cache)
|
||||
watch_json_path = os.path.join(watch.data_dir, 'watch.json')
|
||||
assert os.path.exists(watch_json_path), "watch.json should exist on disk"
|
||||
|
||||
with open(watch_json_path, 'r') as f:
|
||||
disk_data = json.load(f)
|
||||
|
||||
assert disk_data['title'] == 'Modified Title', "Title should be persisted to disk"
|
||||
assert disk_data['paused'] == True, "Paused state should be persisted to disk"
|
||||
assert disk_data['uuid'] == uuid, "UUID should match"
|
||||
|
||||
|
||||
def test_watch_commit_survives_reload(client, live_server):
|
||||
"""Test that committed changes survive datastore reload"""
|
||||
from changedetectionio.store import ChangeDetectionStore
|
||||
|
||||
datastore = client.application.config.get('DATASTORE')
|
||||
datastore_path = datastore.datastore_path
|
||||
|
||||
# Create and modify a watch
|
||||
uuid = datastore.add_watch(url='http://example.com', extras={'title': 'Test Watch'})
|
||||
watch = datastore.data['watching'][uuid]
|
||||
watch['title'] = 'Persisted Title'
|
||||
watch['paused'] = True
|
||||
watch['tags'] = ['tag-1', 'tag-2']
|
||||
watch.commit()
|
||||
|
||||
# Simulate app restart - create new datastore instance
|
||||
datastore2 = ChangeDetectionStore(datastore_path=datastore_path)
|
||||
datastore2.reload_state(
|
||||
datastore_path=datastore_path,
|
||||
include_default_watches=False,
|
||||
version_tag='test'
|
||||
)
|
||||
|
||||
# Check data survived
|
||||
assert uuid in datastore2.data['watching'], "Watch should exist after reload"
|
||||
reloaded_watch = datastore2.data['watching'][uuid]
|
||||
assert reloaded_watch['title'] == 'Persisted Title', "Title should survive reload"
|
||||
assert reloaded_watch['paused'] == True, "Paused state should survive reload"
|
||||
assert reloaded_watch['tags'] == ['tag-1', 'tag-2'], "Tags should survive reload"
|
||||
|
||||
|
||||
def test_watch_commit_atomic_on_crash(client, live_server):
|
||||
"""Test that atomic writes prevent corruption (temp file pattern)"""
|
||||
datastore = client.application.config.get('DATASTORE')
|
||||
|
||||
uuid = datastore.add_watch(url='http://example.com', extras={'title': 'Original'})
|
||||
watch = datastore.data['watching'][uuid]
|
||||
|
||||
# First successful commit
|
||||
watch['title'] = 'First Save'
|
||||
watch.commit()
|
||||
|
||||
# Verify watch.json exists and is valid
|
||||
watch_json_path = os.path.join(watch.data_dir, 'watch.json')
|
||||
with open(watch_json_path, 'r') as f:
|
||||
data = json.load(f) # Should not raise JSONDecodeError
|
||||
assert data['title'] == 'First Save'
|
||||
|
||||
# Second commit - even if interrupted, original file should be intact
|
||||
# (atomic write uses temp file + rename, so original is never corrupted)
|
||||
watch['title'] = 'Second Save'
|
||||
watch.commit()
|
||||
|
||||
with open(watch_json_path, 'r') as f:
|
||||
data = json.load(f)
|
||||
assert data['title'] == 'Second Save'
|
||||
|
||||
|
||||
def test_multiple_watches_commit_independently(client, live_server):
|
||||
"""Test that committing one watch doesn't affect others"""
|
||||
datastore = client.application.config.get('DATASTORE')
|
||||
|
||||
# Create multiple watches
|
||||
uuid1 = datastore.add_watch(url='http://example1.com', extras={'title': 'Watch 1'})
|
||||
uuid2 = datastore.add_watch(url='http://example2.com', extras={'title': 'Watch 2'})
|
||||
uuid3 = datastore.add_watch(url='http://example3.com', extras={'title': 'Watch 3'})
|
||||
|
||||
watch1 = datastore.data['watching'][uuid1]
|
||||
watch2 = datastore.data['watching'][uuid2]
|
||||
watch3 = datastore.data['watching'][uuid3]
|
||||
|
||||
# Modify and commit only watch2
|
||||
watch2['title'] = 'Modified Watch 2'
|
||||
watch2['paused'] = True
|
||||
watch2.commit()
|
||||
|
||||
# Read all from disk
|
||||
def read_watch_json(uuid):
|
||||
watch = datastore.data['watching'][uuid]
|
||||
path = os.path.join(watch.data_dir, 'watch.json')
|
||||
with open(path, 'r') as f:
|
||||
return json.load(f)
|
||||
|
||||
data1 = read_watch_json(uuid1)
|
||||
data2 = read_watch_json(uuid2)
|
||||
data3 = read_watch_json(uuid3)
|
||||
|
||||
# Only watch2 should have changes
|
||||
assert data1['title'] == 'Watch 1', "Watch 1 should be unchanged"
|
||||
assert data1['paused'] == False, "Watch 1 should not be paused"
|
||||
|
||||
assert data2['title'] == 'Modified Watch 2', "Watch 2 should be modified"
|
||||
assert data2['paused'] == True, "Watch 2 should be paused"
|
||||
|
||||
assert data3['title'] == 'Watch 3', "Watch 3 should be unchanged"
|
||||
assert data3['paused'] == False, "Watch 3 should not be paused"
|
||||
|
||||
|
||||
# ==============================================================================
|
||||
# 3. Concurrency/Race Condition Tests
|
||||
# ==============================================================================
|
||||
|
||||
def test_concurrent_watch_commits_dont_corrupt(client, live_server):
|
||||
"""Test that simultaneous commits to same watch don't corrupt JSON"""
|
||||
datastore = client.application.config.get('DATASTORE')
|
||||
|
||||
uuid = datastore.add_watch(url='http://example.com', extras={'title': 'Test'})
|
||||
watch = datastore.data['watching'][uuid]
|
||||
|
||||
errors = []
|
||||
|
||||
def modify_and_commit(field, value):
|
||||
try:
|
||||
watch[field] = value
|
||||
watch.commit()
|
||||
except Exception as e:
|
||||
errors.append(e)
|
||||
|
||||
# Run 10 concurrent commits
|
||||
threads = []
|
||||
for i in range(10):
|
||||
t = threading.Thread(target=modify_and_commit, args=('title', f'Title {i}'))
|
||||
threads.append(t)
|
||||
t.start()
|
||||
|
||||
for t in threads:
|
||||
t.join()
|
||||
|
||||
# Should not have any errors
|
||||
assert len(errors) == 0, f"Expected no errors, got: {errors}"
|
||||
|
||||
# JSON file should still be valid (not corrupted)
|
||||
watch_json_path = os.path.join(watch.data_dir, 'watch.json')
|
||||
with open(watch_json_path, 'r') as f:
|
||||
data = json.load(f) # Should not raise JSONDecodeError
|
||||
assert data['uuid'] == uuid, "UUID should still be correct"
|
||||
assert 'Title' in data['title'], "Title should contain 'Title'"
|
||||
|
||||
|
||||
def test_concurrent_modifications_during_commit(client, live_server):
|
||||
"""Test that modifying watch during commit doesn't cause RuntimeError"""
|
||||
datastore = client.application.config.get('DATASTORE')
|
||||
|
||||
uuid = datastore.add_watch(url='http://example.com', extras={'title': 'Test'})
|
||||
watch = datastore.data['watching'][uuid]
|
||||
|
||||
errors = []
|
||||
stop_flag = threading.Event()
|
||||
|
||||
def keep_modifying():
|
||||
"""Continuously modify watch"""
|
||||
try:
|
||||
i = 0
|
||||
while not stop_flag.is_set():
|
||||
watch['title'] = f'Title {i}'
|
||||
watch['paused'] = i % 2 == 0
|
||||
i += 1
|
||||
time.sleep(0.001)
|
||||
except Exception as e:
|
||||
errors.append(('modifier', e))
|
||||
|
||||
def keep_committing():
|
||||
"""Continuously commit watch"""
|
||||
try:
|
||||
for _ in range(20):
|
||||
watch.commit()
|
||||
time.sleep(0.005)
|
||||
except Exception as e:
|
||||
errors.append(('committer', e))
|
||||
|
||||
# Start concurrent modification and commits
|
||||
modifier = threading.Thread(target=keep_modifying)
|
||||
committer = threading.Thread(target=keep_committing)
|
||||
|
||||
modifier.start()
|
||||
committer.start()
|
||||
|
||||
committer.join()
|
||||
stop_flag.set()
|
||||
modifier.join()
|
||||
|
||||
# Should not have RuntimeError from dict changing during iteration
|
||||
runtime_errors = [e for source, e in errors if isinstance(e, RuntimeError)]
|
||||
assert len(runtime_errors) == 0, f"Should not have RuntimeError, got: {runtime_errors}"
|
||||
|
||||
|
||||
def test_datastore_lock_protects_commit_snapshot(client, live_server):
|
||||
"""Test that datastore.lock prevents race conditions during deepcopy"""
|
||||
datastore = client.application.config.get('DATASTORE')
|
||||
|
||||
uuid = datastore.add_watch(url='http://example.com', extras={'title': 'Test'})
|
||||
watch = datastore.data['watching'][uuid]
|
||||
|
||||
# Add some complex nested data
|
||||
watch['browser_steps'] = [
|
||||
{'operation': 'click', 'selector': '#foo'},
|
||||
{'operation': 'wait', 'seconds': 5}
|
||||
]
|
||||
|
||||
errors = []
|
||||
commits_succeeded = [0]
|
||||
|
||||
def rapid_commits():
|
||||
try:
|
||||
for i in range(50):
|
||||
watch['title'] = f'Title {i}'
|
||||
watch.commit()
|
||||
commits_succeeded[0] += 1
|
||||
time.sleep(0.001)
|
||||
except Exception as e:
|
||||
errors.append(e)
|
||||
|
||||
# Multiple threads doing rapid commits
|
||||
threads = [threading.Thread(target=rapid_commits) for _ in range(3)]
|
||||
|
||||
for t in threads:
|
||||
t.start()
|
||||
for t in threads:
|
||||
t.join()
|
||||
|
||||
assert len(errors) == 0, f"Expected no errors, got: {errors}"
|
||||
assert commits_succeeded[0] == 150, f"Expected 150 commits, got {commits_succeeded[0]}"
|
||||
|
||||
# Final JSON should be valid
|
||||
watch_json_path = os.path.join(watch.data_dir, 'watch.json')
|
||||
with open(watch_json_path, 'r') as f:
|
||||
data = json.load(f)
|
||||
assert data['uuid'] == uuid
|
||||
|
||||
|
||||
# ==============================================================================
|
||||
# 4. Processor Config Separation Tests
|
||||
# ==============================================================================
|
||||
|
||||
def test_processor_config_never_in_watch_json(client, live_server):
|
||||
"""Test that processor_config_* fields are filtered out of watch.json"""
|
||||
datastore = client.application.config.get('DATASTORE')
|
||||
|
||||
uuid = datastore.add_watch(
|
||||
url='http://example.com',
|
||||
extras={
|
||||
'title': 'Test Watch',
|
||||
'processor': 'restock_diff'
|
||||
}
|
||||
)
|
||||
|
||||
watch = datastore.data['watching'][uuid]
|
||||
|
||||
# Try to set processor config fields (these should be filtered during commit)
|
||||
watch['processor_config_price_threshold'] = 10.0
|
||||
watch['processor_config_some_setting'] = 'value'
|
||||
watch['processor_config_another'] = {'nested': 'data'}
|
||||
watch.commit()
|
||||
|
||||
# Read watch.json from disk
|
||||
watch_json_path = os.path.join(watch.data_dir, 'watch.json')
|
||||
with open(watch_json_path, 'r') as f:
|
||||
data = json.load(f)
|
||||
|
||||
# Verify processor_config_* fields are NOT in watch.json
|
||||
for key in data.keys():
|
||||
assert not key.startswith('processor_config_'), \
|
||||
f"Found {key} in watch.json - processor configs should be in separate file!"
|
||||
|
||||
# Normal fields should still be there
|
||||
assert data['title'] == 'Test Watch'
|
||||
assert data['processor'] == 'restock_diff'
|
||||
|
||||
|
||||
def test_api_post_saves_processor_config_separately(client, live_server):
|
||||
"""Test that API POST saves processor configs to {processor}.json"""
|
||||
import json
|
||||
from changedetectionio.processors import extract_processor_config_from_form_data
|
||||
|
||||
# Get API key
|
||||
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
|
||||
|
||||
# Create watch via API with processor config
|
||||
response = client.post(
|
||||
url_for("createwatch"),
|
||||
data=json.dumps({
|
||||
'url': 'http://example.com',
|
||||
'processor': 'restock_diff',
|
||||
'processor_config_price_threshold': 10.0,
|
||||
'processor_config_in_stock_only': True
|
||||
}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key}
|
||||
)
|
||||
|
||||
assert response.status_code in (200, 201), f"Expected 200/201, got {response.status_code}"
|
||||
uuid = response.json.get('uuid')
|
||||
assert uuid, "Should return UUID"
|
||||
|
||||
datastore = client.application.config.get('DATASTORE')
|
||||
watch = datastore.data['watching'][uuid]
|
||||
|
||||
# Check that processor config file exists
|
||||
processor_config_path = os.path.join(watch.data_dir, 'restock_diff.json')
|
||||
assert os.path.exists(processor_config_path), "Processor config file should exist"
|
||||
|
||||
with open(processor_config_path, 'r') as f:
|
||||
config = json.load(f)
|
||||
|
||||
# Verify fields are saved WITHOUT processor_config_ prefix
|
||||
assert config.get('price_threshold') == 10.0, "Should have price_threshold (no prefix)"
|
||||
assert config.get('in_stock_only') == True, "Should have in_stock_only (no prefix)"
|
||||
assert 'processor_config_price_threshold' not in config, "Should NOT have prefixed keys"
|
||||
|
||||
|
||||
def test_api_put_saves_processor_config_separately(client, live_server):
|
||||
"""Test that API PUT updates processor configs in {processor}.json"""
|
||||
import json
|
||||
datastore = client.application.config.get('DATASTORE')
|
||||
|
||||
# Get API key
|
||||
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
|
||||
|
||||
# Create watch
|
||||
uuid = datastore.add_watch(
|
||||
url='http://example.com',
|
||||
extras={'processor': 'restock_diff'}
|
||||
)
|
||||
|
||||
# Update via API with processor config
|
||||
response = client.put(
|
||||
url_for("watch", uuid=uuid),
|
||||
data=json.dumps({
|
||||
'processor_config_price_threshold': 15.0,
|
||||
'processor_config_min_stock': 5
|
||||
}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key}
|
||||
)
|
||||
|
||||
# PUT might return different status codes, 200 or 204 are both OK
|
||||
assert response.status_code in (200, 204), f"Expected 200/204, got {response.status_code}: {response.data}"
|
||||
|
||||
watch = datastore.data['watching'][uuid]
|
||||
|
||||
# Check processor config file
|
||||
processor_config_path = os.path.join(watch.data_dir, 'restock_diff.json')
|
||||
assert os.path.exists(processor_config_path), "Processor config file should exist"
|
||||
|
||||
with open(processor_config_path, 'r') as f:
|
||||
config = json.load(f)
|
||||
|
||||
assert config.get('price_threshold') == 15.0, "Should have updated price_threshold"
|
||||
assert config.get('min_stock') == 5, "Should have min_stock"
|
||||
|
||||
|
||||
def test_ui_edit_saves_processor_config_separately(client, live_server):
|
||||
"""Test that processor_config_* fields never appear in watch.json (even from UI)"""
|
||||
datastore = client.application.config.get('DATASTORE')
|
||||
|
||||
# Create watch
|
||||
uuid = datastore.add_watch(
|
||||
url='http://example.com',
|
||||
extras={'processor': 'text_json_diff', 'title': 'Test'}
|
||||
)
|
||||
|
||||
watch = datastore.data['watching'][uuid]
|
||||
|
||||
# Simulate someone accidentally trying to set processor_config fields directly
|
||||
watch['processor_config_should_not_save'] = 'test_value'
|
||||
watch['processor_config_another_field'] = 123
|
||||
watch['normal_field'] = 'this_should_save'
|
||||
watch.commit()
|
||||
|
||||
# Check watch.json has NO processor_config_* fields (main point of this test)
|
||||
watch_json_path = os.path.join(watch.data_dir, 'watch.json')
|
||||
with open(watch_json_path, 'r') as f:
|
||||
watch_data = json.load(f)
|
||||
|
||||
for key in watch_data.keys():
|
||||
assert not key.startswith('processor_config_'), \
|
||||
f"Found {key} in watch.json - processor configs should be filtered during commit"
|
||||
|
||||
# Verify normal fields still save
|
||||
assert watch_data['normal_field'] == 'this_should_save', "Normal fields should save"
|
||||
assert watch_data['title'] == 'Test', "Original fields should still be there"
|
||||
|
||||
|
||||
def test_browser_steps_normalized_to_empty_list(client, live_server):
|
||||
"""Test that meaningless browser_steps are normalized to [] during commit"""
|
||||
datastore = client.application.config.get('DATASTORE')
|
||||
|
||||
uuid = datastore.add_watch(url='http://example.com')
|
||||
watch = datastore.data['watching'][uuid]
|
||||
|
||||
# Set browser_steps to meaningless values
|
||||
watch['browser_steps'] = [
|
||||
{'operation': 'Choose one', 'selector': ''},
|
||||
{'operation': 'Goto site', 'selector': ''},
|
||||
{'operation': '', 'selector': '#foo'}
|
||||
]
|
||||
watch.commit()
|
||||
|
||||
# Read from disk
|
||||
watch_json_path = os.path.join(watch.data_dir, 'watch.json')
|
||||
with open(watch_json_path, 'r') as f:
|
||||
data = json.load(f)
|
||||
|
||||
# Should be normalized to empty list
|
||||
assert data['browser_steps'] == [], "Meaningless browser_steps should be normalized to []"
|
||||
|
||||
|
||||
# ==============================================================================
|
||||
# 5. Data Loss Prevention Tests
|
||||
# ==============================================================================
|
||||
|
||||
def test_settings_persist_after_update(client, live_server):
|
||||
"""Test that settings updates are committed and survive restart"""
|
||||
from changedetectionio.store import ChangeDetectionStore
|
||||
|
||||
datastore = client.application.config.get('DATASTORE')
|
||||
datastore_path = datastore.datastore_path
|
||||
|
||||
# Update settings directly (bypass form validation issues)
|
||||
datastore.data['settings']['application']['empty_pages_are_a_change'] = True
|
||||
datastore.data['settings']['application']['fetch_backend'] = 'html_requests'
|
||||
datastore.data['settings']['requests']['time_between_check']['minutes'] = 120
|
||||
datastore.commit()
|
||||
|
||||
# Simulate restart
|
||||
datastore2 = ChangeDetectionStore(datastore_path=datastore_path)
|
||||
datastore2.reload_state(
|
||||
datastore_path=datastore_path,
|
||||
include_default_watches=False,
|
||||
version_tag='test'
|
||||
)
|
||||
|
||||
# Verify settings survived
|
||||
assert datastore2.data['settings']['application']['empty_pages_are_a_change'] == True, "empty_pages_are_a_change should persist"
|
||||
assert datastore2.data['settings']['application']['fetch_backend'] == 'html_requests', "fetch_backend should persist"
|
||||
assert datastore2.data['settings']['requests']['time_between_check']['minutes'] == 120, "time_between_check should persist"
|
||||
|
||||
|
||||
def test_tag_mute_persists(client, live_server):
|
||||
"""Test that tag mute/unmute operations persist"""
|
||||
from changedetectionio.store import ChangeDetectionStore
|
||||
|
||||
datastore = client.application.config.get('DATASTORE')
|
||||
datastore_path = datastore.datastore_path
|
||||
|
||||
# Add a tag
|
||||
tag_uuid = datastore.add_tag('Test Tag')
|
||||
|
||||
# Mute the tag
|
||||
response = client.get(url_for("tags.mute", uuid=tag_uuid))
|
||||
assert response.status_code == 302 # Redirect
|
||||
|
||||
# Verify muted in memory
|
||||
assert datastore.data['settings']['application']['tags'][tag_uuid]['notification_muted'] == True
|
||||
|
||||
# Simulate restart
|
||||
datastore2 = ChangeDetectionStore(datastore_path=datastore_path)
|
||||
datastore2.reload_state(
|
||||
datastore_path=datastore_path,
|
||||
include_default_watches=False,
|
||||
version_tag='test'
|
||||
)
|
||||
|
||||
# Verify mute state survived
|
||||
assert tag_uuid in datastore2.data['settings']['application']['tags']
|
||||
assert datastore2.data['settings']['application']['tags'][tag_uuid]['notification_muted'] == True
|
||||
|
||||
|
||||
def test_tag_delete_removes_from_watches(client, live_server):
|
||||
"""Test that deleting a tag removes it from all watches"""
|
||||
datastore = client.application.config.get('DATASTORE')
|
||||
|
||||
# Create a tag
|
||||
tag_uuid = datastore.add_tag('Test Tag')
|
||||
|
||||
# Create watches with this tag
|
||||
uuid1 = datastore.add_watch(url='http://example1.com')
|
||||
uuid2 = datastore.add_watch(url='http://example2.com')
|
||||
uuid3 = datastore.add_watch(url='http://example3.com')
|
||||
|
||||
watch1 = datastore.data['watching'][uuid1]
|
||||
watch2 = datastore.data['watching'][uuid2]
|
||||
watch3 = datastore.data['watching'][uuid3]
|
||||
|
||||
watch1['tags'] = [tag_uuid]
|
||||
watch1.commit()
|
||||
watch2['tags'] = [tag_uuid, 'other-tag']
|
||||
watch2.commit()
|
||||
# watch3 has no tags
|
||||
|
||||
# Delete the tag
|
||||
response = client.get(url_for("tags.delete", uuid=tag_uuid))
|
||||
assert response.status_code == 302
|
||||
|
||||
# Wait for background thread to complete
|
||||
time.sleep(1)
|
||||
|
||||
# Tag should be removed from settings
|
||||
assert tag_uuid not in datastore.data['settings']['application']['tags']
|
||||
|
||||
# Tag should be removed from watches and persisted
|
||||
def check_watch_tags(uuid):
|
||||
watch = datastore.data['watching'][uuid]
|
||||
watch_json_path = os.path.join(watch.data_dir, 'watch.json')
|
||||
with open(watch_json_path, 'r') as f:
|
||||
return json.load(f)['tags']
|
||||
|
||||
assert tag_uuid not in check_watch_tags(uuid1), "Tag should be removed from watch1"
|
||||
assert tag_uuid not in check_watch_tags(uuid2), "Tag should be removed from watch2"
|
||||
assert 'other-tag' in check_watch_tags(uuid2), "Other tags should remain in watch2"
|
||||
assert check_watch_tags(uuid3) == [], "Watch3 should still have empty tags"
|
||||
|
||||
|
||||
def test_watch_pause_unpause_persists(client, live_server):
|
||||
"""Test that pause/unpause operations commit and persist"""
|
||||
datastore = client.application.config.get('DATASTORE')
|
||||
|
||||
# Get API key
|
||||
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
|
||||
|
||||
uuid = datastore.add_watch(url='http://example.com')
|
||||
watch = datastore.data['watching'][uuid]
|
||||
|
||||
# Pause via API
|
||||
response = client.get(url_for("watch", uuid=uuid, paused='paused'), headers={'x-api-key': api_key})
|
||||
assert response.status_code == 200
|
||||
|
||||
# Check persisted to disk
|
||||
watch_json_path = os.path.join(watch.data_dir, 'watch.json')
|
||||
with open(watch_json_path, 'r') as f:
|
||||
data = json.load(f)
|
||||
assert data['paused'] == True, "Pause should be persisted"
|
||||
|
||||
# Unpause
|
||||
response = client.get(url_for("watch", uuid=uuid, paused='unpaused'), headers={'x-api-key': api_key})
|
||||
assert response.status_code == 200
|
||||
|
||||
with open(watch_json_path, 'r') as f:
|
||||
data = json.load(f)
|
||||
assert data['paused'] == False, "Unpause should be persisted"
|
||||
|
||||
|
||||
def test_watch_mute_unmute_persists(client, live_server):
|
||||
"""Test that mute/unmute operations commit and persist"""
|
||||
datastore = client.application.config.get('DATASTORE')
|
||||
|
||||
# Get API key
|
||||
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
|
||||
|
||||
uuid = datastore.add_watch(url='http://example.com')
|
||||
watch = datastore.data['watching'][uuid]
|
||||
|
||||
# Mute via API
|
||||
response = client.get(url_for("watch", uuid=uuid, muted='muted'), headers={'x-api-key': api_key})
|
||||
assert response.status_code == 200
|
||||
|
||||
# Check persisted to disk
|
||||
watch_json_path = os.path.join(watch.data_dir, 'watch.json')
|
||||
with open(watch_json_path, 'r') as f:
|
||||
data = json.load(f)
|
||||
assert data['notification_muted'] == True, "Mute should be persisted"
|
||||
|
||||
# Unmute
|
||||
response = client.get(url_for("watch", uuid=uuid, muted='unmuted'), headers={'x-api-key': api_key})
|
||||
assert response.status_code == 200
|
||||
|
||||
with open(watch_json_path, 'r') as f:
|
||||
data = json.load(f)
|
||||
assert data['notification_muted'] == False, "Unmute should be persisted"
|
||||
|
||||
|
||||
def test_ui_watch_edit_persists_all_fields(client, live_server):
|
||||
"""Test that UI watch edit form persists all modified fields"""
|
||||
from changedetectionio.store import ChangeDetectionStore
|
||||
|
||||
datastore = client.application.config.get('DATASTORE')
|
||||
datastore_path = datastore.datastore_path
|
||||
|
||||
# Create watch
|
||||
uuid = datastore.add_watch(url='http://example.com')
|
||||
|
||||
# Edit via UI with multiple field changes
|
||||
response = client.post(
|
||||
url_for("ui.ui_edit.edit_page", uuid=uuid),
|
||||
data={
|
||||
'url': 'http://updated-example.com',
|
||||
'title': 'Updated Watch Title',
|
||||
'time_between_check-hours': '2',
|
||||
'time_between_check-minutes': '30',
|
||||
'include_filters': '#content',
|
||||
'fetch_backend': 'html_requests',
|
||||
'method': 'POST',
|
||||
'ignore_text': 'Advertisement\nTracking'
|
||||
},
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
assert b"Updated watch" in response.data or b"Saved" in response.data
|
||||
|
||||
# Simulate restart
|
||||
datastore2 = ChangeDetectionStore(datastore_path=datastore_path)
|
||||
datastore2.reload_state(
|
||||
datastore_path=datastore_path,
|
||||
include_default_watches=False,
|
||||
version_tag='test'
|
||||
)
|
||||
|
||||
# Verify all fields survived
|
||||
watch = datastore2.data['watching'][uuid]
|
||||
assert watch['url'] == 'http://updated-example.com'
|
||||
assert watch['title'] == 'Updated Watch Title'
|
||||
assert watch['time_between_check']['hours'] == 2
|
||||
assert watch['time_between_check']['minutes'] == 30
|
||||
assert watch['fetch_backend'] == 'html_requests'
|
||||
assert watch['method'] == 'POST'
|
||||
@@ -69,7 +69,7 @@ def test_conditions_with_text_and_number(client, live_server, measure_memory_usa
|
||||
# 1. The page filtered text must contain "5" (first digit of value)
|
||||
# 2. The extracted number should be >= 20 and <= 100
|
||||
res = client.post(
|
||||
url_for("ui.ui_edit.edit_page", uuid="first"),
|
||||
url_for("ui.ui_edit.edit_page", uuid=uuid),
|
||||
data={
|
||||
"url": test_url,
|
||||
"fetch_backend": "html_requests",
|
||||
@@ -110,25 +110,20 @@ def test_conditions_with_text_and_number(client, live_server, measure_memory_usa
|
||||
|
||||
wait_for_all_checks(client)
|
||||
client.get(url_for("ui.mark_all_viewed"), follow_redirects=True)
|
||||
time.sleep(0.2)
|
||||
|
||||
wait_for_all_checks(client)
|
||||
time.sleep(1)
|
||||
|
||||
# Case 1
|
||||
set_number_in_range_response(datastore_path=datastore_path, number="70.5")
|
||||
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
||||
wait_for_all_checks(client)
|
||||
|
||||
time.sleep(2)
|
||||
# 75 is > 20 and < 100 and contains "5"
|
||||
res = client.get(url_for("watchlist.index"))
|
||||
assert b'has-unread-changes' in res.data
|
||||
|
||||
|
||||
# Case 2: Change with one condition violated
|
||||
# Number out of range (150) but contains '5'
|
||||
client.get(url_for("ui.mark_all_viewed"), follow_redirects=True)
|
||||
time.sleep(0.2)
|
||||
|
||||
set_number_out_of_range_response(datastore_path=datastore_path, number="150.5")
|
||||
|
||||
@@ -154,7 +149,6 @@ def test_condition_validate_rule_row(client, live_server, measure_memory_usage,
|
||||
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
||||
wait_for_all_checks(client)
|
||||
|
||||
uuid = next(iter(live_server.app.config['DATASTORE'].data['watching']))
|
||||
|
||||
# the front end submits the current form state which should override the watch in a temporary copy
|
||||
res = client.post(
|
||||
@@ -195,12 +189,8 @@ def test_condition_validate_rule_row(client, live_server, measure_memory_usage,
|
||||
)
|
||||
assert res.status_code == 200
|
||||
assert b'false' in res.data
|
||||
# cleanup for the next
|
||||
client.get(
|
||||
url_for("ui.form_delete", uuid="all"),
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
delete_all_watches(client)
|
||||
|
||||
|
||||
# If there was only a change in the whitespacing, then we shouldnt have a change detected
|
||||
@@ -230,17 +220,12 @@ def test_wordcount_conditions_plugin(client, live_server, measure_memory_usage,
|
||||
|
||||
# Check it saved
|
||||
res = client.get(
|
||||
url_for("ui.ui_edit.edit_page", uuid="first"),
|
||||
url_for("ui.ui_edit.edit_page", uuid=uuid),
|
||||
)
|
||||
|
||||
# Assert the word count is counted correctly
|
||||
assert b'<td>13</td>' in res.data
|
||||
|
||||
# cleanup for the next
|
||||
client.get(
|
||||
url_for("ui.form_delete", uuid="all"),
|
||||
follow_redirects=True
|
||||
)
|
||||
delete_all_watches(client)
|
||||
|
||||
# If there was only a change in the whitespacing, then we shouldnt have a change detected
|
||||
def test_lev_conditions_plugin(client, live_server, measure_memory_usage, datastore_path):
|
||||
|
||||
@@ -64,6 +64,7 @@ def test_DNS_errors(client, live_server, measure_memory_usage, datastore_path):
|
||||
follow_redirects=True
|
||||
)
|
||||
assert b"1 Imported" in res.data
|
||||
res = client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
||||
|
||||
# Give the thread time to pick it up
|
||||
wait_for_all_checks(client)
|
||||
@@ -79,7 +80,7 @@ def test_DNS_errors(client, live_server, measure_memory_usage, datastore_path):
|
||||
)
|
||||
assert found_name_resolution_error
|
||||
# Should always record that we tried
|
||||
assert bytes("just now".encode('utf-8')) in res.data
|
||||
assert "just now".encode('utf-8') in res.data or 'seconds ago'.encode('utf-8') in res.data
|
||||
delete_all_watches(client)
|
||||
|
||||
# Re 1513
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import os
|
||||
import time
|
||||
from flask import url_for
|
||||
from .util import set_original_response, wait_for_all_checks, wait_for_notification_endpoint_output
|
||||
from .util import set_original_response, wait_for_all_checks, wait_for_notification_endpoint_output, delete_all_watches
|
||||
from ..notification import valid_notification_formats
|
||||
|
||||
|
||||
@@ -118,8 +118,10 @@ def run_filter_test(client, live_server, content_filter, app_notification_format
|
||||
res = client.get(url_for("watchlist.index"))
|
||||
assert b'Warning, no filters were found' in res.data
|
||||
assert not os.path.isfile(notification_file)
|
||||
time.sleep(1)
|
||||
time.sleep(2)
|
||||
wait_for_all_checks(client)
|
||||
|
||||
wait_for_all_checks(client)
|
||||
assert live_server.app.config['DATASTORE'].data['watching'][uuid]['consecutive_filter_failures'] == 5
|
||||
|
||||
time.sleep(2)
|
||||
@@ -178,6 +180,7 @@ def run_filter_test(client, live_server, content_filter, app_notification_format
|
||||
follow_redirects=True
|
||||
)
|
||||
os.unlink(notification_file)
|
||||
delete_all_watches(client)
|
||||
|
||||
|
||||
def test_check_include_filters_failure_notification(client, live_server, measure_memory_usage, datastore_path):
|
||||
@@ -185,10 +188,12 @@ def test_check_include_filters_failure_notification(client, live_server, measure
|
||||
run_filter_test(client=client, live_server=live_server, content_filter='#nope-doesnt-exist', app_notification_format=valid_notification_formats.get('htmlcolor'), datastore_path=datastore_path)
|
||||
# Check markup send conversion didnt affect plaintext preference
|
||||
run_filter_test(client=client, live_server=live_server, content_filter='#nope-doesnt-exist', app_notification_format=valid_notification_formats.get('text'), datastore_path=datastore_path)
|
||||
delete_all_watches(client)
|
||||
|
||||
def test_check_xpath_filter_failure_notification(client, live_server, measure_memory_usage, datastore_path):
|
||||
# # live_server_setup(live_server) # Setup on conftest per function
|
||||
run_filter_test(client=client, live_server=live_server, content_filter='//*[@id="nope-doesnt-exist"]', app_notification_format=valid_notification_formats.get('htmlcolor'), datastore_path=datastore_path)
|
||||
delete_all_watches(client)
|
||||
|
||||
# Test that notification is never sent
|
||||
|
||||
@@ -197,3 +202,4 @@ def test_basic_markup_from_text(client, live_server, measure_memory_usage, datas
|
||||
from ..notification.handler import markup_text_links_to_html
|
||||
x = markup_text_links_to_html("hello https://google.com")
|
||||
assert 'a href' in x
|
||||
delete_all_watches(client)
|
||||
|
||||
@@ -5,6 +5,8 @@ from flask import url_for
|
||||
from .util import live_server_setup, wait_for_all_checks, extract_rss_token_from_UI, get_UUID_for_tag_name, extract_UUID_from_client, delete_all_watches
|
||||
import os
|
||||
|
||||
from ..store import ChangeDetectionStore
|
||||
|
||||
|
||||
# def test_setup(client, live_server, measure_memory_usage, datastore_path):
|
||||
# live_server_setup(live_server) # Setup on conftest per function
|
||||
@@ -166,7 +168,8 @@ def test_tag_add_in_ui(client, live_server, measure_memory_usage, datastore_path
|
||||
delete_all_watches(client)
|
||||
|
||||
def test_group_tag_notification(client, live_server, measure_memory_usage, datastore_path):
|
||||
|
||||
delete_all_watches(client)
|
||||
|
||||
set_original_response(datastore_path=datastore_path)
|
||||
|
||||
test_url = url_for('test_endpoint', _external=True)
|
||||
@@ -473,3 +476,143 @@ the {test} appeared before. {test in res.data[:n]=}
|
||||
n += t_index + len(test)
|
||||
|
||||
delete_all_watches(client)
|
||||
|
||||
|
||||
def test_tag_json_persistence(client, live_server, measure_memory_usage, datastore_path):
|
||||
"""
|
||||
Test that tags are saved to individual tag.json files and loaded correctly.
|
||||
|
||||
This test verifies the update_27 tag storage refactoring:
|
||||
- Tags are saved to {uuid}/tag.json files
|
||||
- Tags persist across datastore restarts
|
||||
- Tag edits write to tag.json
|
||||
- Tag deletion removes tag.json file
|
||||
"""
|
||||
import json
|
||||
|
||||
datastore = client.application.config.get('DATASTORE')
|
||||
|
||||
# 1. Create a tag
|
||||
res = client.post(
|
||||
url_for("tags.form_tag_add"),
|
||||
data={"name": "persistence-test-tag"},
|
||||
follow_redirects=True
|
||||
)
|
||||
assert b"Tag added" in res.data
|
||||
|
||||
tag_uuid = get_UUID_for_tag_name(client, name="persistence-test-tag")
|
||||
assert tag_uuid, "Tag UUID should exist"
|
||||
|
||||
# 2. Verify tag.json file was created
|
||||
tag_json_path = os.path.join(datastore_path, tag_uuid, "tag.json")
|
||||
assert os.path.exists(tag_json_path), f"tag.json should exist at {tag_json_path}"
|
||||
|
||||
# 3. Verify tag.json contains correct data
|
||||
with open(tag_json_path, 'r') as f:
|
||||
tag_data = json.load(f)
|
||||
assert tag_data['title'] == 'persistence-test-tag'
|
||||
assert tag_data['uuid'] == tag_uuid
|
||||
assert 'date_created' in tag_data
|
||||
|
||||
# 4. Edit the tag
|
||||
res = client.post(
|
||||
url_for("tags.form_tag_edit_submit", uuid=tag_uuid),
|
||||
data={
|
||||
"name": "persistence-test-tag",
|
||||
"notification_muted": True,
|
||||
"include_filters": '#test-filter'
|
||||
},
|
||||
follow_redirects=True
|
||||
)
|
||||
assert b"Updated" in res.data
|
||||
|
||||
# 5. Verify tag.json was updated
|
||||
with open(tag_json_path, 'r') as f:
|
||||
tag_data = json.load(f)
|
||||
assert tag_data['notification_muted'] == True
|
||||
assert '#test-filter' in tag_data.get('include_filters', [])
|
||||
|
||||
# 5a. Verify tag is NOT in changedetection.json (tags should be in tag.json only)
|
||||
changedetection_json_path = os.path.join(datastore_path, "changedetection.json")
|
||||
with open(changedetection_json_path, 'r') as f:
|
||||
settings_data = json.load(f)
|
||||
# Tags dict should be empty in settings (all tags are in individual files)
|
||||
assert settings_data['settings']['application']['tags'] == {}, \
|
||||
"Tags should NOT be saved to changedetection.json (should be empty dict)"
|
||||
|
||||
# 6. Simulate restart - reload datastore
|
||||
datastore2 = ChangeDetectionStore(datastore_path=datastore_path, include_default_watches=False, version_tag='test')
|
||||
|
||||
# 7. Verify tag was loaded from tag.json
|
||||
assert tag_uuid in datastore2.data['settings']['application']['tags']
|
||||
loaded_tag = datastore2.data['settings']['application']['tags'][tag_uuid]
|
||||
assert loaded_tag['title'] == 'persistence-test-tag'
|
||||
assert loaded_tag['notification_muted'] == True
|
||||
assert '#test-filter' in loaded_tag.get('include_filters', [])
|
||||
|
||||
# 8. Delete the tag via API
|
||||
res = client.get(url_for("tags.delete", uuid=tag_uuid), follow_redirects=True)
|
||||
assert b"Tag deleted" in res.data
|
||||
|
||||
# 9. Verify tag.json file was deleted
|
||||
assert not os.path.exists(tag_json_path), f"tag.json should be deleted at {tag_json_path}"
|
||||
|
||||
# 10. Verify tag is removed from settings
|
||||
assert tag_uuid not in datastore.data['settings']['application']['tags']
|
||||
|
||||
delete_all_watches(client)
|
||||
|
||||
|
||||
def test_tag_json_migration_update_27(client, live_server, measure_memory_usage, datastore_path):
|
||||
"""
|
||||
Test that update_27 migration correctly moves tags to individual files.
|
||||
|
||||
This simulates a pre-update_27 datastore and verifies migration works.
|
||||
"""
|
||||
import json
|
||||
|
||||
# 1. Create multiple tags
|
||||
tag_names = ['migration-tag-1', 'migration-tag-2', 'migration-tag-3']
|
||||
tag_uuids = []
|
||||
|
||||
for tag_name in tag_names:
|
||||
res = client.post(
|
||||
url_for("tags.form_tag_add"),
|
||||
data={"name": tag_name},
|
||||
follow_redirects=True
|
||||
)
|
||||
assert b"Tag added" in res.data
|
||||
tag_uuid = get_UUID_for_tag_name(client, name=tag_name)
|
||||
tag_uuids.append(tag_uuid)
|
||||
|
||||
# 2. Verify all tag.json files exist (update_27 already ran during add_tag)
|
||||
for tag_uuid in tag_uuids:
|
||||
tag_json_path = os.path.join(datastore_path, tag_uuid, "tag.json")
|
||||
assert os.path.exists(tag_json_path), f"tag.json should exist for {tag_uuid}"
|
||||
|
||||
# 2a. Verify tags are NOT in changedetection.json
|
||||
changedetection_json_path = os.path.join(datastore_path, "changedetection.json")
|
||||
with open(changedetection_json_path, 'r') as f:
|
||||
settings_data = json.load(f)
|
||||
assert settings_data['settings']['application']['tags'] == {}, \
|
||||
"Tags should NOT be in changedetection.json after migration"
|
||||
|
||||
# 3. Simulate restart
|
||||
datastore2 = ChangeDetectionStore(datastore_path=datastore_path, include_default_watches=False, version_tag='test')
|
||||
|
||||
# 4. Verify all tags loaded from tag.json files
|
||||
for idx, tag_uuid in enumerate(tag_uuids):
|
||||
assert tag_uuid in datastore2.data['settings']['application']['tags']
|
||||
loaded_tag = datastore2.data['settings']['application']['tags'][tag_uuid]
|
||||
assert loaded_tag['title'] == tag_names[idx]
|
||||
|
||||
# Cleanup
|
||||
res = client.get(url_for("tags.delete_all"), follow_redirects=True)
|
||||
assert b'All tags deleted' in res.data
|
||||
|
||||
# Verify all tag.json files were deleted
|
||||
for tag_uuid in tag_uuids:
|
||||
tag_json_path = os.path.join(datastore_path, tag_uuid, "tag.json")
|
||||
assert not os.path.exists(tag_json_path), f"tag.json should be deleted for {tag_uuid}"
|
||||
|
||||
delete_all_watches(client)
|
||||
|
||||
@@ -59,11 +59,29 @@ def test_consistent_history(client, live_server, measure_memory_usage, datastore
|
||||
# Wait for the sync DB save to happen
|
||||
time.sleep(2)
|
||||
|
||||
json_db_file = os.path.join(live_server.app.config['DATASTORE'].datastore_path, 'url-watches.json')
|
||||
# Check which format is being used
|
||||
datastore_path = live_server.app.config['DATASTORE'].datastore_path
|
||||
changedetection_json = os.path.join(datastore_path, 'changedetection.json')
|
||||
url_watches_json = os.path.join(datastore_path, 'url-watches.json')
|
||||
|
||||
json_obj = None
|
||||
with open(json_db_file, 'r', encoding='utf-8') as f:
|
||||
json_obj = json.load(f)
|
||||
json_obj = {'watching': {}}
|
||||
|
||||
if os.path.exists(changedetection_json):
|
||||
# New format: individual watch.json files
|
||||
logger.info("Testing with new format (changedetection.json + individual watch.json)")
|
||||
|
||||
# Load each watch.json file
|
||||
for uuid in live_server.app.config['DATASTORE'].data['watching'].keys():
|
||||
watch_json_file = os.path.join(datastore_path, uuid, 'watch.json')
|
||||
assert os.path.isfile(watch_json_file), f"watch.json should exist at {watch_json_file}"
|
||||
|
||||
with open(watch_json_file, 'r', encoding='utf-8') as f:
|
||||
json_obj['watching'][uuid] = json.load(f)
|
||||
else:
|
||||
# Legacy format: url-watches.json
|
||||
logger.info("Testing with legacy format (url-watches.json)")
|
||||
with open(url_watches_json, 'r', encoding='utf-8') as f:
|
||||
json_obj = json.load(f)
|
||||
|
||||
# assert the right amount of watches was found in the JSON
|
||||
assert len(json_obj['watching']) == len(workers), "Correct number of watches was found in the JSON"
|
||||
@@ -88,7 +106,7 @@ def test_consistent_history(client, live_server, measure_memory_usage, datastore
|
||||
|
||||
# Find the snapshot one
|
||||
for fname in files_in_watch_dir:
|
||||
if fname != 'history.txt' and 'html' not in fname:
|
||||
if fname != 'history.txt' and fname != 'watch.json' and 'html' not in fname:
|
||||
if strtobool(os.getenv("TEST_WITH_BROTLI")):
|
||||
assert fname.endswith('.br'), "Forced TEST_WITH_BROTLI then it should be a .br filename"
|
||||
|
||||
@@ -105,13 +123,27 @@ def test_consistent_history(client, live_server, measure_memory_usage, datastore
|
||||
assert json_obj['watching'][w]['title'], "Watch should have a title set"
|
||||
assert contents.startswith(watch_title + "x"), f"Snapshot contents in file {fname} should start with '{watch_title}x', got '{contents}'"
|
||||
|
||||
assert len(files_in_watch_dir) == 3, "Should be just three files in the dir, html.br snapshot, history.txt and the extracted text snapshot"
|
||||
# With new format, we also have watch.json, so 4 files total
|
||||
if os.path.exists(changedetection_json):
|
||||
assert len(files_in_watch_dir) == 4, "Should be four files in the dir with new format: watch.json, html.br snapshot, history.txt and the extracted text snapshot"
|
||||
else:
|
||||
assert len(files_in_watch_dir) == 3, "Should be just three files in the dir with legacy format: html.br snapshot, history.txt and the extracted text snapshot"
|
||||
|
||||
json_db_file = os.path.join(live_server.app.config['DATASTORE'].datastore_path, 'url-watches.json')
|
||||
with open(json_db_file, 'r', encoding='utf-8') as f:
|
||||
assert '"default"' not in f.read(), "'default' probably shouldnt be here, it came from when the 'default' Watch vars were accidently being saved"
|
||||
# Check that 'default' Watch vars aren't accidentally being saved
|
||||
if os.path.exists(changedetection_json):
|
||||
# New format: check all individual watch.json files
|
||||
for uuid in json_obj['watching'].keys():
|
||||
watch_json_file = os.path.join(datastore_path, uuid, 'watch.json')
|
||||
with open(watch_json_file, 'r', encoding='utf-8') as f:
|
||||
assert '"default"' not in f.read(), f"'default' probably shouldnt be here in {watch_json_file}, it came from when the 'default' Watch vars were accidently being saved"
|
||||
else:
|
||||
# Legacy format: check url-watches.json
|
||||
with open(url_watches_json, 'r', encoding='utf-8') as f:
|
||||
assert '"default"' not in f.read(), "'default' probably shouldnt be here, it came from when the 'default' Watch vars were accidently being saved"
|
||||
|
||||
|
||||
delete_all_watches(client)
|
||||
|
||||
def test_check_text_history_view(client, live_server, measure_memory_usage, datastore_path):
|
||||
|
||||
with open(os.path.join(datastore_path, "endpoint-content.txt"), "w") as f:
|
||||
@@ -132,7 +164,7 @@ def test_check_text_history_view(client, live_server, measure_memory_usage, data
|
||||
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
||||
wait_for_all_checks(client)
|
||||
|
||||
res = client.get(url_for("ui.ui_diff.diff_history_page", uuid="first"))
|
||||
res = client.get(url_for("ui.ui_diff.diff_history_page", uuid=uuid))
|
||||
assert b'test-one' in res.data
|
||||
assert b'test-two' in res.data
|
||||
|
||||
@@ -150,3 +182,86 @@ def test_check_text_history_view(client, live_server, measure_memory_usage, data
|
||||
assert b'test-one' not in res.data
|
||||
|
||||
delete_all_watches(client)
|
||||
|
||||
|
||||
def test_history_trim_global_only(client, live_server, measure_memory_usage, datastore_path):
|
||||
# Add our URL to the import page
|
||||
test_url = url_for('test_endpoint', _external=True)
|
||||
uuid = None
|
||||
limit = 3
|
||||
|
||||
for i in range(0, 10):
|
||||
with open(os.path.join(datastore_path, "endpoint-content.txt"), "w") as f:
|
||||
f.write(f"<html>test {i}</html>")
|
||||
if not uuid:
|
||||
uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
|
||||
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
||||
wait_for_all_checks(client)
|
||||
|
||||
if i ==8:
|
||||
watch = live_server.app.config['DATASTORE'].data['watching'][uuid]
|
||||
history_n = len(list(watch.history.keys()))
|
||||
logger.debug(f"History length should be at limit {limit} and it is {history_n}")
|
||||
assert history_n == limit
|
||||
|
||||
if i == 6:
|
||||
res = client.post(
|
||||
url_for("settings.settings_page"),
|
||||
data={"application-history_snapshot_max_length": limit},
|
||||
follow_redirects=True
|
||||
)
|
||||
# It will need to detect one more change to start trimming it, which is really at 'start of 7'
|
||||
assert b'Settings updated' in res.data
|
||||
|
||||
delete_all_watches(client)
|
||||
|
||||
|
||||
def test_history_trim_global_override_in_watch(client, live_server, measure_memory_usage, datastore_path):
|
||||
# Add our URL to the import page
|
||||
test_url = url_for('test_endpoint', _external=True)
|
||||
uuid = None
|
||||
limit = 3
|
||||
res = client.post(
|
||||
url_for("settings.settings_page"),
|
||||
data={"application-history_snapshot_max_length": 10000},
|
||||
follow_redirects=True
|
||||
)
|
||||
# It will need to detect one more change to start trimming it, which is really at 'start of 7'
|
||||
assert b'Settings updated' in res.data
|
||||
|
||||
|
||||
for i in range(0, 10):
|
||||
with open(os.path.join(datastore_path, "endpoint-content.txt"), "w") as f:
|
||||
f.write(f"<html>test {i}</html>")
|
||||
if not uuid:
|
||||
uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
|
||||
res = client.post(
|
||||
url_for("ui.ui_edit.edit_page", uuid="first"),
|
||||
data={"include_filters": "", "url": test_url, "tags": "", "headers": "", 'fetch_backend': "html_requests",
|
||||
"time_between_check_use_default": "y", "history_snapshot_max_length": str(limit)},
|
||||
follow_redirects=True
|
||||
)
|
||||
assert b"Updated watch." in res.data
|
||||
|
||||
wait_for_all_checks(client)
|
||||
|
||||
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
||||
wait_for_all_checks(client)
|
||||
|
||||
if i == 8:
|
||||
watch = live_server.app.config['DATASTORE'].data['watching'][uuid]
|
||||
history_n = len(list(watch.history.keys()))
|
||||
logger.debug(f"History length should be at limit {limit} and it is {history_n}")
|
||||
assert history_n == limit
|
||||
|
||||
if i == 6:
|
||||
res = client.post(
|
||||
url_for("settings.settings_page"),
|
||||
data={"application-history_snapshot_max_length": limit},
|
||||
follow_redirects=True
|
||||
)
|
||||
# It will need to detect one more change to start trimming it, which is really at 'start of 7'
|
||||
assert b'Settings updated' in res.data
|
||||
|
||||
delete_all_watches(client)
|
||||
|
||||
|
||||
@@ -160,7 +160,7 @@ def test_invalid_locale(client, live_server, measure_memory_usage, datastore_pat
|
||||
def test_language_persistence_in_session(client, live_server, measure_memory_usage, datastore_path):
|
||||
"""
|
||||
Test that the language preference persists across multiple requests
|
||||
within the same session.
|
||||
within the same session, and that auto-detect properly clears the preference.
|
||||
"""
|
||||
|
||||
# Establish session cookie
|
||||
@@ -184,6 +184,34 @@ def test_language_persistence_in_session(client, live_server, measure_memory_usa
|
||||
assert res.status_code == 200
|
||||
assert b"Annulla" in res.data, "Italian text should persist across requests"
|
||||
|
||||
# Verify locale is in session
|
||||
with client.session_transaction() as sess:
|
||||
assert sess.get('locale') == 'it', "Locale should be set in session"
|
||||
|
||||
# Call auto-detect to clear the locale
|
||||
res = client.get(
|
||||
url_for("ui.delete_locale_language_session_var_if_it_exists"),
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
assert res.status_code == 200
|
||||
# Verify the flash message appears (in English since we cleared the locale)
|
||||
assert b"Language set to auto-detect from browser" in res.data, "Should show flash message"
|
||||
|
||||
# Verify locale was removed from session
|
||||
with client.session_transaction() as sess:
|
||||
assert 'locale' not in sess, "Locale should be removed from session after auto-detect"
|
||||
|
||||
# Now requests should use browser default (English in test environment)
|
||||
res = client.get(
|
||||
url_for("watchlist.index"),
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
assert res.status_code == 200
|
||||
assert b"Cancel" in res.data, "Should show English after auto-detect clears Italian"
|
||||
assert b"Annulla" not in res.data, "Should not show Italian after auto-detect"
|
||||
|
||||
|
||||
def test_set_language_with_redirect(client, live_server, measure_memory_usage, datastore_path):
|
||||
"""
|
||||
|
||||
@@ -40,10 +40,7 @@ def set_some_changed_response(datastore_path):
|
||||
|
||||
|
||||
def test_normal_page_check_works_with_ignore_status_code(client, live_server, measure_memory_usage, datastore_path):
|
||||
|
||||
|
||||
# Give the endpoint time to spin up
|
||||
time.sleep(1)
|
||||
from loguru import logger
|
||||
|
||||
set_original_response(datastore_path=datastore_path)
|
||||
|
||||
@@ -62,20 +59,41 @@ def test_normal_page_check_works_with_ignore_status_code(client, live_server, me
|
||||
# Add our URL to the import page
|
||||
test_url = url_for('test_endpoint', _external=True)
|
||||
uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
|
||||
|
||||
logger.info(f"TEST: First check - queuing UUID {uuid}")
|
||||
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
||||
|
||||
wait_for_all_checks(client)
|
||||
logger.info(f"TEST: Waiting for first check to complete")
|
||||
wait_result = wait_for_all_checks(client)
|
||||
logger.info(f"TEST: First check wait completed: {wait_result}")
|
||||
|
||||
# Check history after first check
|
||||
watch = client.application.config.get('DATASTORE').data['watching'][uuid]
|
||||
logger.info(f"TEST: After first check - history count: {len(watch.history.keys())}")
|
||||
|
||||
set_some_changed_response(datastore_path=datastore_path)
|
||||
wait_for_all_checks(client)
|
||||
|
||||
# Trigger a check
|
||||
logger.info(f"TEST: Second check - queuing UUID {uuid}")
|
||||
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
||||
|
||||
# Give the thread time to pick it up
|
||||
wait_for_all_checks(client)
|
||||
logger.info(f"TEST: Waiting for second check to complete")
|
||||
wait_result = wait_for_all_checks(client)
|
||||
logger.info(f"TEST: Second check wait completed: {wait_result}")
|
||||
|
||||
# Check history after second check
|
||||
watch = client.application.config.get('DATASTORE').data['watching'][uuid]
|
||||
logger.info(f"TEST: After second check - history count: {len(watch.history.keys())}")
|
||||
logger.info(f"TEST: Watch history keys: {list(watch.history.keys())}")
|
||||
|
||||
# It should report nothing found (no new 'has-unread-changes' class)
|
||||
res = client.get(url_for("watchlist.index"))
|
||||
|
||||
if b'has-unread-changes' not in res.data:
|
||||
logger.error(f"TEST FAILED: has-unread-changes not found in response")
|
||||
logger.error(f"TEST: Watch last_error: {watch.get('last_error')}")
|
||||
logger.error(f"TEST: Watch last_checked: {watch.get('last_checked')}")
|
||||
|
||||
assert b'has-unread-changes' in res.data
|
||||
assert b'/test-endpoint' in res.data
|
||||
|
||||
|
||||
@@ -82,7 +82,7 @@ def test_import_distillio(client, live_server, measure_memory_usage, datastore_p
|
||||
|
||||
# Give the endpoint time to spin up
|
||||
time.sleep(1)
|
||||
client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
|
||||
delete_all_watches(client)
|
||||
res = client.post(
|
||||
url_for("imports.import_page"),
|
||||
data={
|
||||
|
||||
@@ -224,6 +224,7 @@ def check_json_filter(json_filter, client, live_server, datastore_path):
|
||||
set_original_response(datastore_path=datastore_path)
|
||||
|
||||
|
||||
delete_all_watches(client)
|
||||
# Add our URL to the import page
|
||||
test_url = url_for('test_endpoint', content_type="application/json", _external=True)
|
||||
uuid = client.application.config.get('DATASTORE').add_watch(url=test_url, extras={"include_filters": json_filter.splitlines()})
|
||||
@@ -297,14 +298,17 @@ def check_json_filter_bool_val(json_filter, client, live_server, datastore_path)
|
||||
|
||||
def test_check_jsonpath_filter_bool_val(client, live_server, measure_memory_usage, datastore_path):
|
||||
check_json_filter_bool_val("json:$['available']", client, live_server, datastore_path=datastore_path)
|
||||
delete_all_watches(client)
|
||||
|
||||
def test_check_jq_filter_bool_val(client, live_server, measure_memory_usage, datastore_path):
|
||||
if jq_support:
|
||||
check_json_filter_bool_val("jq:.available", client, live_server, datastore_path=datastore_path)
|
||||
delete_all_watches(client)
|
||||
|
||||
def test_check_jqraw_filter_bool_val(client, live_server, measure_memory_usage, datastore_path):
|
||||
if jq_support:
|
||||
check_json_filter_bool_val("jq:.available", client, live_server, datastore_path=datastore_path)
|
||||
delete_all_watches(client)
|
||||
|
||||
# Re #265 - Extended JSON selector test
|
||||
# Stuff to consider here
|
||||
@@ -452,14 +456,17 @@ def test_correct_header_detect(client, live_server, measure_memory_usage, datast
|
||||
|
||||
def test_check_jsonpath_ext_filter(client, live_server, measure_memory_usage, datastore_path):
|
||||
check_json_ext_filter('json:$[?(@.status==Sold)]', client, live_server, datastore_path=datastore_path)
|
||||
delete_all_watches(client)
|
||||
|
||||
def test_check_jq_ext_filter(client, live_server, measure_memory_usage, datastore_path):
|
||||
if jq_support:
|
||||
check_json_ext_filter('jq:.[] | select(.status | contains("Sold"))', client, live_server, datastore_path=datastore_path)
|
||||
delete_all_watches(client)
|
||||
|
||||
def test_check_jqraw_ext_filter(client, live_server, measure_memory_usage, datastore_path):
|
||||
if jq_support:
|
||||
check_json_ext_filter('jq:.[] | select(.status | contains("Sold"))', client, live_server, datastore_path=datastore_path)
|
||||
delete_all_watches(client)
|
||||
|
||||
def test_jsonpath_BOM_utf8(client, live_server, measure_memory_usage, datastore_path):
|
||||
from .. import html_tools
|
||||
@@ -470,5 +477,6 @@ def test_jsonpath_BOM_utf8(client, live_server, measure_memory_usage, datastore_
|
||||
# See that we can find the second <script> one, which is not broken, and matches our filter
|
||||
text = html_tools.extract_json_as_string(json_str, "json:$.name")
|
||||
assert text == '"José"'
|
||||
delete_all_watches(client)
|
||||
|
||||
|
||||
|
||||
@@ -313,14 +313,8 @@ def test_notification_custom_endpoint_and_jinja2(client, live_server, measure_me
|
||||
|
||||
# Add a watch and trigger a HTTP POST
|
||||
test_url = url_for('test_endpoint', _external=True)
|
||||
res = client.post(
|
||||
url_for("ui.ui_views.form_quick_watch_add"),
|
||||
data={"url": test_url, "tags": 'nice one'},
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
assert b"Watch added" in res.data
|
||||
watch_uuid = next(iter(live_server.app.config['DATASTORE'].data['watching']))
|
||||
watch_uuid = client.application.config.get('DATASTORE').add_watch(url=test_url, tag="nice one")
|
||||
res = client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
||||
|
||||
wait_for_all_checks(client)
|
||||
set_modified_response(datastore_path=datastore_path)
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import os
|
||||
import time
|
||||
from flask import url_for
|
||||
from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks
|
||||
from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks, delete_all_watches
|
||||
import logging
|
||||
|
||||
def test_check_notification_error_handling(client, live_server, measure_memory_usage, datastore_path):
|
||||
@@ -81,4 +81,4 @@ def test_check_notification_error_handling(client, live_server, measure_memory_u
|
||||
os.unlink(os.path.join(datastore_path, "notification.txt"))
|
||||
assert 'xxxxx' in notification_submission
|
||||
|
||||
client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
|
||||
delete_all_watches(client)
|
||||
|
||||
@@ -0,0 +1,52 @@
|
||||
import os
|
||||
import time
|
||||
from flask import url_for
|
||||
from .util import set_original_response, wait_for_all_checks, wait_for_notification_endpoint_output
|
||||
from ..notification import valid_notification_formats
|
||||
from loguru import logger
|
||||
|
||||
def test_queue_system(client, live_server, measure_memory_usage, datastore_path):
|
||||
"""Test that multiple workers can process queue concurrently without blocking each other"""
|
||||
# (pytest) Werkzeug's threaded server uses ThreadPoolExecutor with a default limit of around 40 threads (or min(32, os.cpu_count() + 4)).
|
||||
items = os.cpu_count() +3
|
||||
delay = 10
|
||||
# Auto-queue is off here.
|
||||
live_server.app.config['DATASTORE'].data['settings']['application']['all_paused'] = True
|
||||
|
||||
test_urls = [
|
||||
f"{url_for('test_endpoint', _external=True)}?delay={delay}&id={i}&content=hello+test+content+{i}"
|
||||
for i in range(0, items)
|
||||
]
|
||||
|
||||
# Import 30 URLs to queue
|
||||
res = client.post(
|
||||
url_for("imports.import_page"),
|
||||
data={"urls": "\r\n".join(test_urls)},
|
||||
follow_redirects=True
|
||||
)
|
||||
assert f"{items} Imported".encode('utf-8') in res.data
|
||||
|
||||
client.application.set_workers(items)
|
||||
|
||||
start = time.time()
|
||||
res = client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
||||
time.sleep(delay/2)
|
||||
|
||||
# Verify all workers are idle (no UUIDs being processed)
|
||||
from changedetectionio import worker_pool
|
||||
running_uuids = worker_pool.get_running_uuids()
|
||||
logger.debug( f"Should be atleast some workers running - {len(running_uuids)} UUIDs still being processed: {running_uuids}")
|
||||
assert len(running_uuids) != 0, f"Should be atleast some workers running - {len(running_uuids)} UUIDs still being processed: {running_uuids}"
|
||||
|
||||
wait_for_all_checks(client)
|
||||
|
||||
# all workers should be done in less than say 10 seconds (they take time to 'see' something is in the queue too)
|
||||
total_time = (time.time() - start)
|
||||
logger.debug(f"All workers finished {items} items in less than {delay} seconds per job. {total_time}s total")
|
||||
# if there was a bug in queue handler not running parallel, this would blow out to items*delay seconds
|
||||
assert total_time < delay + 10, f"All workers finished {items} items in less than {delay} seconds per job, total time {total_time}s"
|
||||
|
||||
# Verify all workers are idle (no UUIDs being processed)
|
||||
from changedetectionio import worker_pool
|
||||
running_uuids = worker_pool.get_running_uuids()
|
||||
assert len(running_uuids) == 0, f"Expected all workers to be idle, but {len(running_uuids)} UUIDs still being processed: {running_uuids}"
|
||||
@@ -17,12 +17,12 @@ def test_headers_in_request(client, live_server, measure_memory_usage, datastore
|
||||
test_url = test_url.replace('localhost', 'changedet')
|
||||
|
||||
# Add the test URL twice, we will check
|
||||
uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
|
||||
uuidA = client.application.config.get('DATASTORE').add_watch(url=test_url)
|
||||
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
||||
|
||||
wait_for_all_checks(client)
|
||||
|
||||
uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
|
||||
uuidB = client.application.config.get('DATASTORE').add_watch(url=test_url)
|
||||
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
||||
|
||||
wait_for_all_checks(client)
|
||||
@@ -31,7 +31,7 @@ def test_headers_in_request(client, live_server, measure_memory_usage, datastore
|
||||
|
||||
# Add some headers to a request
|
||||
res = client.post(
|
||||
url_for("ui.ui_edit.edit_page", uuid="first"),
|
||||
url_for("ui.ui_edit.edit_page", uuid=uuidA),
|
||||
data={
|
||||
"url": test_url,
|
||||
"tags": "",
|
||||
@@ -42,13 +42,14 @@ def test_headers_in_request(client, live_server, measure_memory_usage, datastore
|
||||
)
|
||||
assert b"Updated watch." in res.data
|
||||
|
||||
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
||||
|
||||
# Give the thread time to pick up the first version
|
||||
wait_for_all_checks(client)
|
||||
|
||||
# The service should echo back the request headers
|
||||
res = client.get(
|
||||
url_for("ui.ui_preview.preview_page", uuid="first"),
|
||||
url_for("ui.ui_preview.preview_page", uuid=uuidA),
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
@@ -92,7 +93,7 @@ def test_body_in_request(client, live_server, measure_memory_usage, datastore_pa
|
||||
|
||||
# add the first 'version'
|
||||
res = client.post(
|
||||
url_for("ui.ui_edit.edit_page", uuid="first"),
|
||||
url_for("ui.ui_edit.edit_page", uuid=uuid),
|
||||
data={
|
||||
"url": test_url,
|
||||
"tags": "",
|
||||
@@ -110,7 +111,7 @@ def test_body_in_request(client, live_server, measure_memory_usage, datastore_pa
|
||||
body_value = 'Test Body Value {{ 1+1 }}'
|
||||
body_value_formatted = 'Test Body Value 2'
|
||||
res = client.post(
|
||||
url_for("ui.ui_edit.edit_page", uuid="first"),
|
||||
url_for("ui.ui_edit.edit_page", uuid=uuid),
|
||||
data={
|
||||
"url": test_url,
|
||||
"tags": "",
|
||||
@@ -126,7 +127,7 @@ def test_body_in_request(client, live_server, measure_memory_usage, datastore_pa
|
||||
|
||||
# The service should echo back the body
|
||||
res = client.get(
|
||||
url_for("ui.ui_preview.preview_page", uuid="first"),
|
||||
url_for("ui.ui_preview.preview_page", uuid=uuid),
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
@@ -142,10 +143,14 @@ def test_body_in_request(client, live_server, measure_memory_usage, datastore_pa
|
||||
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
||||
wait_for_all_checks(client)
|
||||
watches_with_body = 0
|
||||
with open(os.path.join(datastore_path, 'url-watches.json'), encoding='utf-8') as f:
|
||||
app_struct = json.load(f)
|
||||
for uuid in app_struct['watching']:
|
||||
if app_struct['watching'][uuid]['body']==body_value:
|
||||
|
||||
# Read individual watch.json files
|
||||
for uuid in client.application.config.get('DATASTORE').data['watching'].keys():
|
||||
watch_json_file = os.path.join(datastore_path, uuid, 'watch.json')
|
||||
assert os.path.exists(watch_json_file), f"watch.json should exist at {watch_json_file}"
|
||||
with open(watch_json_file, 'r', encoding='utf-8') as f:
|
||||
watch_data = json.load(f)
|
||||
if watch_data.get('body') == body_value:
|
||||
watches_with_body += 1
|
||||
|
||||
# Should be only one with body set
|
||||
@@ -153,7 +158,7 @@ def test_body_in_request(client, live_server, measure_memory_usage, datastore_pa
|
||||
|
||||
# Attempt to add a body with a GET method
|
||||
res = client.post(
|
||||
url_for("ui.ui_edit.edit_page", uuid="first"),
|
||||
url_for("ui.ui_edit.edit_page", uuid=uuid),
|
||||
data={
|
||||
"url": test_url,
|
||||
"tags": "",
|
||||
@@ -225,10 +230,14 @@ def test_method_in_request(client, live_server, measure_memory_usage, datastore_
|
||||
wait_for_all_checks(client)
|
||||
|
||||
watches_with_method = 0
|
||||
with open(os.path.join(datastore_path, 'url-watches.json'), encoding='utf-8') as f:
|
||||
app_struct = json.load(f)
|
||||
for uuid in app_struct['watching']:
|
||||
if app_struct['watching'][uuid]['method'] == 'PATCH':
|
||||
|
||||
# Read individual watch.json files
|
||||
for uuid in client.application.config.get('DATASTORE').data['watching'].keys():
|
||||
watch_json_file = os.path.join(datastore_path, uuid, 'watch.json')
|
||||
assert os.path.exists(watch_json_file), f"watch.json should exist at {watch_json_file}"
|
||||
with open(watch_json_file, 'r', encoding='utf-8') as f:
|
||||
watch_data = json.load(f)
|
||||
if watch_data.get('method') == 'PATCH':
|
||||
watches_with_method += 1
|
||||
|
||||
# Should be only one with method set to PATCH
|
||||
|
||||
@@ -236,6 +236,7 @@ def test_restock_itemprop_with_tag(client, live_server, measure_memory_usage, da
|
||||
}
|
||||
|
||||
_run_test_minmax_limit(client, extra_watch_edit_form=extras,datastore_path=datastore_path)
|
||||
delete_all_watches(client)
|
||||
|
||||
|
||||
|
||||
@@ -388,9 +389,10 @@ def test_change_with_notification_values(client, live_server, measure_memory_usa
|
||||
os.unlink(os.path.join(datastore_path, "notification.txt"))
|
||||
uuid = next(iter(live_server.app.config['DATASTORE'].data['watching']))
|
||||
res = client.post(url_for("ui.ui_notification.ajax_callback_send_notification_test", watch_uuid=uuid), data={}, follow_redirects=True)
|
||||
time.sleep(5)
|
||||
wait_for_notification_endpoint_output(datastore_path=datastore_path)
|
||||
assert os.path.isfile(os.path.join(datastore_path, "notification.txt")), "Notification received"
|
||||
|
||||
delete_all_watches(client)
|
||||
|
||||
def test_data_sanity(client, live_server, measure_memory_usage, datastore_path):
|
||||
|
||||
@@ -406,6 +408,7 @@ def test_data_sanity(client, live_server, measure_memory_usage, datastore_path):
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
||||
|
||||
wait_for_all_checks(client)
|
||||
res = client.get(url_for("watchlist.index"))
|
||||
@@ -417,6 +420,7 @@ def test_data_sanity(client, live_server, measure_memory_usage, datastore_path):
|
||||
data={"url": test_url2, "tags": 'restock tests', 'processor': 'restock_diff'},
|
||||
follow_redirects=True
|
||||
)
|
||||
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
||||
wait_for_all_checks(client)
|
||||
res = client.get(url_for("watchlist.index"))
|
||||
assert str(res.data.decode()).count("950.95") == 1, "Price should only show once (for the watch added, no other watches yet)"
|
||||
@@ -462,3 +466,4 @@ def test_special_prop_examples(client, live_server, measure_memory_usage, datast
|
||||
assert b'ception' not in res.data
|
||||
assert b'155.55' in res.data
|
||||
|
||||
delete_all_watches(client)
|
||||
|
||||
@@ -107,7 +107,7 @@ def test_rss_and_token(client, live_server, measure_memory_usage, datastore_path
|
||||
assert b"Access denied, bad token" not in res.data
|
||||
assert b"Random content" in res.data
|
||||
|
||||
client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
|
||||
delete_all_watches(client)
|
||||
|
||||
def test_basic_cdata_rss_markup(client, live_server, measure_memory_usage, datastore_path):
|
||||
|
||||
|
||||
@@ -23,6 +23,7 @@ def test_rss_feed_empty(client, live_server, measure_memory_usage, datastore_pat
|
||||
)
|
||||
assert res.status_code == 400
|
||||
assert b'does not have enough history snapshots to show' in res.data
|
||||
delete_all_watches(client)
|
||||
|
||||
def test_rss_single_watch_order(client, live_server, measure_memory_usage, datastore_path):
|
||||
"""
|
||||
|
||||
@@ -24,20 +24,20 @@ def test_share_watch(client, live_server, measure_memory_usage, datastore_path):
|
||||
# Goto the edit page, add our ignore text
|
||||
# Add our URL to the import page
|
||||
res = client.post(
|
||||
url_for("ui.ui_edit.edit_page", uuid="first"),
|
||||
url_for("ui.ui_edit.edit_page", uuid=uuid),
|
||||
data={"include_filters": include_filters, "url": test_url, "tags": "", "headers": "", 'fetch_backend': "html_requests", "time_between_check_use_default": "y"},
|
||||
follow_redirects=True
|
||||
)
|
||||
assert b"Updated watch." in res.data
|
||||
# Check it saved
|
||||
res = client.get(
|
||||
url_for("ui.ui_edit.edit_page", uuid="first"),
|
||||
url_for("ui.ui_edit.edit_page", uuid=uuid),
|
||||
)
|
||||
assert bytes(include_filters.encode('utf-8')) in res.data
|
||||
|
||||
# click share the link
|
||||
res = client.get(
|
||||
url_for("ui.form_share_put_watch", uuid="first"),
|
||||
url_for("ui.form_share_put_watch", uuid=uuid),
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
@@ -63,13 +63,16 @@ def test_share_watch(client, live_server, measure_memory_usage, datastore_path):
|
||||
|
||||
# Now hit edit, we should see what we expect
|
||||
# that the import fetched the meta-data
|
||||
|
||||
uuids = list(client.application.config.get('DATASTORE').data['watching'])
|
||||
assert uuids, "It saved/imported and created a new URL from the share"
|
||||
# Check it saved
|
||||
res = client.get(
|
||||
url_for("ui.ui_edit.edit_page", uuid="first"),
|
||||
url_for("ui.ui_edit.edit_page", uuid=uuids[0]),
|
||||
)
|
||||
assert bytes(include_filters.encode('utf-8')) in res.data
|
||||
|
||||
# Check it saved the URL
|
||||
res = client.get(url_for("watchlist.index"))
|
||||
assert bytes(test_url.encode('utf-8')) in res.data
|
||||
|
||||
delete_all_watches(client)
|
||||
@@ -25,6 +25,7 @@ def test_recheck_time_field_validation_global_settings(client, live_server, meas
|
||||
|
||||
|
||||
assert REQUIRE_ATLEAST_ONE_TIME_PART_MESSAGE_DEFAULT.encode('utf-8') in res.data
|
||||
delete_all_watches(client)
|
||||
|
||||
|
||||
def test_recheck_time_field_validation_single_watch(client, live_server, measure_memory_usage, datastore_path):
|
||||
@@ -94,6 +95,7 @@ def test_recheck_time_field_validation_single_watch(client, live_server, measure
|
||||
|
||||
assert b"Updated watch." in res.data
|
||||
assert REQUIRE_ATLEAST_ONE_TIME_PART_WHEN_NOT_GLOBAL_DEFAULT.encode('utf-8') not in res.data
|
||||
delete_all_watches(client)
|
||||
|
||||
def test_checkbox_open_diff_in_new_tab(client, live_server, measure_memory_usage, datastore_path):
|
||||
|
||||
@@ -242,6 +244,7 @@ def test_page_title_listing_behaviour(client, live_server, measure_memory_usage,
|
||||
# No page title description, and 'use_page_title_in_list' is on, it should show the <title>
|
||||
res = client.get(url_for("watchlist.index"))
|
||||
assert b"head titlecustom html" in res.data
|
||||
delete_all_watches(client)
|
||||
|
||||
|
||||
def test_ui_viewed_unread_flag(client, live_server, measure_memory_usage, datastore_path):
|
||||
@@ -283,4 +286,5 @@ def test_ui_viewed_unread_flag(client, live_server, measure_memory_usage, datast
|
||||
client.get(url_for("ui.mark_all_viewed"), follow_redirects=True)
|
||||
time.sleep(0.2)
|
||||
res = client.get(url_for("watchlist.index"))
|
||||
assert b'<span id="unread-tab-counter">0</span>' in res.data
|
||||
assert b'<span id="unread-tab-counter">0</span>' in res.data
|
||||
delete_all_watches(client)
|
||||
|
||||
@@ -366,7 +366,7 @@ def test_check_with_prefix_include_filters(client, live_server, measure_memory_u
|
||||
assert b"Some text thats the same" in res.data # in selector
|
||||
assert b"Some text that will change" not in res.data # not in selector
|
||||
|
||||
client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
|
||||
delete_all_watches(client)
|
||||
|
||||
|
||||
def test_various_rules(client, live_server, measure_memory_usage, datastore_path):
|
||||
@@ -423,7 +423,7 @@ def test_xpath_20(client, live_server, measure_memory_usage, datastore_path):
|
||||
|
||||
test_url = url_for('test_endpoint', _external=True)
|
||||
res = client.post(
|
||||
url_for("ui.ui_edit.edit_page", uuid="first"),
|
||||
url_for("ui.ui_edit.edit_page", uuid=uuid),
|
||||
data={"include_filters": "//*[contains(@class, 'sametext')]|//*[contains(@class, 'changetext')]",
|
||||
"url": test_url,
|
||||
"tags": "",
|
||||
@@ -437,14 +437,14 @@ def test_xpath_20(client, live_server, measure_memory_usage, datastore_path):
|
||||
wait_for_all_checks(client)
|
||||
|
||||
res = client.get(
|
||||
url_for("ui.ui_preview.preview_page", uuid="first"),
|
||||
url_for("ui.ui_preview.preview_page", uuid=uuid),
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
assert b"Some text thats the same" in res.data # in selector
|
||||
assert b"Some text that will change" in res.data # in selector
|
||||
|
||||
client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
|
||||
delete_all_watches(client)
|
||||
|
||||
|
||||
def test_xpath_20_function_count(client, live_server, measure_memory_usage, datastore_path):
|
||||
@@ -477,7 +477,7 @@ def test_xpath_20_function_count(client, live_server, measure_memory_usage, data
|
||||
|
||||
assert b"246913579975308642" in res.data # in selector
|
||||
|
||||
client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
|
||||
delete_all_watches(client)
|
||||
|
||||
|
||||
def test_xpath_20_function_count2(client, live_server, measure_memory_usage, datastore_path):
|
||||
@@ -501,6 +501,8 @@ def test_xpath_20_function_count2(client, live_server, measure_memory_usage, dat
|
||||
)
|
||||
|
||||
assert b"Updated watch." in res.data
|
||||
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
||||
|
||||
wait_for_all_checks(client)
|
||||
|
||||
res = client.get(
|
||||
@@ -510,7 +512,7 @@ def test_xpath_20_function_count2(client, live_server, measure_memory_usage, dat
|
||||
|
||||
assert b"246913579975308642" in res.data # in selector
|
||||
|
||||
client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
|
||||
delete_all_watches(client)
|
||||
|
||||
|
||||
def test_xpath_20_function_string_join_matches(client, live_server, measure_memory_usage, datastore_path):
|
||||
@@ -544,7 +546,7 @@ def test_xpath_20_function_string_join_matches(client, live_server, measure_memo
|
||||
|
||||
assert b"Some text thats the samespecialconjunctionSome text that will change" in res.data # in selector
|
||||
|
||||
client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
|
||||
delete_all_watches(client)
|
||||
|
||||
|
||||
def _subtest_xpath_rss(client, datastore_path, content_type='text/html'):
|
||||
@@ -582,7 +584,7 @@ def _subtest_xpath_rss(client, datastore_path, content_type='text/html'):
|
||||
assert b"Lets go discount" in res.data, f"When testing for Lets go discount called with content type '{content_type}'"
|
||||
assert b"Events and Announcements" not in res.data, f"When testing for Lets go discount called with content type '{content_type}'" # It should not be here because thats not our selector target
|
||||
|
||||
client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
|
||||
delete_all_watches(client)
|
||||
|
||||
# Be sure all-in-the-wild types of RSS feeds work with xpath
|
||||
def test_rss_xpath(client, live_server, measure_memory_usage, datastore_path):
|
||||
|
||||
@@ -5,15 +5,24 @@
|
||||
|
||||
import unittest
|
||||
import os
|
||||
import pickle
|
||||
from copy import deepcopy
|
||||
|
||||
from changedetectionio.model import Watch
|
||||
from changedetectionio.model import Watch, Tag
|
||||
|
||||
# mostly
|
||||
class TestDiffBuilder(unittest.TestCase):
|
||||
|
||||
def test_watch_get_suggested_from_diff_timestamp(self):
|
||||
import uuid as uuid_builder
|
||||
watch = Watch.model(datastore_path='/tmp', default={})
|
||||
# Create minimal mock datastore for tests
|
||||
mock_datastore = {
|
||||
'settings': {
|
||||
'application': {}
|
||||
},
|
||||
'watching': {}
|
||||
}
|
||||
watch = Watch.model(datastore_path='/tmp', __datastore=mock_datastore, default={})
|
||||
watch.ensure_data_dir_exists()
|
||||
|
||||
|
||||
@@ -49,7 +58,7 @@ class TestDiffBuilder(unittest.TestCase):
|
||||
assert p == "109", "Correct when its the same time"
|
||||
|
||||
# new empty one
|
||||
watch = Watch.model(datastore_path='/tmp', default={})
|
||||
watch = Watch.model(datastore_path='/tmp', __datastore=mock_datastore, default={})
|
||||
p = watch.get_from_version_based_on_last_viewed
|
||||
assert p == None, "None when no history available"
|
||||
|
||||
@@ -61,5 +70,184 @@ class TestDiffBuilder(unittest.TestCase):
|
||||
p = watch.get_from_version_based_on_last_viewed
|
||||
assert p == "100", "Correct with only one history snapshot"
|
||||
|
||||
def test_watch_deepcopy_doesnt_copy_datastore(self):
|
||||
"""
|
||||
CRITICAL: Ensure deepcopy(watch) shares __datastore instead of copying it.
|
||||
|
||||
Without this, deepcopy causes exponential memory growth:
|
||||
- 100 watches × deepcopy each = 10,000 watch objects in memory (100²)
|
||||
- Memory grows from 120MB → 2GB
|
||||
|
||||
This test prevents regressions in the __deepcopy__ implementation.
|
||||
"""
|
||||
# Create mock datastore with multiple watches
|
||||
mock_datastore = {
|
||||
'settings': {'application': {'history_snapshot_max_length': 10}},
|
||||
'watching': {}
|
||||
}
|
||||
|
||||
# Create 3 watches that all reference the same datastore
|
||||
watches = []
|
||||
for i in range(3):
|
||||
watch = Watch.model(
|
||||
__datastore=mock_datastore,
|
||||
datastore_path='/tmp/test',
|
||||
default={'url': f'https://example{i}.com', 'title': f'Watch {i}'}
|
||||
)
|
||||
mock_datastore['watching'][watch['uuid']] = watch
|
||||
watches.append(watch)
|
||||
|
||||
# Test 1: Deepcopy shares datastore reference (doesn't copy it)
|
||||
watch_copy = deepcopy(watches[0])
|
||||
|
||||
self.assertIsNotNone(watch_copy._datastore,
|
||||
"__datastore should exist in copied watch")
|
||||
self.assertIs(watch_copy._datastore, watches[0]._datastore,
|
||||
"__datastore should be SHARED (same object), not copied")
|
||||
self.assertIs(watch_copy._datastore, mock_datastore,
|
||||
"__datastore should reference the original datastore")
|
||||
|
||||
# Test 2: Dict data is properly copied (not shared)
|
||||
self.assertEqual(watch_copy['title'], 'Watch 0', "Dict data should be copied")
|
||||
watch_copy['title'] = 'MODIFIED'
|
||||
self.assertNotEqual(watches[0]['title'], 'MODIFIED',
|
||||
"Modifying copy should not affect original")
|
||||
|
||||
# Test 3: Verify no nested datastore copies in watch dict
|
||||
# The dict should only contain watch settings, not the datastore
|
||||
watch_dict = dict(watch_copy)
|
||||
self.assertNotIn('__datastore', watch_dict,
|
||||
"__datastore should not be in dict keys")
|
||||
self.assertNotIn('_model__datastore', watch_dict,
|
||||
"_model__datastore should not be in dict keys")
|
||||
|
||||
# Test 4: Multiple deepcopies don't cause exponential memory growth
|
||||
# If datastore was copied, each copy would contain 3 watches,
|
||||
# and those watches would contain the datastore, etc. (infinite recursion)
|
||||
copies = []
|
||||
for _ in range(5):
|
||||
copies.append(deepcopy(watches[0]))
|
||||
|
||||
# All copies should share the same datastore
|
||||
for copy in copies:
|
||||
self.assertIs(copy._datastore, mock_datastore,
|
||||
"All copies should share the original datastore")
|
||||
|
||||
def test_watch_pickle_doesnt_serialize_datastore(self):
|
||||
"""
|
||||
Ensure pickle/unpickle doesn't serialize __datastore.
|
||||
|
||||
This is important for multiprocessing and caching - we don't want
|
||||
to serialize the entire datastore when pickling a watch.
|
||||
"""
|
||||
mock_datastore = {
|
||||
'settings': {'application': {}},
|
||||
'watching': {}
|
||||
}
|
||||
|
||||
watch = Watch.model(
|
||||
__datastore=mock_datastore,
|
||||
datastore_path='/tmp/test',
|
||||
default={'url': 'https://example.com', 'title': 'Test Watch'}
|
||||
)
|
||||
|
||||
# Pickle and unpickle
|
||||
pickled = pickle.dumps(watch)
|
||||
unpickled_watch = pickle.loads(pickled)
|
||||
|
||||
# Test 1: Watch data is preserved
|
||||
self.assertEqual(unpickled_watch['url'], 'https://example.com',
|
||||
"Dict data should be preserved after pickle/unpickle")
|
||||
|
||||
# Test 2: __datastore is NOT serialized (attribute shouldn't exist after unpickle)
|
||||
self.assertFalse(hasattr(unpickled_watch, '_datastore'),
|
||||
"__datastore attribute should not exist after unpickle (not serialized)")
|
||||
|
||||
# Test 3: Pickled data shouldn't contain the large datastore object
|
||||
# If datastore was serialized, the pickle size would be much larger
|
||||
pickle_size = len(pickled)
|
||||
# A single watch should be small (< 10KB), not include entire datastore
|
||||
self.assertLess(pickle_size, 10000,
|
||||
f"Pickled watch too large ({pickle_size} bytes) - might include datastore")
|
||||
|
||||
def test_tag_deepcopy_works(self):
|
||||
"""
|
||||
Ensure Tag objects (which also inherit from watch_base) can be deepcopied.
|
||||
|
||||
Tags now have optional __datastore for consistency with Watch objects.
|
||||
"""
|
||||
mock_datastore = {
|
||||
'settings': {'application': {}},
|
||||
'watching': {}
|
||||
}
|
||||
|
||||
# Test 1: Tag without datastore (backward compatibility)
|
||||
tag_without_ds = Tag.model(
|
||||
datastore_path='/tmp/test',
|
||||
default={'title': 'Test Tag', 'overrides_watch': True}
|
||||
)
|
||||
tag_copy1 = deepcopy(tag_without_ds)
|
||||
self.assertEqual(tag_copy1['title'], 'Test Tag', "Tag data should be copied")
|
||||
|
||||
# Test 2: Tag with datastore (new pattern for consistency)
|
||||
tag_with_ds = Tag.model(
|
||||
datastore_path='/tmp/test',
|
||||
__datastore=mock_datastore,
|
||||
default={'title': 'Test Tag With DS', 'overrides_watch': True}
|
||||
)
|
||||
|
||||
# Deepcopy should work
|
||||
tag_copy2 = deepcopy(tag_with_ds)
|
||||
|
||||
# Test 3: Dict data is copied
|
||||
self.assertEqual(tag_copy2['title'], 'Test Tag With DS', "Tag data should be copied")
|
||||
|
||||
# Test 4: Modifications to copy don't affect original
|
||||
tag_copy2['title'] = 'MODIFIED'
|
||||
self.assertNotEqual(tag_with_ds['title'], 'MODIFIED',
|
||||
"Modifying copy should not affect original")
|
||||
|
||||
# Test 5: Tag with datastore shares it (doesn't copy it)
|
||||
if hasattr(tag_with_ds, '_datastore'):
|
||||
self.assertIs(tag_copy2._datastore, tag_with_ds._datastore,
|
||||
"Tag should share __datastore reference like Watch does")
|
||||
|
||||
def test_watch_copy_performance(self):
|
||||
"""
|
||||
Verify that our __deepcopy__ implementation doesn't cause performance issues.
|
||||
|
||||
With the fix, deepcopy should be fast because we're sharing datastore
|
||||
instead of copying it.
|
||||
"""
|
||||
import time
|
||||
|
||||
# Create a watch with large datastore (many watches)
|
||||
mock_datastore = {
|
||||
'settings': {'application': {}},
|
||||
'watching': {}
|
||||
}
|
||||
|
||||
# Add 100 watches to the datastore
|
||||
for i in range(100):
|
||||
w = Watch.model(
|
||||
__datastore=mock_datastore,
|
||||
datastore_path='/tmp/test',
|
||||
default={'url': f'https://example{i}.com'}
|
||||
)
|
||||
mock_datastore['watching'][w['uuid']] = w
|
||||
|
||||
# Time how long deepcopy takes
|
||||
watch = list(mock_datastore['watching'].values())[0]
|
||||
|
||||
start = time.time()
|
||||
for _ in range(10):
|
||||
_ = deepcopy(watch)
|
||||
elapsed = time.time() - start
|
||||
|
||||
# Should be fast (< 0.1 seconds for 10 copies)
|
||||
# If datastore was copied, it would take much longer
|
||||
self.assertLess(elapsed, 0.5,
|
||||
f"Deepcopy too slow ({elapsed:.3f}s for 10 copies) - might be copying datastore")
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
||||
@@ -6,6 +6,42 @@ from flask import url_for
|
||||
import logging
|
||||
import time
|
||||
import os
|
||||
import threading
|
||||
|
||||
# Thread-safe global storage for test endpoint content
|
||||
# Avoids filesystem cache issues in parallel tests
|
||||
_test_endpoint_content_lock = threading.Lock()
|
||||
_test_endpoint_content = {}
|
||||
|
||||
def write_test_file_and_sync(filepath, content, mode='w'):
|
||||
"""
|
||||
Write test data to file and ensure it's synced to disk.
|
||||
Also stores in thread-safe global dict to bypass filesystem cache.
|
||||
|
||||
Critical for parallel tests where workers may read files immediately after write.
|
||||
Without fsync(), data may still be in OS buffers when workers try to read,
|
||||
causing race conditions where old data is seen.
|
||||
|
||||
Args:
|
||||
filepath: Full path to file
|
||||
content: Content to write (str or bytes)
|
||||
mode: File mode ('w' for text, 'wb' for binary)
|
||||
"""
|
||||
# Convert content to bytes if needed
|
||||
if isinstance(content, str):
|
||||
content_bytes = content.encode('utf-8')
|
||||
else:
|
||||
content_bytes = content
|
||||
|
||||
# Store in thread-safe global dict for instant access
|
||||
with _test_endpoint_content_lock:
|
||||
_test_endpoint_content[os.path.basename(filepath)] = content_bytes
|
||||
|
||||
# Also write to file for compatibility
|
||||
with open(filepath, mode) as f:
|
||||
f.write(content)
|
||||
f.flush() # Flush Python buffer to OS
|
||||
os.fsync(f.fileno()) # Force OS to write to disk
|
||||
|
||||
def set_original_response(datastore_path, extra_title=''):
|
||||
test_return_data = f"""<html>
|
||||
@@ -20,8 +56,7 @@ def set_original_response(datastore_path, extra_title=''):
|
||||
</html>
|
||||
"""
|
||||
|
||||
with open(os.path.join(datastore_path, "endpoint-content.txt"), "w") as f:
|
||||
f.write(test_return_data)
|
||||
write_test_file_and_sync(os.path.join(datastore_path, "endpoint-content.txt"), test_return_data)
|
||||
return None
|
||||
|
||||
def set_modified_response(datastore_path):
|
||||
@@ -36,9 +71,7 @@ def set_modified_response(datastore_path):
|
||||
</html>
|
||||
"""
|
||||
|
||||
with open(os.path.join(datastore_path, "endpoint-content.txt"), "w") as f:
|
||||
f.write(test_return_data)
|
||||
|
||||
write_test_file_and_sync(os.path.join(datastore_path, "endpoint-content.txt"), test_return_data)
|
||||
return None
|
||||
def set_longer_modified_response(datastore_path):
|
||||
test_return_data = """<html>
|
||||
@@ -55,9 +88,7 @@ def set_longer_modified_response(datastore_path):
|
||||
</html>
|
||||
"""
|
||||
|
||||
with open(os.path.join(datastore_path, "endpoint-content.txt"), "w") as f:
|
||||
f.write(test_return_data)
|
||||
|
||||
write_test_file_and_sync(os.path.join(datastore_path, "endpoint-content.txt"), test_return_data)
|
||||
return None
|
||||
|
||||
def set_more_modified_response(datastore_path):
|
||||
@@ -73,17 +104,14 @@ def set_more_modified_response(datastore_path):
|
||||
</html>
|
||||
"""
|
||||
|
||||
with open(os.path.join(datastore_path, "endpoint-content.txt"), "w") as f:
|
||||
f.write(test_return_data)
|
||||
|
||||
write_test_file_and_sync(os.path.join(datastore_path, "endpoint-content.txt"), test_return_data)
|
||||
return None
|
||||
|
||||
|
||||
def set_empty_text_response(datastore_path):
|
||||
test_return_data = """<html><body></body></html>"""
|
||||
|
||||
with open(os.path.join(datastore_path, "endpoint-content.txt"), "w") as f:
|
||||
f.write(test_return_data)
|
||||
write_test_file_and_sync(os.path.join(datastore_path, "endpoint-content.txt"), test_return_data)
|
||||
|
||||
return None
|
||||
|
||||
@@ -132,46 +160,35 @@ def extract_UUID_from_client(client):
|
||||
return uuid.strip()
|
||||
|
||||
def delete_all_watches(client=None):
|
||||
|
||||
uuids = list(client.application.config.get('DATASTORE').data['watching'])
|
||||
for uuid in uuids:
|
||||
client.application.config.get('DATASTORE').delete(uuid)
|
||||
from changedetectionio.flask_app import update_q
|
||||
|
||||
# Clear the queue to prevent leakage to next test
|
||||
# Use clear() method to ensure both priority_items and notification_queue are drained
|
||||
if hasattr(update_q, 'clear'):
|
||||
update_q.clear()
|
||||
else:
|
||||
# Fallback for old implementation
|
||||
while not update_q.empty():
|
||||
try:
|
||||
update_q.get_nowait()
|
||||
except:
|
||||
break
|
||||
|
||||
time.sleep(0.2)
|
||||
|
||||
def wait_for_all_checks(client=None):
|
||||
"""
|
||||
Waits until the queue is empty and workers are idle.
|
||||
Much faster than the original with adaptive timing.
|
||||
Delegates to worker_pool.wait_for_all_checks for shared logic.
|
||||
"""
|
||||
from changedetectionio.flask_app import update_q as global_update_q
|
||||
from changedetectionio import worker_handler
|
||||
empty_since = None
|
||||
attempt = 0
|
||||
max_attempts = 150 # Still reasonable upper bound
|
||||
from changedetectionio import worker_pool
|
||||
return worker_pool.wait_for_all_checks(global_update_q, timeout=150)
|
||||
|
||||
while attempt < max_attempts:
|
||||
# Start with fast checks, slow down if needed
|
||||
if attempt < 10:
|
||||
time.sleep(0.2) # Very fast initial checks
|
||||
elif attempt < 30:
|
||||
time.sleep(0.4) # Medium speed
|
||||
else:
|
||||
time.sleep(0.8) # Slower for persistent issues
|
||||
|
||||
q_length = global_update_q.qsize()
|
||||
running_uuids = worker_handler.get_running_uuids()
|
||||
any_workers_busy = len(running_uuids) > 0
|
||||
|
||||
if q_length == 0 and not any_workers_busy:
|
||||
if empty_since is None:
|
||||
empty_since = time.time()
|
||||
# Brief stabilization period for async workers
|
||||
elif time.time() - empty_since >= 0.3:
|
||||
break
|
||||
else:
|
||||
empty_since = None
|
||||
|
||||
attempt += 1
|
||||
time.sleep(0.3)
|
||||
|
||||
def wait_for_watch_history(client, min_history_count=2, timeout=10):
|
||||
"""
|
||||
@@ -220,8 +237,11 @@ def new_live_server_setup(live_server):
|
||||
|
||||
@live_server.app.route('/test-endpoint')
|
||||
def test_endpoint():
|
||||
from loguru import logger
|
||||
logger.debug(f"/test-endpoint hit {request}")
|
||||
# REMOVED: logger.debug() causes file locking between test process and Flask server process
|
||||
# Flask server runs in separate multiprocessing.Process and inherited loguru tries to
|
||||
# write to same log files, causing request handlers to block on file locks
|
||||
# from loguru import logger
|
||||
# logger.debug(f"/test-endpoint hit {request}")
|
||||
ctype = request.args.get('content_type')
|
||||
status_code = request.args.get('status_code')
|
||||
content = request.args.get('content') or None
|
||||
@@ -243,15 +263,35 @@ def new_live_server_setup(live_server):
|
||||
resp.headers['Content-Type'] = ctype if ctype else 'text/html'
|
||||
return resp
|
||||
|
||||
# Tried using a global var here but didn't seem to work, so reading from a file instead.
|
||||
datastore_path = current_app.config.get('TEST_DATASTORE_PATH', 'test-datastore')
|
||||
with open(os.path.join(datastore_path, "endpoint-content.txt"), "rb") as f:
|
||||
resp = make_response(f.read(), status_code)
|
||||
if uppercase_headers:
|
||||
resp.headers['CONTENT-TYPE'] = ctype if ctype else 'text/html'
|
||||
else:
|
||||
resp.headers['Content-Type'] = ctype if ctype else 'text/html'
|
||||
return resp
|
||||
# Check thread-safe global dict first (instant, no cache issues)
|
||||
# Fall back to file if not in dict (for tests that write directly)
|
||||
with _test_endpoint_content_lock:
|
||||
content_data = _test_endpoint_content.get("endpoint-content.txt")
|
||||
|
||||
if content_data is None:
|
||||
# Not in global dict, read from file
|
||||
datastore_path = current_app.config.get('TEST_DATASTORE_PATH', 'test-datastore')
|
||||
filepath = os.path.join(datastore_path, "endpoint-content.txt")
|
||||
|
||||
# REMOVED: os.sync() was blocking for many seconds during parallel tests
|
||||
# With -n 6+ parallel tests, heavy I/O causes os.sync() to wait for ALL
|
||||
# system writes to complete, causing "Read timed out" errors
|
||||
# File writes from test code are already flushed by the time workers fetch
|
||||
|
||||
try:
|
||||
with open(filepath, "rb") as f:
|
||||
content_data = f.read()
|
||||
except Exception as e:
|
||||
# REMOVED: logger.error() causes file locking in multiprocess context
|
||||
# Just raise the exception directly for debugging
|
||||
raise
|
||||
|
||||
resp = make_response(content_data, status_code)
|
||||
if uppercase_headers:
|
||||
resp.headers['CONTENT-TYPE'] = ctype if ctype else 'text/html'
|
||||
else:
|
||||
resp.headers['Content-Type'] = ctype if ctype else 'text/html'
|
||||
return resp
|
||||
except FileNotFoundError:
|
||||
return make_response('', status_code)
|
||||
|
||||
@@ -326,6 +366,12 @@ def new_live_server_setup(live_server):
|
||||
def test_pdf_endpoint():
|
||||
datastore_path = current_app.config.get('TEST_DATASTORE_PATH', 'test-datastore')
|
||||
|
||||
# Force filesystem sync before reading to ensure fresh data
|
||||
try:
|
||||
os.sync()
|
||||
except (AttributeError, PermissionError):
|
||||
pass
|
||||
|
||||
# Tried using a global var here but didn't seem to work, so reading from a file instead.
|
||||
with open(os.path.join(datastore_path, "endpoint-test.pdf"), "rb") as f:
|
||||
resp = make_response(f.read(), 200)
|
||||
|
||||
@@ -0,0 +1,101 @@
|
||||
# Translation Guide
|
||||
|
||||
## Updating Translations
|
||||
|
||||
To maintain consistency and minimize unnecessary changes in translation files, run these commands:
|
||||
|
||||
```bash
|
||||
python setup.py extract_messages # Extract translatable strings
|
||||
python setup.py update_catalog # Update all language files
|
||||
python setup.py compile_catalog # Compile to binary .mo files
|
||||
```
|
||||
|
||||
## Configuration
|
||||
|
||||
All translation settings are configured in **`../../setup.cfg`** (single source of truth).
|
||||
|
||||
The configuration below is shown for reference - **edit `setup.cfg` to change settings**:
|
||||
|
||||
```ini
|
||||
[extract_messages]
|
||||
# Extract translatable strings from source code
|
||||
mapping_file = babel.cfg
|
||||
output_file = changedetectionio/translations/messages.pot
|
||||
input_paths = changedetectionio
|
||||
keywords = _ _l gettext
|
||||
# Options to reduce unnecessary changes in .pot files
|
||||
sort_by_file = true # Keeps entries ordered by file path
|
||||
width = 120 # Consistent line width (prevents rewrapping)
|
||||
add_location = file # Show file path only (not line numbers)
|
||||
|
||||
[update_catalog]
|
||||
# Update existing .po files with new strings from .pot
|
||||
# Note: 'locale' is omitted - Babel auto-discovers all catalogs in output_dir
|
||||
input_file = changedetectionio/translations/messages.pot
|
||||
output_dir = changedetectionio/translations
|
||||
domain = messages
|
||||
# Options for consistent formatting
|
||||
width = 120 # Consistent line width
|
||||
no_fuzzy_matching = true # Avoids incorrect automatic matches
|
||||
|
||||
[compile_catalog]
|
||||
# Compile .po files to .mo binary format
|
||||
directory = changedetectionio/translations
|
||||
domain = messages
|
||||
```
|
||||
|
||||
**Key formatting options:**
|
||||
- `sort_by_file = true` - Orders entries by file path (consistent ordering)
|
||||
- `width = 120` - Fixed line width prevents text rewrapping
|
||||
- `add_location = file` - Shows file path only, not line numbers (reduces git churn)
|
||||
- `no_fuzzy_matching = true` - Prevents incorrect automatic fuzzy matches
|
||||
|
||||
## Why Use These Commands?
|
||||
|
||||
Running pybabel commands directly without consistent options causes:
|
||||
- ❌ Entries get reordered differently each time
|
||||
- ❌ Text gets rewrapped at different widths
|
||||
- ❌ Line numbers change every edit (if not configured)
|
||||
- ❌ Large diffs that make code review difficult
|
||||
|
||||
Using `python setup.py` commands ensures:
|
||||
- ✅ Consistent ordering (by file path, not alphabetically)
|
||||
- ✅ Consistent line width (120 characters, no rewrapping)
|
||||
- ✅ File-only locations (no line number churn)
|
||||
- ✅ No fuzzy matching (prevents incorrect auto-translations)
|
||||
- ✅ Minimal diffs (only actual changes show up)
|
||||
- ✅ Easier code review and git history
|
||||
|
||||
These commands read settings from `../../setup.cfg` automatically.
|
||||
|
||||
## Supported Languages
|
||||
|
||||
- `cs` - Czech (Čeština)
|
||||
- `de` - German (Deutsch)
|
||||
- `en_GB` - English (UK)
|
||||
- `en_US` - English (US)
|
||||
- `fr` - French (Français)
|
||||
- `it` - Italian (Italiano)
|
||||
- `ko` - Korean (한국어)
|
||||
- `zh` - Chinese Simplified (中文简体)
|
||||
- `zh_Hant_TW` - Chinese Traditional (繁體中文)
|
||||
|
||||
## Adding a New Language
|
||||
|
||||
1. Initialize the new language catalog:
|
||||
```bash
|
||||
pybabel init -i changedetectionio/translations/messages.pot -d changedetectionio/translations -l NEW_LANG_CODE
|
||||
```
|
||||
2. Compile it:
|
||||
```bash
|
||||
python setup.py compile_catalog
|
||||
```
|
||||
|
||||
Babel will auto-discover the new language on subsequent translation updates.
|
||||
|
||||
## Translation Notes
|
||||
|
||||
From CLAUDE.md:
|
||||
- Always use "monitor" or "watcher" terminology (not "clock")
|
||||
- Use the most brief wording suitable
|
||||
- When finding issues in one language, check ALL languages for the same issue
|
||||
Binary file not shown.
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user