Mirror of https://github.com/dgtlmoon/changedetection.io.git (synced 2026-02-01 11:56:03 +00:00)

Compare commits: 21 commits, screenshot ... event-loop
Commits (SHA1):
424e4ec1aa
c1dca306ad
e219e8cada
674d863a21
0b9cfcdf09
fd820c9330
e02a1824c5
5911b7fe7a
a239480272
fceb3cf39f
7f631268dd
8cc04ca7c5
4dec1e017b
9d1743adbe
f34d806b09
c22335ed01
0042f0c36a
55e14cf394
308ccb5841
978e17acf6
73c29d1fa0
.github/workflows/pypi-release.yml (vendored)
@@ -61,8 +61,8 @@ jobs:
# --- API test ---
# This also means that the docs/api-spec.yml was shipped and could be read
test -f /tmp/url-watches.json
API_KEY=$(jq -r '.. | .api_access_token? // empty' /tmp/url-watches.json)
test -f /tmp/changedetection.json
API_KEY=$(jq -r '.. | .api_access_token? // empty' /tmp/changedetection.json)
echo Test API KEY is $API_KEY
curl -X POST "http://127.0.0.1:10000/api/v1/watch" \
-H "x-api-key: ${API_KEY}" \
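For reference, the same API smoke test can be expressed in Python. This is an illustrative sketch only, not part of the workflow: the endpoint, port and x-api-key header come from the curl command above, the api_access_token lookup mirrors the jq filter, and the request payload is an assumed minimal example because the original curl body is truncated here.

    import json
    import requests  # assumed available in the test environment

    def find_token(node):
        # Walk the settings structure like jq's '.. | .api_access_token? // empty'
        if isinstance(node, dict):
            if 'api_access_token' in node:
                return node['api_access_token']
            children = node.values()
        elif isinstance(node, list):
            children = node
        else:
            return None
        for child in children:
            token = find_token(child)
            if token:
                return token
        return None

    with open('/tmp/changedetection.json') as f:
        api_key = find_token(json.load(f))

    resp = requests.post(
        "http://127.0.0.1:10000/api/v1/watch",
        headers={"x-api-key": api_key},
        json={"url": "https://example.com"},  # assumed minimal payload
    )
    print(resp.status_code, resp.text)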
@@ -111,6 +111,32 @@ jobs:
docker network inspect changedet-network >/dev/null 2>&1 || docker network create changedet-network
docker run --name test-cdio-basic-tests --network changedet-network test-changedetectionio bash -c 'cd changedetectionio && ./run_basic_tests.sh'

- name: Test CLI options
run: |
docker network inspect changedet-network >/dev/null 2>&1 || docker network create changedet-network
docker run --name test-cdio-cli-opts --network changedet-network test-changedetectionio bash -c 'changedetectionio/test_cli_opts.sh' &> cli-opts-output.txt
echo "=== CLI Options Test Output ==="
cat cli-opts-output.txt

- name: CLI Memory Test
run: |
echo "=== Checking CLI batch mode memory usage ==="
# Extract RSS memory value from output
RSS_MB=$(grep -oP "Memory consumption before worker shutdown: RSS=\K[\d.]+" cli-opts-output.txt | head -1 || echo "0")
echo "RSS Memory: ${RSS_MB} MB"

# Check if RSS is less than 100MB
if [ -n "$RSS_MB" ]; then
if (( $(echo "$RSS_MB < 100" | bc -l) )); then
echo "✓ Memory usage is acceptable: ${RSS_MB} MB < 100 MB"
else
echo "✗ Memory usage too high: ${RSS_MB} MB >= 100 MB"
exit 1
fi
else
echo "⚠ Could not extract memory usage, skipping check"
fi

- name: Extract memory report and logs
if: always()
uses: ./.github/actions/extract-memory-report
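The RSS threshold above parses a log line that this same changeset adds to sigshutdown_handler() ("Memory consumption before worker shutdown: RSS=..."). A hedged Python sketch of the equivalent extraction follows; note the application formats RSS with f"{rss_mb:,.2f}", i.e. with a thousands separator, so the pattern here also accepts commas.

    import re

    line = "Memory consumption before worker shutdown: RSS=85.32 MB, VMS=1,204.77 MB"  # example log line
    match = re.search(r"RSS=([\d,.]+)", line)
    rss_mb = float(match.group(1).replace(",", "")) if match else 0.0
    if rss_mb >= 100:
        raise SystemExit(f"Memory usage too high: {rss_mb} MB >= 100 MB")
    print(f"Memory usage is acceptable: {rss_mb} MB < 100 MB")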
@@ -125,6 +151,13 @@ jobs:
name: test-cdio-basic-tests-output-py${{ env.PYTHON_VERSION }}
path: output-logs

- name: Store CLI test output
if: always()
uses: actions/upload-artifact@v6
with:
name: test-cdio-cli-opts-output-py${{ env.PYTHON_VERSION }}
path: cli-opts-output.txt

# Playwright tests
playwright-tests:
runs-on: ubuntu-latest
@@ -9,6 +9,7 @@ recursive-include changedetectionio/notification *
recursive-include changedetectionio/processors *
recursive-include changedetectionio/realtime *
recursive-include changedetectionio/static *
recursive-include changedetectionio/store *
recursive-include changedetectionio/templates *
recursive-include changedetectionio/tests *
recursive-include changedetectionio/translations *
@@ -2,23 +2,24 @@

# Read more https://github.com/dgtlmoon/changedetection.io/wiki
# Semver means never use .01, or 00. Should be .1.
__version__ = '0.52.8'
__version__ = '0.52.9'

from changedetectionio.strtobool import strtobool
from json.decoder import JSONDecodeError

from loguru import logger
import getopt
import logging
import os
import getopt
import platform
import signal

import sys
import threading
import time

# Eventlet completely removed - using threading mode for SocketIO
# This provides better Python 3.12+ compatibility and eliminates eventlet/asyncio conflicts
from changedetectionio import store
from changedetectionio.flask_app import changedetection_app
from loguru import logger
# Note: store and changedetection_app are imported inside main() to avoid
# initialization before argument parsing (allows --help to work without loading everything)

# ==============================================================================
# Multiprocessing Configuration - CRITICAL for Thread Safety
@@ -83,15 +84,26 @@ def get_version():
def sigshutdown_handler(_signo, _stack_frame):
name = signal.Signals(_signo).name
logger.critical(f'Shutdown: Got Signal - {name} ({_signo}), Fast shutdown initiated')

# Set exit flag immediately to stop all loops
app.config.exit.set()
datastore.stop_thread = True

# Log memory consumption before shutting down workers (cross-platform)
try:
import psutil
process = psutil.Process()
mem_info = process.memory_info()
rss_mb = mem_info.rss / 1024 / 1024
vms_mb = mem_info.vms / 1024 / 1024
logger.info(f"Memory consumption before worker shutdown: RSS={rss_mb:,.2f} MB, VMS={vms_mb:,.2f} MB")
except Exception as e:
logger.warning(f"Could not retrieve memory stats: {str(e)}")

# Shutdown workers and queues immediately
try:
from changedetectionio import worker_handler
worker_handler.shutdown_workers()
from changedetectionio import worker_pool
worker_pool.shutdown_workers()
except Exception as e:
logger.error(f"Error shutting down workers: {str(e)}")
@@ -112,31 +124,85 @@ def sigshutdown_handler(_signo, _stack_frame):
except Exception as e:
logger.error(f"Error shutting down Socket.IO server: {str(e)}")

# Save data quickly
# Save data quickly - force immediate save using abstract method
try:
datastore.sync_to_json()
logger.success('Fast sync to disk complete.')
datastore.force_save_all()
logger.success('Fast sync to storage complete.')
except Exception as e:
logger.error(f"Error syncing to disk: {str(e)}")
logger.error(f"Error syncing to storage: {str(e)}")

sys.exit()

def print_help():
"""Print help text for command line options"""
print('Usage: changedetection.py [options]')
print('')
print('Standard options:')
print(' -s SSL enable')
print(' -h HOST Listen host (default: 0.0.0.0)')
print(' -p PORT Listen port (default: 5000)')
print(' -d PATH Datastore path')
print(' -l LEVEL Log level (TRACE, DEBUG, INFO, SUCCESS, WARNING, ERROR, CRITICAL)')
print(' -c Cleanup unused snapshots')
print(' -C Create datastore directory if it doesn\'t exist')
print(' -P true/false Set all watches paused (true) or active (false)')
print('')
print('Add URLs on startup:')
print(' -u URL Add URL to watch (can be used multiple times)')
print(' -u0 \'JSON\' Set options for first -u URL (e.g. \'{"processor":"text_json_diff"}\')')
print(' -u1 \'JSON\' Set options for second -u URL (0-indexed)')
print(' -u2 \'JSON\' Set options for third -u URL, etc.')
print(' Available options: processor, fetch_backend, headers, method, etc.')
print(' See model/Watch.py for all available options')
print('')
print('Recheck on startup:')
print(' -r all Queue all watches for recheck on startup')
print(' -r UUID,... Queue specific watches (comma-separated UUIDs)')
print(' -r all N Queue all watches, wait for completion, repeat N times')
print(' -r UUID,... N Queue specific watches, wait for completion, repeat N times')
print('')
print('Batch mode:')
print(' -b Run in batch mode (process queue then exit)')
print(' Useful for CI/CD, cron jobs, or one-time checks')
print(' NOTE: Batch mode checks if Flask is running and aborts if port is in use')
print(' Use -p PORT to specify a different port if needed')
print('')
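To illustrate how these options combine, a hypothetical invocation (not taken from the changeset) such as changedetection.py -b -d /tmp/ds -u https://example.com -u0 '{"processor": "text_json_diff"}' -r all 3 would be pre-parsed by the loop in main() below into roughly this state:

    urls_to_add = ['https://example.com']
    url_options = {0: {'processor': 'text_json_diff'}}      # from -u0 'JSON'
    recheck_watches = 'all'                                  # from -r all
    recheck_repeat_count = 3                                 # trailing repeat count after -r
    batch_mode = True                                        # from -b
    cleaned_argv = ['changedetection.py', '-d', '/tmp/ds']   # remainder handed to getopt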
def main():
global datastore
global app

# Early help/version check before any initialization
if '--help' in sys.argv or '-help' in sys.argv:
print_help()
sys.exit(0)

if '--version' in sys.argv or '-v' in sys.argv:
print(f'changedetection.io {__version__}')
sys.exit(0)

# Import heavy modules after help/version checks to keep startup fast for those flags
from changedetectionio import store
from changedetectionio.flask_app import changedetection_app

datastore_path = None
do_cleanup = False
# Optional URL to watch since start
default_url = None
# Set a default logger level
logger_level = 'DEBUG'
include_default_watches = True
all_paused = None # None means don't change, True/False to set

host = os.environ.get("LISTEN_HOST", "0.0.0.0").strip()
port = int(os.environ.get('PORT', 5000))
ssl_mode = False

# Lists for multiple URLs and their options
urls_to_add = []
url_options = {} # Key: index (0-based), Value: dict of options
recheck_watches = None # None, 'all', or list of UUIDs
recheck_repeat_count = 1 # Number of times to repeat recheck cycle
batch_mode = False # Run once then exit when queue is empty

# On Windows, create and use a default path.
if os.name == 'nt':
datastore_path = os.path.expandvars(r'%APPDATA%\changedetection.io')
@@ -145,10 +211,68 @@ def main():
# Must be absolute so that send_from_directory doesnt try to make it relative to backend/
datastore_path = os.path.join(os.getcwd(), "../datastore")

# Pre-process arguments to extract -u, -u<N>, and -r options before getopt
# This allows unlimited -u0, -u1, -u2, ... options without predefining them
cleaned_argv = ['changedetection.py'] # Start with program name
i = 1
while i < len(sys.argv):
arg = sys.argv[i]

# Handle -u (add URL)
if arg == '-u' and i + 1 < len(sys.argv):
urls_to_add.append(sys.argv[i + 1])
i += 2
continue

# Handle -u<N> (set options for URL at index N)
if arg.startswith('-u') and len(arg) > 2 and arg[2:].isdigit():
idx = int(arg[2:])
if i + 1 < len(sys.argv):
try:
import json
url_options[idx] = json.loads(sys.argv[i + 1])
except json.JSONDecodeError as e:
print(f'Error: Invalid JSON for {arg}: {sys.argv[i + 1]}')
print(f'JSON decode error: {e}')
sys.exit(2)
i += 2
continue

# Handle -r (recheck watches)
if arg == '-r' and i + 1 < len(sys.argv):
recheck_arg = sys.argv[i + 1]
if recheck_arg.lower() == 'all':
recheck_watches = 'all'
else:
# Parse comma-separated list of UUIDs
recheck_watches = [uuid.strip() for uuid in recheck_arg.split(',') if uuid.strip()]

# Check for optional repeat count as third argument
if i + 2 < len(sys.argv) and sys.argv[i + 2].isdigit():
recheck_repeat_count = int(sys.argv[i + 2])
if recheck_repeat_count < 1:
print(f'Error: Repeat count must be at least 1, got {recheck_repeat_count}')
sys.exit(2)
i += 3
else:
i += 2
continue

# Handle -b (batch mode - run once and exit)
if arg == '-b':
batch_mode = True
i += 1
continue

# Keep other arguments for getopt
cleaned_argv.append(arg)
i += 1

try:
opts, args = getopt.getopt(sys.argv[1:], "6Ccsd:h:p:l:u:", "port")
except getopt.GetoptError:
print('backend.py -s SSL enable -h [host] -p [port] -d [datastore path] -u [default URL to watch] -l [debug level - TRACE, DEBUG(default), INFO, SUCCESS, WARNING, ERROR, CRITICAL]')
opts, args = getopt.getopt(cleaned_argv[1:], "6Ccsd:h:p:l:P:", "port")
except getopt.GetoptError as e:
print_help()
print(f'Error: {e}')
sys.exit(2)

create_datastore_dir = False
@@ -173,10 +297,6 @@ def main():
if opt == '-d':
datastore_path = arg

if opt == '-u':
default_url = arg
include_default_watches = False

# Cleanup (remove text files that arent in the index)
if opt == '-c':
do_cleanup = True
@@ -188,6 +308,18 @@ def main():
if opt == '-l':
logger_level = int(arg) if arg.isdigit() else arg.upper()

if opt == '-P':
try:
all_paused = bool(strtobool(arg))
except ValueError:
print(f'Error: Invalid value for -P option: {arg}')
print('Expected: true, false, yes, no, 1, or 0')
sys.exit(2)

# If URLs are provided, don't include default watches
if urls_to_add:
include_default_watches = False

logger.success(f"changedetection.io version {get_version()} starting.")
# Launch using SocketIO run method for proper integration (if enabled)
@@ -224,11 +356,16 @@ def main():
logging.getLogger('pyppeteer.connection.Connection').setLevel(logging.WARNING)

# isnt there some @thingy to attach to each route to tell it, that this route needs a datastore
app_config = {'datastore_path': datastore_path}
app_config = {
'datastore_path': datastore_path,
'batch_mode': batch_mode,
'recheck_watches': recheck_watches,
'recheck_repeat_count': recheck_repeat_count
}

if not os.path.isdir(app_config['datastore_path']):
if create_datastore_dir:
os.mkdir(app_config['datastore_path'])
os.makedirs(app_config['datastore_path'], exist_ok=True)
else:
logger.critical(
f"ERROR: Directory path for the datastore '{app_config['datastore_path']}'"
@@ -245,15 +382,209 @@ def main():
logger.critical(str(e))
return

# Apply all_paused setting if specified via CLI
if all_paused is not None:
datastore.data['settings']['application']['all_paused'] = all_paused
logger.info(f"Setting all watches paused: {all_paused}")

# Inject datastore into plugins that need access to settings
from changedetectionio.pluggy_interface import inject_datastore_into_plugins
inject_datastore_into_plugins(datastore)

if default_url:
datastore.add_watch(url = default_url)
# Step 1: Add URLs with their options (if provided via -u flags)
added_watch_uuids = []
if urls_to_add:
logger.info(f"Adding {len(urls_to_add)} URL(s) from command line")
for idx, url in enumerate(urls_to_add):
extras = url_options.get(idx, {})
if extras:
logger.debug(f"Adding watch {idx}: {url} with options: {extras}")
else:
logger.debug(f"Adding watch {idx}: {url}")

new_uuid = datastore.add_watch(url=url, extras=extras)
if new_uuid:
added_watch_uuids.append(new_uuid)
logger.success(f"Added watch: {url} (UUID: {new_uuid})")
else:
logger.error(f"Failed to add watch: {url}")

app = changedetection_app(app_config, datastore)

# Step 2: Queue newly added watches (if -u was provided in batch mode)
# This must happen AFTER app initialization so update_q is available
if batch_mode and added_watch_uuids:
from changedetectionio.flask_app import update_q
from changedetectionio import queuedWatchMetaData, worker_pool

logger.info(f"Batch mode: Queuing {len(added_watch_uuids)} newly added watches")
for watch_uuid in added_watch_uuids:
try:
worker_pool.queue_item_async_safe(
update_q,
queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid})
)
logger.debug(f"Queued newly added watch: {watch_uuid}")
except Exception as e:
logger.error(f"Failed to queue watch {watch_uuid}: {e}")

# Step 3: Queue watches for recheck (if -r was provided)
# This must happen AFTER app initialization so update_q is available
if recheck_watches is not None:
from changedetectionio.flask_app import update_q
from changedetectionio import queuedWatchMetaData, worker_pool

watches_to_queue = []
if recheck_watches == 'all':
# Queue all watches, excluding those already queued in batch mode
all_watches = list(datastore.data['watching'].keys())
if batch_mode and added_watch_uuids:
# Exclude newly added watches that were already queued in batch mode
watches_to_queue = [uuid for uuid in all_watches if uuid not in added_watch_uuids]
logger.info(f"Queuing {len(watches_to_queue)} existing watches for recheck ({len(added_watch_uuids)} newly added watches already queued)")
else:
watches_to_queue = all_watches
logger.info(f"Queuing all {len(watches_to_queue)} watches for recheck")
else:
# Queue specific UUIDs
watches_to_queue = recheck_watches
logger.info(f"Queuing {len(watches_to_queue)} specific watches for recheck")

queued_count = 0
for watch_uuid in watches_to_queue:
if watch_uuid in datastore.data['watching']:
try:
worker_pool.queue_item_async_safe(
update_q,
queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid})
)
queued_count += 1
logger.debug(f"Queued watch for recheck: {watch_uuid}")
except Exception as e:
logger.error(f"Failed to queue watch {watch_uuid}: {e}")
else:
logger.warning(f"Watch UUID not found in datastore: {watch_uuid}")

logger.success(f"Successfully queued {queued_count} watches for recheck")
# Step 4: Setup batch mode monitor (if -b was provided)
if batch_mode:
from changedetectionio.flask_app import update_q

# Safety check: Ensure Flask app is not already running on this port
# Batch mode should never run alongside the web server
import socket
test_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)

try:
# Try to bind to the configured host:port (no SO_REUSEADDR - strict check)
test_socket.bind((host, port))
test_socket.close()
logger.debug(f"Batch mode: Port {port} is available (Flask app not running)")
except OSError as e:
test_socket.close()
# errno 98 = EADDRINUSE (Linux)
# errno 48 = EADDRINUSE (macOS)
# errno 10048 = WSAEADDRINUSE (Windows)
if e.errno in (48, 98, 10048) or "Address already in use" in str(e) or "already in use" in str(e).lower():
logger.critical(f"ERROR: Batch mode cannot run - port {port} is already in use")
logger.critical(f"The Flask web server appears to be running on {host}:{port}")
logger.critical(f"Batch mode is designed for standalone operation (CI/CD, cron jobs, etc.)")
logger.critical(f"Please either stop the Flask web server, or use a different port with -p PORT")
sys.exit(1)
else:
# Some other socket error - log but continue (might be network configuration issue)
logger.warning(f"Port availability check failed with unexpected error: {e}")
logger.warning(f"Continuing with batch mode anyway - be aware of potential conflicts")

def queue_watches_for_recheck(datastore, iteration):
"""Helper function to queue watches for recheck"""
watches_to_queue = []
if recheck_watches == 'all':
all_watches = list(datastore.data['watching'].keys())
if batch_mode and added_watch_uuids and iteration == 1:
# Only exclude newly added watches on first iteration
watches_to_queue = [uuid for uuid in all_watches if uuid not in added_watch_uuids]
else:
watches_to_queue = all_watches
logger.info(f"Batch mode (iteration {iteration}): Queuing all {len(watches_to_queue)} watches")
elif recheck_watches:
watches_to_queue = recheck_watches
logger.info(f"Batch mode (iteration {iteration}): Queuing {len(watches_to_queue)} specific watches")

queued_count = 0
for watch_uuid in watches_to_queue:
if watch_uuid in datastore.data['watching']:
try:
worker_pool.queue_item_async_safe(
update_q,
queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid})
)
queued_count += 1
except Exception as e:
logger.error(f"Failed to queue watch {watch_uuid}: {e}")
else:
logger.warning(f"Watch UUID not found in datastore: {watch_uuid}")
logger.success(f"Batch mode (iteration {iteration}): Successfully queued {queued_count} watches")
return queued_count

def batch_mode_monitor():
"""Monitor queue and workers, shutdown or repeat when work is complete"""
import time

# Track iterations if repeat mode is enabled
current_iteration = 1
total_iterations = recheck_repeat_count if recheck_watches and recheck_repeat_count > 1 else 1

if total_iterations > 1:
logger.info(f"Batch mode: Will repeat recheck {total_iterations} times")
else:
logger.info("Batch mode: Waiting for all queued items to complete...")

# Wait a bit for workers to start processing
time.sleep(3)

try:
while current_iteration <= total_iterations:
logger.info(f"Batch mode: Waiting for iteration {current_iteration}/{total_iterations} to complete...")

# Use the shared wait_for_all_checks function
completed = worker_pool.wait_for_all_checks(update_q, timeout=300)

if not completed:
logger.warning(f"Batch mode: Iteration {current_iteration} timed out after 300 seconds")

logger.success(f"Batch mode: Iteration {current_iteration}/{total_iterations} completed")

# Check if we need to repeat
if current_iteration < total_iterations:
logger.info(f"Batch mode: Starting iteration {current_iteration + 1}...")
current_iteration += 1

# Re-queue watches for next iteration
queue_watches_for_recheck(datastore, current_iteration)

# Brief pause before continuing
time.sleep(2)
else:
# All iterations complete
logger.success(f"Batch mode: All {total_iterations} iterations completed, initiating shutdown")
# Trigger shutdown
import os, signal
os.kill(os.getpid(), signal.SIGTERM)
return

except Exception as e:
logger.error(f"Batch mode monitor error: {e}")
logger.error(f"Initiating emergency shutdown")
import os, signal
os.kill(os.getpid(), signal.SIGTERM)

# Start monitor in background thread
monitor_thread = threading.Thread(target=batch_mode_monitor, daemon=True, name="BatchModeMonitor")
monitor_thread.start()
logger.info("Batch mode enabled: Will exit after all queued items are processed")

# Get the SocketIO instance from the Flask app (created in flask_app.py)
from changedetectionio.flask_app import socketio_server
global socketio
@@ -311,23 +642,43 @@ def main():
if os.getenv('USE_X_SETTINGS'):
logger.info("USE_X_SETTINGS is ENABLED")
from werkzeug.middleware.proxy_fix import ProxyFix
app.wsgi_app = ProxyFix(app.wsgi_app, x_prefix=1, x_host=1)
app.wsgi_app = ProxyFix(
app.wsgi_app,
x_for=1, # X-Forwarded-For (client IP)
x_proto=1, # X-Forwarded-Proto (http/https)
x_host=1, # X-Forwarded-Host (original host)
x_port=1, # X-Forwarded-Port (original port)
x_prefix=1 # X-Forwarded-Prefix (URL prefix)
)
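A hedged illustration (not part of the changeset) of what trusting those X-Forwarded-* headers means in practice: behind a reverse proxy, Werkzeug rewrites the WSGI environ so Flask sees the original scheme, host and client address instead of the proxy's.

    from flask import Flask, request
    from werkzeug.middleware.proxy_fix import ProxyFix

    demo = Flask(__name__)
    demo.wsgi_app = ProxyFix(demo.wsgi_app, x_for=1, x_proto=1, x_host=1, x_port=1, x_prefix=1)

    @demo.route('/where')
    def where():
        # url_root reflects the forwarded scheme/host, remote_addr the forwarded client IP
        return f"{request.url_root} {request.remote_addr}"

    client = demo.test_client()
    resp = client.get('/where', headers={
        'X-Forwarded-Proto': 'https',
        'X-Forwarded-Host': 'changedetection.example.org',
        'X-Forwarded-For': '203.0.113.7',
    })
    print(resp.get_data(as_text=True))  # e.g. "https://changedetection.example.org/ 203.0.113.7"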

# SocketIO instance is already initialized in flask_app.py
if socketio_server:
if ssl_mode:
logger.success(f"SSL mode enabled, attempting to start with '{ssl_cert_file}' and '{ssl_privkey_file}' in {os.getcwd()}")
socketio.run(app, host=host, port=int(port), debug=False,
ssl_context=(ssl_cert_file, ssl_privkey_file), allow_unsafe_werkzeug=True)
else:
socketio.run(app, host=host, port=int(port), debug=False, allow_unsafe_werkzeug=True)
# In batch mode, skip starting the HTTP server - just keep workers running
if batch_mode:
logger.info("Batch mode: Skipping HTTP server startup, workers will process queue")
logger.info("Batch mode: Main thread will wait for shutdown signal")
# Keep main thread alive until batch monitor triggers shutdown
try:
while True:
time.sleep(1)
except KeyboardInterrupt:
logger.info("Batch mode: Keyboard interrupt received")
pass
else:
# Run Flask app without Socket.IO if disabled
logger.info("Starting Flask app without Socket.IO server")
if ssl_mode:
logger.success(f"SSL mode enabled, attempting to start with '{ssl_cert_file}' and '{ssl_privkey_file}' in {os.getcwd()}")
app.run(host=host, port=int(port), debug=False,
ssl_context=(ssl_cert_file, ssl_privkey_file))
# Normal mode: Start HTTP server
# SocketIO instance is already initialized in flask_app.py
if socketio_server:
if ssl_mode:
logger.success(f"SSL mode enabled, attempting to start with '{ssl_cert_file}' and '{ssl_privkey_file}' in {os.getcwd()}")
socketio.run(app, host=host, port=int(port), debug=False,
ssl_context=(ssl_cert_file, ssl_privkey_file), allow_unsafe_werkzeug=True)
else:
socketio.run(app, host=host, port=int(port), debug=False, allow_unsafe_werkzeug=True)
else:
app.run(host=host, port=int(port), debug=False)
# Run Flask app without Socket.IO if disabled
logger.info("Starting Flask app without Socket.IO server")
if ssl_mode:
logger.success(f"SSL mode enabled, attempting to start with '{ssl_cert_file}' and '{ssl_privkey_file}' in {os.getcwd()}")
app.run(host=host, port=int(port), debug=False,
ssl_context=(ssl_cert_file, ssl_privkey_file))
else:
app.run(host=host, port=int(port), debug=False)
@@ -1,5 +1,5 @@
from changedetectionio import queuedWatchMetaData
from changedetectionio import worker_handler
from changedetectionio import worker_pool
from flask_expects_json import expects_json
from flask_restful import abort, Resource
from loguru import logger
@@ -42,7 +42,7 @@ class Tag(Resource):
# If less than 20 watches, queue synchronously for immediate feedback
if len(watches_to_queue) < 20:
for watch_uuid in watches_to_queue:
worker_handler.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid}))
worker_pool.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid}))
return {'status': f'OK, queued {len(watches_to_queue)} watches for rechecking'}, 200
else:
# 20+ watches - queue in background thread to avoid blocking API response
@@ -50,7 +50,7 @@ class Tag(Resource):
"""Background thread to queue watches - discarded after completion."""
try:
for watch_uuid in watches_to_queue:
worker_handler.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid}))
worker_pool.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid}))
logger.info(f"Background queueing complete for tag {tag['uuid']}: {len(watches_to_queue)} watches queued")
except Exception as e:
logger.error(f"Error in background queueing for tag {tag['uuid']}: {e}")
@@ -96,6 +96,16 @@ class Tag(Resource):
if not tag:
abort(404, message='No tag exists with the UUID of {}'.format(uuid))

# Validate notification_urls if provided
if 'notification_urls' in request.json:
from wtforms import ValidationError
from changedetectionio.api.Notifications import validate_notification_urls
try:
notification_urls = request.json.get('notification_urls', [])
validate_notification_urls(notification_urls)
except ValidationError as e:
return str(e), 400

tag.update(request.json)
self.datastore.needs_write_urgent = True

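For API clients, the effect of this validation can be sketched as follows. This is illustrative only: the x-api-key header and JSON body shape follow the API usage elsewhere in this changeset, while the /api/v1/tag/<uuid> path, the placeholder UUID and the exact success/error payloads are assumptions about the surrounding resource.

    import requests  # hypothetical client-side sketch

    base = "http://127.0.0.1:5000/api/v1"          # default port from the CLI help above
    headers = {"x-api-key": "YOUR-API-KEY"}
    tag_uuid = "00000000-0000-0000-0000-000000000000"  # placeholder

    # A parseable notification URL is accepted along with the rest of the update...
    ok = requests.put(f"{base}/tag/{tag_uuid}", headers=headers,
                      json={"notification_urls": ["mailto://user:password@example.com"]})

    # ...while an entry that validate_notification_urls() rejects now yields HTTP 400
    # with the ValidationError text as the response body.
    bad = requests.put(f"{base}/tag/{tag_uuid}", headers=headers,
                       json={"notification_urls": ["this is not a notification url"]})
    print(ok.status_code, bad.status_code)  # expected roughly: 2xx, 400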
@@ -6,7 +6,7 @@ from changedetectionio.favicon_utils import get_favicon_mime_type

from . import auth
from changedetectionio import queuedWatchMetaData, strtobool
from changedetectionio import worker_handler
from changedetectionio import worker_pool
from flask import request, make_response, send_from_directory
from flask_expects_json import expects_json
from flask_restful import abort, Resource
@@ -85,7 +85,7 @@ class Watch(Resource):
abort(404, message='No watch exists with the UUID of {}'.format(uuid))

if request.args.get('recheck'):
worker_handler.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
worker_pool.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
return "OK", 200
if request.args.get('paused', '') == 'paused':
self.datastore.data['watching'].get(uuid).pause()
@@ -140,6 +140,16 @@ class Watch(Resource):
|
||||
if validation_error:
|
||||
return validation_error, 400
|
||||
|
||||
# Validate notification_urls if provided
|
||||
if 'notification_urls' in request.json:
|
||||
from wtforms import ValidationError
|
||||
from changedetectionio.api.Notifications import validate_notification_urls
|
||||
try:
|
||||
notification_urls = request.json.get('notification_urls', [])
|
||||
validate_notification_urls(notification_urls)
|
||||
except ValidationError as e:
|
||||
return str(e), 400
|
||||
|
||||
# XSS etc protection - validate URL if it's being updated
|
||||
if 'url' in request.json:
|
||||
new_url = request.json.get('url')
|
||||
@@ -444,6 +454,16 @@ class CreateWatch(Resource):
|
||||
if validation_error:
|
||||
return validation_error, 400
|
||||
|
||||
# Validate notification_urls if provided
|
||||
if 'notification_urls' in json_data:
|
||||
from wtforms import ValidationError
|
||||
from changedetectionio.api.Notifications import validate_notification_urls
|
||||
try:
|
||||
notification_urls = json_data.get('notification_urls', [])
|
||||
validate_notification_urls(notification_urls)
|
||||
except ValidationError as e:
|
||||
return str(e), 400
|
||||
|
||||
extras = copy.deepcopy(json_data)
|
||||
|
||||
# Because we renamed 'tag' to 'tags' but don't want to change the API (can do this in v2 of the API)
|
||||
@@ -457,7 +477,7 @@ class CreateWatch(Resource):
|
||||
new_uuid = self.datastore.add_watch(url=url, extras=extras, tag=tags)
|
||||
if new_uuid:
|
||||
# Dont queue because the scheduler will check that it hasnt been checked before anyway
|
||||
# worker_handler.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid}))
|
||||
# worker_pool.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid}))
|
||||
return {'uuid': new_uuid}, 201
|
||||
else:
|
||||
return "Invalid or unsupported URL", 400
|
||||
@@ -494,7 +514,7 @@ class CreateWatch(Resource):
|
||||
if len(watches_to_queue) < 20:
|
||||
# Get already queued/running UUIDs once (efficient)
|
||||
queued_uuids = set(self.update_q.get_queued_uuids())
|
||||
running_uuids = set(worker_handler.get_running_uuids())
|
||||
running_uuids = set(worker_pool.get_running_uuids())
|
||||
|
||||
# Filter out watches that are already queued or running
|
||||
watches_to_queue_filtered = [
|
||||
@@ -504,7 +524,7 @@ class CreateWatch(Resource):
|
||||
|
||||
# Queue only the filtered watches
|
||||
for uuid in watches_to_queue_filtered:
|
||||
worker_handler.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
worker_pool.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
|
||||
# Provide feedback about skipped watches
|
||||
skipped_count = len(watches_to_queue) - len(watches_to_queue_filtered)
|
||||
@@ -516,7 +536,7 @@ class CreateWatch(Resource):
|
||||
# 20+ watches - queue in background thread to avoid blocking API response
|
||||
# Capture queued/running state before background thread
|
||||
queued_uuids = set(self.update_q.get_queued_uuids())
|
||||
running_uuids = set(worker_handler.get_running_uuids())
|
||||
running_uuids = set(worker_pool.get_running_uuids())
|
||||
|
||||
def queue_all_watches_background():
|
||||
"""Background thread to queue all watches - discarded after completion."""
|
||||
@@ -526,7 +546,7 @@ class CreateWatch(Resource):
|
||||
for uuid in watches_to_queue:
|
||||
# Check if already queued or running (state captured at start)
|
||||
if uuid not in queued_uuids and uuid not in running_uuids:
|
||||
worker_handler.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
worker_pool.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
queued_count += 1
|
||||
else:
|
||||
skipped_count += 1
|
||||
|
||||
@@ -27,11 +27,23 @@ def create_backup(datastore_path, watches: dict):
compression=zipfile.ZIP_DEFLATED,
compresslevel=8) as zipObj:

# Add the index
zipObj.write(os.path.join(datastore_path, "url-watches.json"), arcname="url-watches.json")
# Add the settings file (supports both formats)
# New format: changedetection.json
changedetection_json = os.path.join(datastore_path, "changedetection.json")
if os.path.isfile(changedetection_json):
zipObj.write(changedetection_json, arcname="changedetection.json")
logger.debug("Added changedetection.json to backup")

# Add the flask app secret
zipObj.write(os.path.join(datastore_path, "secret.txt"), arcname="secret.txt")
# Legacy format: url-watches.json (for backward compatibility)
url_watches_json = os.path.join(datastore_path, "url-watches.json")
if os.path.isfile(url_watches_json):
zipObj.write(url_watches_json, arcname="url-watches.json")
logger.debug("Added url-watches.json to backup")

# Add the flask app secret (if it exists)
secret_file = os.path.join(datastore_path, "secret.txt")
if os.path.isfile(secret_file):
zipObj.write(secret_file, arcname="secret.txt")

# Add any data in the watch data directory.
for uuid, w in watches.items():
@@ -90,8 +102,8 @@ def construct_blueprint(datastore: ChangeDetectionStore):
flash(gettext("Maximum number of backups reached, please remove some"), "error")
return redirect(url_for('backups.index'))

# Be sure we're written fresh
datastore.sync_to_json()
# Be sure we're written fresh - force immediate save using abstract method
datastore.force_save_all()
zip_thread = threading.Thread(
target=create_backup,
args=(datastore.datastore_path, datastore.data.get("watching")),
@@ -14,7 +14,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
from changedetectionio import forms
|
||||
#
|
||||
if request.method == 'POST':
|
||||
# from changedetectionio import worker_handler
|
||||
# from changedetectionio import worker_pool
|
||||
|
||||
from changedetectionio.blueprint.imports.importer import (
|
||||
import_url_list,
|
||||
@@ -31,7 +31,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
logger.debug(f"Imported {len(importer_handler.new_uuids)} new UUIDs")
|
||||
# Dont' add to queue because scheduler can see that they haven't been checked and will add them to the queue
|
||||
# for uuid in importer_handler.new_uuids:
|
||||
# worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
# worker_pool.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
|
||||
if len(importer_handler.remaining_data) == 0:
|
||||
return redirect(url_for('watchlist.index'))
|
||||
@@ -45,7 +45,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
d_importer.run(data=request.values.get('distill-io'), flash=flash, datastore=datastore)
|
||||
# Dont' add to queue because scheduler can see that they haven't been checked and will add them to the queue
|
||||
# for uuid in importer_handler.new_uuids:
|
||||
# worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
# worker_pool.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
|
||||
|
||||
# XLSX importer
|
||||
@@ -70,7 +70,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
|
||||
# Dont' add to queue because scheduler can see that they haven't been checked and will add them to the queue
|
||||
# for uuid in importer_handler.new_uuids:
|
||||
# worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
# worker_pool.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
|
||||
|
||||
# Could be some remaining, or we could be on GET
|
||||
|
||||
@@ -62,7 +62,7 @@ class import_url_list(Importer):
|
||||
extras = None
|
||||
if processor:
|
||||
extras = {'processor': processor}
|
||||
new_uuid = datastore.add_watch(url=url.strip(), tag=tags, write_to_disk_now=False, extras=extras)
|
||||
new_uuid = datastore.add_watch(url=url.strip(), tag=tags, save_immediately=False, extras=extras)
|
||||
|
||||
if new_uuid:
|
||||
# Straight into the queue.
|
||||
@@ -129,7 +129,7 @@ class import_distill_io_json(Importer):
|
||||
new_uuid = datastore.add_watch(url=d['uri'].strip(),
|
||||
tag=",".join(d.get('tags', [])),
|
||||
extras=extras,
|
||||
write_to_disk_now=False)
|
||||
save_immediately=False)
|
||||
|
||||
if new_uuid:
|
||||
# Straight into the queue.
|
||||
@@ -204,7 +204,7 @@ class import_xlsx_wachete(Importer):
|
||||
new_uuid = datastore.add_watch(url=data['url'].strip(),
|
||||
extras=extras,
|
||||
tag=data.get('folder'),
|
||||
write_to_disk_now=False)
|
||||
save_immediately=False)
|
||||
if new_uuid:
|
||||
# Straight into the queue.
|
||||
self.new_uuids.append(new_uuid)
|
||||
@@ -287,7 +287,7 @@ class import_xlsx_custom(Importer):
|
||||
new_uuid = datastore.add_watch(url=url,
|
||||
extras=extras,
|
||||
tag=tags,
|
||||
write_to_disk_now=False)
|
||||
save_immediately=False)
|
||||
if new_uuid:
|
||||
# Straight into the queue.
|
||||
self.new_uuids.append(new_uuid)
|
||||
|
||||
@@ -4,7 +4,7 @@ from flask import Blueprint, flash, redirect, url_for
from flask_login import login_required
from changedetectionio.store import ChangeDetectionStore
from changedetectionio import queuedWatchMetaData
from changedetectionio import worker_handler
from changedetectionio import worker_pool
from queue import PriorityQueue

PRICE_DATA_TRACK_ACCEPT = 'accepted'
@@ -20,7 +20,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q: PriorityQueue
datastore.data['watching'][uuid]['track_ldjson_price_data'] = PRICE_DATA_TRACK_ACCEPT
datastore.data['watching'][uuid]['processor'] = 'restock_diff'
datastore.data['watching'][uuid].clear_watch()
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
worker_pool.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
return redirect(url_for("watchlist.index"))

@login_required
@@ -83,7 +83,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):

# Adjust worker count if it changed
if new_worker_count != old_worker_count:
from changedetectionio import worker_handler
from changedetectionio import worker_pool
from changedetectionio.flask_app import update_q, notification_q, app, datastore as ds

# Check CPU core availability and warn if worker count is high
@@ -92,7 +92,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
flash(gettext("Warning: Worker count ({}) is close to or exceeds available CPU cores ({})").format(
new_worker_count, cpu_count), 'warning')

result = worker_handler.adjust_async_worker_count(
result = worker_pool.adjust_async_worker_count(
new_count=new_worker_count,
update_q=update_q,
notification_q=notification_q,
@@ -80,6 +80,16 @@
|
||||
{{ render_checkbox_field(form.application.form.empty_pages_are_a_change) }}
|
||||
<span class="pure-form-message-inline">{{ _('When a request returns no content, or the HTML does not contain any text, is this considered a change?') }}</span>
|
||||
</div>
|
||||
{% if form.requests.proxy %}
|
||||
<div>
|
||||
<br>
|
||||
<div class="inline-radio">
|
||||
{{ render_field(form.requests.form.proxy, class="fetch-backend-proxy") }}
|
||||
<span class="pure-form-message-inline">{{ _('Choose a default proxy for all watches') }}</span>
|
||||
</div>
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
</fieldset>
|
||||
</div>
|
||||
|
||||
@@ -340,15 +350,6 @@ nav
|
||||
{{ render_fieldlist_with_inline_errors(form.requests.form.extra_proxies) }}
|
||||
<span class="pure-form-message-inline">{{ _('"Name" will be used for selecting the proxy in the Watch Edit settings') }}</span><br>
|
||||
<span class="pure-form-message-inline">{{ _('SOCKS5 proxies with authentication are only supported with \'plain requests\' fetcher, for other fetchers you should whitelist the IP access instead') }}</span>
|
||||
{% if form.requests.proxy %}
|
||||
<div>
|
||||
<br>
|
||||
<div class="inline-radio">
|
||||
{{ render_field(form.requests.form.proxy, class="fetch-backend-proxy") }}
|
||||
<span class="pure-form-message-inline">{{ _('Choose a default proxy for all watches') }}</span>
|
||||
</div>
|
||||
</div>
|
||||
{% endif %}
|
||||
</div>
|
||||
<div class="pure-control-group" id="extra-browsers-setting">
|
||||
<p>
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import time
|
||||
import threading
|
||||
from flask import Blueprint, request, redirect, url_for, flash, render_template, session
|
||||
from flask import Blueprint, request, redirect, url_for, flash, render_template, session, current_app
|
||||
from flask_babel import gettext
|
||||
from loguru import logger
|
||||
|
||||
@@ -10,7 +10,7 @@ from changedetectionio.blueprint.ui.notification import construct_blueprint as c
|
||||
from changedetectionio.blueprint.ui.views import construct_blueprint as construct_views_blueprint
|
||||
from changedetectionio.blueprint.ui import diff, preview
|
||||
|
||||
def _handle_operations(op, uuids, datastore, worker_handler, update_q, queuedWatchMetaData, watch_check_update, extra_data=None, emit_flash=True):
|
||||
def _handle_operations(op, uuids, datastore, worker_pool, update_q, queuedWatchMetaData, watch_check_update, extra_data=None, emit_flash=True):
|
||||
from flask import request, flash
|
||||
|
||||
if op == 'delete':
|
||||
@@ -24,6 +24,7 @@ def _handle_operations(op, uuids, datastore, worker_handler, update_q, queuedWat
|
||||
for uuid in uuids:
|
||||
if datastore.data['watching'].get(uuid):
|
||||
datastore.data['watching'][uuid]['paused'] = True
|
||||
datastore.mark_watch_dirty(uuid)
|
||||
if emit_flash:
|
||||
flash(gettext("{} watches paused").format(len(uuids)))
|
||||
|
||||
@@ -31,6 +32,7 @@ def _handle_operations(op, uuids, datastore, worker_handler, update_q, queuedWat
|
||||
for uuid in uuids:
|
||||
if datastore.data['watching'].get(uuid):
|
||||
datastore.data['watching'][uuid.strip()]['paused'] = False
|
||||
datastore.mark_watch_dirty(uuid)
|
||||
if emit_flash:
|
||||
flash(gettext("{} watches unpaused").format(len(uuids)))
|
||||
|
||||
@@ -45,6 +47,7 @@ def _handle_operations(op, uuids, datastore, worker_handler, update_q, queuedWat
|
||||
for uuid in uuids:
|
||||
if datastore.data['watching'].get(uuid):
|
||||
datastore.data['watching'][uuid]['notification_muted'] = True
|
||||
datastore.mark_watch_dirty(uuid)
|
||||
if emit_flash:
|
||||
flash(gettext("{} watches muted").format(len(uuids)))
|
||||
|
||||
@@ -52,6 +55,7 @@ def _handle_operations(op, uuids, datastore, worker_handler, update_q, queuedWat
|
||||
for uuid in uuids:
|
||||
if datastore.data['watching'].get(uuid):
|
||||
datastore.data['watching'][uuid]['notification_muted'] = False
|
||||
datastore.mark_watch_dirty(uuid)
|
||||
if emit_flash:
|
||||
flash(gettext("{} watches un-muted").format(len(uuids)))
|
||||
|
||||
@@ -59,7 +63,7 @@ def _handle_operations(op, uuids, datastore, worker_handler, update_q, queuedWat
|
||||
for uuid in uuids:
|
||||
if datastore.data['watching'].get(uuid):
|
||||
# Recheck and require a full reprocessing
|
||||
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
worker_pool.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
if emit_flash:
|
||||
flash(gettext("{} watches queued for rechecking").format(len(uuids)))
|
||||
|
||||
@@ -67,6 +71,7 @@ def _handle_operations(op, uuids, datastore, worker_handler, update_q, queuedWat
|
||||
for uuid in uuids:
|
||||
if datastore.data['watching'].get(uuid):
|
||||
datastore.data['watching'][uuid]["last_error"] = False
|
||||
datastore.mark_watch_dirty(uuid)
|
||||
if emit_flash:
|
||||
flash(gettext("{} watches errors cleared").format(len(uuids)))
|
||||
|
||||
@@ -109,7 +114,7 @@ def _handle_operations(op, uuids, datastore, worker_handler, update_q, queuedWat
|
||||
for uuid in uuids:
|
||||
watch_check_update.send(watch_uuid=uuid)
|
||||
|
||||
def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_handler, queuedWatchMetaData, watch_check_update):
|
||||
def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_pool, queuedWatchMetaData, watch_check_update):
|
||||
ui_blueprint = Blueprint('ui', __name__, template_folder="templates")
|
||||
|
||||
# Register the edit blueprint
|
||||
@@ -241,7 +246,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_handle
|
||||
new_uuid = datastore.clone(uuid)
|
||||
|
||||
if not datastore.data['watching'].get(uuid).get('paused'):
|
||||
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=5, item={'uuid': new_uuid}))
|
||||
worker_pool.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=5, item={'uuid': new_uuid}))
|
||||
|
||||
flash(gettext('Cloned, you are editing the new watch.'))
|
||||
|
||||
@@ -257,10 +262,10 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_handle
|
||||
|
||||
if uuid:
|
||||
# Single watch - check if already queued or running
|
||||
if worker_handler.is_watch_running(uuid) or uuid in update_q.get_queued_uuids():
|
||||
if worker_pool.is_watch_running(uuid) or uuid in update_q.get_queued_uuids():
|
||||
flash(gettext("Watch is already queued or being checked."))
|
||||
else:
|
||||
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
worker_pool.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
flash(gettext("Queued 1 watch for rechecking."))
|
||||
else:
|
||||
# Multiple watches - first count how many need to be queued
|
||||
@@ -279,7 +284,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_handle
|
||||
if len(watches_to_queue) < 20:
|
||||
# Get already queued/running UUIDs once (efficient)
|
||||
queued_uuids = set(update_q.get_queued_uuids())
|
||||
running_uuids = set(worker_handler.get_running_uuids())
|
||||
running_uuids = set(worker_pool.get_running_uuids())
|
||||
|
||||
# Filter out watches that are already queued or running
|
||||
watches_to_queue_filtered = []
|
||||
@@ -289,7 +294,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_handle
|
||||
|
||||
# Queue only the filtered watches
|
||||
for watch_uuid in watches_to_queue_filtered:
|
||||
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid}))
|
||||
worker_pool.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid}))
|
||||
|
||||
# Provide feedback about skipped watches
|
||||
skipped_count = len(watches_to_queue) - len(watches_to_queue_filtered)
|
||||
@@ -305,7 +310,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_handle
|
||||
# 20+ watches - queue in background thread to avoid blocking HTTP response
|
||||
# Capture queued/running state before background thread
|
||||
queued_uuids = set(update_q.get_queued_uuids())
|
||||
running_uuids = set(worker_handler.get_running_uuids())
|
||||
running_uuids = set(worker_pool.get_running_uuids())
|
||||
|
||||
def queue_watches_background():
|
||||
"""Background thread to queue watches - discarded after completion."""
|
||||
@@ -315,7 +320,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_handle
|
||||
for watch_uuid in watches_to_queue:
|
||||
# Check if already queued or running (state captured at start)
|
||||
if watch_uuid not in queued_uuids and watch_uuid not in running_uuids:
|
||||
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid}))
|
||||
worker_pool.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid}))
|
||||
queued_count += 1
|
||||
else:
|
||||
skipped_count += 1
|
||||
@@ -344,7 +349,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_handle
|
||||
extra_data=extra_data,
|
||||
queuedWatchMetaData=queuedWatchMetaData,
|
||||
uuids=uuids,
|
||||
worker_handler=worker_handler,
|
||||
worker_pool=worker_pool,
|
||||
update_q=update_q,
|
||||
watch_check_update=watch_check_update,
|
||||
op=op,
|
||||
@@ -404,4 +409,25 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_handle

return redirect(url_for('watchlist.index'))

@ui_blueprint.route("/language/auto-detect", methods=['GET'])
def delete_locale_language_session_var_if_it_exists():
"""Clear the session locale preference to auto-detect from browser Accept-Language header"""
if 'locale' in session:
session.pop('locale', None)
# Refresh Flask-Babel to clear cached locale
from flask_babel import refresh
refresh()
flash(gettext("Language set to auto-detect from browser"))

# Check if there's a redirect parameter to return to the same page
redirect_url = request.args.get('redirect')

# If redirect is provided and safe, use it
from changedetectionio.is_safe_url import is_safe_url
if redirect_url and is_safe_url(redirect_url, current_app):
return redirect(redirect_url)

# Otherwise redirect to watchlist
return redirect(url_for('watchlist.index'))

return ui_blueprint
@@ -9,7 +9,7 @@ from jinja2 import Environment, FileSystemLoader
from changedetectionio.store import ChangeDetectionStore
from changedetectionio.auth_decorator import login_optionally_required
from changedetectionio.time_handler import is_within_schedule
from changedetectionio import worker_handler
from changedetectionio import worker_pool

def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMetaData):
edit_blueprint = Blueprint('ui_edit', __name__, template_folder="../ui/templates")
@@ -283,7 +283,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
#############################
if not datastore.data['watching'][uuid].get('paused') and is_in_schedule:
# Queue the watch for immediate recheck, with a higher priority
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
worker_pool.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))

# Diff page [edit] link should go back to diff page
if request.args.get("next") and request.args.get("next") == 'diff':
@@ -314,7 +314,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
app_rss_token = datastore.data['settings']['application'].get('rss_access_token'),

c = [f"processor-{watch.get('processor')}"]
if worker_handler.is_watch_running(uuid):
if worker_pool.is_watch_running(uuid):
c.append('checking-now')

template_args = {
@@ -2,7 +2,7 @@ from flask import Blueprint, request, redirect, url_for, flash
from flask_babel import gettext
from changedetectionio.store import ChangeDetectionStore
from changedetectionio.auth_decorator import login_optionally_required
from changedetectionio import worker_handler
from changedetectionio import worker_pool


def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMetaData, watch_check_update):
@@ -33,7 +33,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
return redirect(url_for('ui.ui_edit.edit_page', uuid=new_uuid, unpause_on_save=1, tag=request.args.get('tag')))
else:
# Straight into the queue.
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid}))
worker_pool.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid}))
flash(gettext("Watch added."))

return redirect(url_for('watchlist.index', tag=request.args.get('tag','')))
@@ -223,7 +223,7 @@ html[data-darkmode="true"] .watch-tag-list.tag-{{ class_name }} {
<span class="processor-badge processor-badge-{{ watch['processor'] }}" title="{{ processor_descriptions.get(watch['processor'], watch['processor']) }}">{{ processor_badge_texts[watch['processor']] }}</span>
{%- endif -%}
{%- for watch_tag_uuid, watch_tag in datastore.get_all_tags_for_watch(watch['uuid']).items() -%}
<span class="watch-tag-list tag-{{ watch_tag.title|sanitize_tag_class }}">{{ watch_tag.title }}</span>
<a href="{{url_for('watchlist.index', tag=watch_tag_uuid) }}" class="watch-tag-list tag-{{ watch_tag.title|sanitize_tag_class }}">{{ watch_tag.title }}</a>
{%- endfor -%}
</div>
<div class="status-icons">
@@ -71,10 +71,19 @@ class Fetcher():
|
||||
supports_screenshots = False # Can capture page screenshots
|
||||
supports_xpath_element_data = False # Can extract xpath element positions/data for visual selector
|
||||
|
||||
# Screenshot element locking - prevents layout shifts during screenshot capture
|
||||
# Only needed for visual comparison (image_ssim_diff processor)
|
||||
# Locks element dimensions in the first viewport to prevent headers/ads from resizing
|
||||
lock_viewport_elements = False # Default: disabled for performance
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
if kwargs and 'screenshot_format' in kwargs:
|
||||
self.screenshot_format = kwargs.get('screenshot_format')
|
||||
|
||||
# Allow lock_viewport_elements to be set via kwargs
|
||||
if kwargs and 'lock_viewport_elements' in kwargs:
|
||||
self.lock_viewport_elements = kwargs.get('lock_viewport_elements')
|
||||
|
||||
|
||||
@classmethod
|
||||
def get_status_icon_data(cls):
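
The new lock_viewport_elements flag is opt-in via Fetcher(**kwargs). A minimal standalone sketch (not the real class) of the intended usage, where only a visual-comparison processor would pass the flag:

class ExampleFetcher:
    # Mirrors the class default above: disabled unless a processor opts in
    lock_viewport_elements = False

    def __init__(self, **kwargs):
        if kwargs and 'lock_viewport_elements' in kwargs:
            self.lock_viewport_elements = kwargs.get('lock_viewport_elements')

# An image_ssim_diff-style processor would opt in; text processors keep the default
ssim_fetcher = ExampleFetcher(lock_viewport_elements=True)
text_fetcher = ExampleFetcher()
assert ssim_fetcher.lock_viewport_elements and not text_fetcher.lock_viewport_elements
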
@@ -8,20 +8,24 @@ from loguru import logger
|
||||
from changedetectionio.content_fetchers import SCREENSHOT_MAX_HEIGHT_DEFAULT, visualselector_xpath_selectors, \
|
||||
SCREENSHOT_SIZE_STITCH_THRESHOLD, SCREENSHOT_MAX_TOTAL_HEIGHT, XPATH_ELEMENT_JS, INSTOCK_DATA_JS, FAVICON_FETCHER_JS
|
||||
from changedetectionio.content_fetchers.base import Fetcher, manage_user_agent
|
||||
from changedetectionio.content_fetchers.exceptions import PageUnloadable, Non200ErrorCodeReceived, EmptyReply, ScreenshotUnavailable
|
||||
from changedetectionio.content_fetchers.exceptions import PageUnloadable, Non200ErrorCodeReceived, EmptyReply, ScreenshotUnavailable, \
|
||||
BrowserStepsStepException
|
||||
|
||||
async def capture_full_page_async(page, screenshot_format='JPEG'):
|
||||
|
||||
async def capture_full_page_async(page, screenshot_format='JPEG', watch_uuid=None, lock_viewport_elements=False):
|
||||
import os
|
||||
import time
|
||||
import multiprocessing
|
||||
|
||||
start = time.time()
|
||||
watch_info = f"[{watch_uuid}] " if watch_uuid else ""
|
||||
|
||||
setup_start = time.time()
|
||||
page_height = await page.evaluate("document.documentElement.scrollHeight")
|
||||
page_width = await page.evaluate("document.documentElement.scrollWidth")
|
||||
original_viewport = page.viewport_size
|
||||
dimensions_time = time.time() - setup_start
|
||||
|
||||
logger.debug(f"Playwright viewport size {page.viewport_size} page height {page_height} page width {page_width}")
|
||||
logger.debug(f"{watch_info}Playwright viewport size {page.viewport_size} page height {page_height} page width {page_width} (got dimensions in {dimensions_time:.2f}s)")
|
||||
|
||||
# Use an approach similar to puppeteer: set a larger viewport and take screenshots in chunks
|
||||
step_size = SCREENSHOT_SIZE_STITCH_THRESHOLD # Size that won't cause GPU to overflow
|
||||
@@ -29,25 +33,31 @@ async def capture_full_page_async(page, screenshot_format='JPEG'):
|
||||
y = 0
|
||||
elements_locked = False
|
||||
|
||||
if page_height > page.viewport_size['height']:
|
||||
|
||||
# Lock all element dimensions BEFORE screenshot to prevent CSS media queries from resizing
|
||||
# capture_full_page_async() changes viewport height which triggers @media (min-height) rules
|
||||
# Only lock viewport elements if explicitly enabled (for image_ssim_diff processor)
|
||||
# This prevents headers/ads from resizing when viewport changes
|
||||
if lock_viewport_elements and page_height > page.viewport_size['height']:
|
||||
lock_start = time.time()
|
||||
lock_elements_js_path = os.path.join(os.path.dirname(__file__), 'res', 'lock-elements-sizing.js')
|
||||
with open(lock_elements_js_path, 'r') as f:
|
||||
lock_elements_js = f.read()
|
||||
await page.evaluate(lock_elements_js)
|
||||
elements_locked = True
|
||||
lock_time = time.time() - lock_start
|
||||
logger.debug(f"{watch_info}Viewport element locking enabled (took {lock_time:.2f}s)")
|
||||
|
||||
logger.debug("Element dimensions locked before screenshot capture")
|
||||
|
||||
if page_height > page.viewport_size['height']:
|
||||
if page_height < step_size:
|
||||
step_size = page_height # In case the page is bigger than the default viewport but smaller than the proposed step size
|
||||
logger.debug(f"Setting bigger viewport to step through large page width W{page.viewport_size['width']}xH{step_size} because page_height > viewport_size")
|
||||
viewport_start = time.time()
|
||||
logger.debug(f"{watch_info}Setting bigger viewport to step through large page width W{page.viewport_size['width']}xH{step_size} because page_height > viewport_size")
|
||||
# Set viewport to a larger size to capture more content at once
|
||||
await page.set_viewport_size({'width': page.viewport_size['width'], 'height': step_size})
|
||||
viewport_time = time.time() - viewport_start
|
||||
logger.debug(f"{watch_info}Viewport changed to {page.viewport_size['width']}x{step_size} (took {viewport_time:.2f}s)")
|
||||
|
||||
# Capture screenshots in chunks up to the max total height
|
||||
capture_start = time.time()
|
||||
chunk_times = []
|
||||
# Use PNG for better quality (no compression artifacts), JPEG for smaller size
|
||||
screenshot_type = screenshot_format.lower() if screenshot_format else 'jpeg'
|
||||
# PNG should use quality 100, JPEG uses configurable quality
|
||||
@@ -69,7 +79,11 @@ async def capture_full_page_async(page, screenshot_format='JPEG'):
|
||||
if screenshot_type == 'jpeg':
|
||||
screenshot_kwargs['quality'] = screenshot_quality
|
||||
|
||||
chunk_start = time.time()
|
||||
screenshot_chunks.append(await page.screenshot(**screenshot_kwargs))
|
||||
chunk_time = time.time() - chunk_start
|
||||
chunk_times.append(chunk_time)
|
||||
logger.debug(f"{watch_info}Chunk {len(screenshot_chunks)} captured in {chunk_time:.2f}s")
|
||||
y += step_size
|
||||
|
||||
# Restore original viewport size
|
||||
@@ -81,40 +95,54 @@ async def capture_full_page_async(page, screenshot_format='JPEG'):
|
||||
with open(unlock_elements_js_path, 'r') as f:
|
||||
unlock_elements_js = f.read()
|
||||
await page.evaluate(unlock_elements_js)
|
||||
logger.debug("Element dimensions unlocked after screenshot capture")
|
||||
logger.debug(f"{watch_info}Element dimensions unlocked after screenshot capture")
|
||||
|
||||
capture_time = time.time() - capture_start
|
||||
total_capture_time = sum(chunk_times)
|
||||
logger.debug(f"{watch_info}All {len(screenshot_chunks)} chunks captured in {capture_time:.2f}s (total chunk time: {total_capture_time:.2f}s)")
|
||||
|
||||
# If we have multiple chunks, stitch them together
|
||||
if len(screenshot_chunks) > 1:
|
||||
logger.debug(f"Screenshot stitching {len(screenshot_chunks)} chunks together")
|
||||
stitch_start = time.time()
|
||||
logger.debug(f"{watch_info}Starting stitching of {len(screenshot_chunks)} chunks")
|
||||
|
||||
# For small number of chunks (2-3), stitch inline to avoid multiprocessing overhead
|
||||
# Only use separate process for many chunks (4+) to avoid blocking the event loop
|
||||
if len(screenshot_chunks) <= 3:
|
||||
from changedetectionio.content_fetchers.screenshot_handler import stitch_images_inline
|
||||
screenshot = stitch_images_inline(screenshot_chunks, page_height, SCREENSHOT_MAX_TOTAL_HEIGHT)
|
||||
else:
|
||||
# Use separate process for many chunks to avoid blocking
|
||||
# Always use spawn for thread safety - consistent behavior in tests and production
|
||||
from changedetectionio.content_fetchers.screenshot_handler import stitch_images_worker
|
||||
ctx = multiprocessing.get_context('spawn')
|
||||
parent_conn, child_conn = ctx.Pipe()
|
||||
p = ctx.Process(target=stitch_images_worker, args=(child_conn, screenshot_chunks, page_height, SCREENSHOT_MAX_TOTAL_HEIGHT))
|
||||
p.start()
|
||||
screenshot = parent_conn.recv_bytes()
|
||||
p.join()
|
||||
# Explicit cleanup
|
||||
del p
|
||||
del parent_conn, child_conn
|
||||
# Always use spawn subprocess for ANY stitching (2+ chunks)
|
||||
# PIL allocates at C level and Python GC never releases it - subprocess exit forces OS to reclaim
|
||||
# Trade-off: 35MB resource_tracker vs 500MB+ PIL leak in main process
|
||||
from changedetectionio.content_fetchers.screenshot_handler import stitch_images_worker_raw_bytes
|
||||
import multiprocessing
|
||||
import struct
|
||||
|
||||
ctx = multiprocessing.get_context('spawn')
|
||||
parent_conn, child_conn = ctx.Pipe()
|
||||
p = ctx.Process(target=stitch_images_worker_raw_bytes, args=(child_conn, page_height, SCREENSHOT_MAX_TOTAL_HEIGHT))
|
||||
p.start()
|
||||
|
||||
# Send via raw bytes (no pickle)
|
||||
parent_conn.send_bytes(struct.pack('I', len(screenshot_chunks)))
|
||||
for chunk in screenshot_chunks:
|
||||
parent_conn.send_bytes(chunk)
|
||||
|
||||
screenshot = parent_conn.recv_bytes()
|
||||
p.join()
|
||||
|
||||
parent_conn.close()
|
||||
child_conn.close()
|
||||
del p, parent_conn, child_conn
|
||||
|
||||
stitch_time = time.time() - stitch_start
|
||||
total_time = time.time() - start
|
||||
setup_time = total_time - capture_time - stitch_time
|
||||
logger.debug(
|
||||
f"Screenshot (chunked/stitched) - Page height: {page_height} Capture height: {SCREENSHOT_MAX_TOTAL_HEIGHT} - Stitched together in {time.time() - start:.2f}s")
|
||||
# Explicit cleanup
|
||||
del screenshot_chunks
|
||||
screenshot_chunks = None
|
||||
f"{watch_info}Screenshot complete - Page height: {page_height}px, Capture height: {SCREENSHOT_MAX_TOTAL_HEIGHT}px | "
|
||||
f"Setup: {setup_time:.2f}s, Capture: {capture_time:.2f}s, Stitching: {stitch_time:.2f}s, Total: {total_time:.2f}s")
|
||||
return screenshot
|
||||
|
||||
total_time = time.time() - start
|
||||
setup_time = total_time - capture_time
|
||||
logger.debug(
|
||||
f"Screenshot Page height: {page_height} Capture height: {SCREENSHOT_MAX_TOTAL_HEIGHT} - Stitched together in {time.time() - start:.2f}s")
|
||||
f"{watch_info}Screenshot complete - Page height: {page_height}px, Capture height: {SCREENSHOT_MAX_TOTAL_HEIGHT}px | "
|
||||
f"Setup: {setup_time:.2f}s, Single chunk: {capture_time:.2f}s, Total: {total_time:.2f}s")
|
||||
|
||||
return screenshot_chunks[0]
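
A minimal, self-contained sketch (assumed names, trivial worker) of the raw-bytes pipe framing used above: a 4-byte struct-packed chunk count, then each chunk via send_bytes(), so no pickling of large buffers happens in the parent process.

import multiprocessing
import struct

def _concat_worker(conn):
    # Receives the framed chunks and replies with one payload;
    # the real worker stitches images with PIL at this point.
    count = struct.unpack('I', conn.recv_bytes())[0]
    parts = [conn.recv_bytes() for _ in range(count)]
    conn.send_bytes(b''.join(parts))
    conn.close()

if __name__ == '__main__':
    ctx = multiprocessing.get_context('spawn')
    parent_conn, child_conn = ctx.Pipe()
    p = ctx.Process(target=_concat_worker, args=(child_conn,))
    p.start()
    chunks = [b'chunk-a', b'chunk-b']
    parent_conn.send_bytes(struct.pack('I', len(chunks)))
    for c in chunks:
        parent_conn.send_bytes(c)
    assert parent_conn.recv_bytes() == b'chunk-achunk-b'
    p.join()
    parent_conn.close()
    child_conn.close()
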
@@ -184,7 +212,8 @@ class fetcher(Fetcher):
|
||||
|
||||
async def screenshot_step(self, step_n=''):
|
||||
super().screenshot_step(step_n=step_n)
|
||||
screenshot = await capture_full_page_async(page=self.page, screenshot_format=self.screenshot_format)
|
||||
watch_uuid = getattr(self, 'watch_uuid', None)
|
||||
screenshot = await capture_full_page_async(page=self.page, screenshot_format=self.screenshot_format, watch_uuid=watch_uuid, lock_viewport_elements=self.lock_viewport_elements)
|
||||
|
||||
# Request GC immediately after screenshot to free memory
|
||||
# Screenshots can be large and browser steps take many of them
|
||||
@@ -233,6 +262,7 @@ class fetcher(Fetcher):
|
||||
import playwright._impl._errors
|
||||
import time
|
||||
self.delete_browser_steps_screenshots()
|
||||
self.watch_uuid = watch_uuid # Store for use in screenshot_step
|
||||
response = None
|
||||
|
||||
async with async_playwright() as p:
|
||||
@@ -318,7 +348,7 @@ class fetcher(Fetcher):
|
||||
logger.error(f"Error fetching FavIcon info {str(e)}, continuing.")
|
||||
|
||||
if self.status_code != 200 and not ignore_status_codes:
|
||||
screenshot = await capture_full_page_async(self.page, screenshot_format=self.screenshot_format)
|
||||
screenshot = await capture_full_page_async(self.page, screenshot_format=self.screenshot_format, watch_uuid=watch_uuid, lock_viewport_elements=self.lock_viewport_elements)
|
||||
# Cleanup before raising to prevent memory leak
|
||||
await self.page.close()
|
||||
await context.close()
|
||||
@@ -337,7 +367,16 @@ class fetcher(Fetcher):
|
||||
try:
|
||||
# Run Browser Steps here
|
||||
if self.browser_steps_get_valid_steps():
|
||||
await self.iterate_browser_steps(start_url=url)
|
||||
try:
|
||||
await self.iterate_browser_steps(start_url=url)
|
||||
except BrowserStepsStepException:
|
||||
try:
|
||||
await context.close()
|
||||
await browser.close()
|
||||
except Exception as e:
|
||||
# Fine, could be messy situation
|
||||
pass
|
||||
raise
|
||||
|
||||
await self.page.wait_for_timeout(extra_wait * 1000)
|
||||
|
||||
@@ -374,14 +413,16 @@ class fetcher(Fetcher):
|
||||
# which will significantly increase the IO size between the server and client; it's recommended to use the lowest
|
||||
# acceptable screenshot quality here
|
||||
# The actual screenshot - this is always base64 and needs decoding! horrible! huge CPU usage
|
||||
self.screenshot = await capture_full_page_async(page=self.page, screenshot_format=self.screenshot_format)
|
||||
self.screenshot = await capture_full_page_async(page=self.page, screenshot_format=self.screenshot_format, watch_uuid=watch_uuid, lock_viewport_elements=self.lock_viewport_elements)
|
||||
|
||||
# Force aggressive memory cleanup - screenshots are large and base64 decode creates temporary buffers
|
||||
await self.page.request_gc()
|
||||
gc.collect()
|
||||
|
||||
except ScreenshotUnavailable:
|
||||
# Re-raise screenshot unavailable exceptions
|
||||
raise
|
||||
except Exception as e:
|
||||
# It's likely the screenshot was too long/big and something crashed
|
||||
raise ScreenshotUnavailable(url=url, status_code=self.status_code)
|
||||
|
||||
finally:
|
||||
# Request garbage collection one more time before closing
|
||||
try:
|
||||
|
||||
@@ -20,18 +20,20 @@ from changedetectionio.content_fetchers.exceptions import PageUnloadable, Non200
|
||||
# Screenshots also travel via the ws:// (websocket) meaning that the binary data is base64 encoded
|
||||
# which will significantly increase the IO size between the server and client; it's recommended to use the lowest
|
||||
# acceptable screenshot quality here
|
||||
async def capture_full_page(page, screenshot_format='JPEG'):
|
||||
async def capture_full_page(page, screenshot_format='JPEG', watch_uuid=None, lock_viewport_elements=False):
|
||||
import os
|
||||
import time
|
||||
import multiprocessing
|
||||
|
||||
start = time.time()
|
||||
watch_info = f"[{watch_uuid}] " if watch_uuid else ""
|
||||
|
||||
setup_start = time.time()
|
||||
page_height = await page.evaluate("document.documentElement.scrollHeight")
|
||||
page_width = await page.evaluate("document.documentElement.scrollWidth")
|
||||
original_viewport = page.viewport
|
||||
dimensions_time = time.time() - setup_start
|
||||
|
||||
logger.debug(f"Puppeteer viewport size {page.viewport} page height {page_height} page width {page_width}")
|
||||
logger.debug(f"{watch_info}Puppeteer viewport size {page.viewport} page height {page_height} page width {page_width} (got dimensions in {dimensions_time:.2f}s)")
|
||||
|
||||
# Bug 3 in Playwright screenshot handling
|
||||
# Some bug where it gives the wrong screenshot size, but making a request with the clip set first seems to solve it
|
||||
@@ -50,20 +52,35 @@ async def capture_full_page(page, screenshot_format='JPEG'):
|
||||
screenshot_chunks = []
|
||||
y = 0
|
||||
elements_locked = False
|
||||
if page_height > page.viewport['height']:
|
||||
# Lock all element dimensions BEFORE screenshot to prevent CSS media queries from resizing
|
||||
# capture_full_page() changes viewport height which triggers @media (min-height) rules
|
||||
|
||||
# Only lock viewport elements if explicitly enabled (for image_ssim_diff processor)
|
||||
# This prevents headers/ads from resizing when viewport changes
|
||||
if lock_viewport_elements and page_height > page.viewport['height']:
|
||||
lock_start = time.time()
|
||||
lock_elements_js_path = os.path.join(os.path.dirname(__file__), 'res', 'lock-elements-sizing.js')
|
||||
file_read_start = time.time()
|
||||
with open(lock_elements_js_path, 'r') as f:
|
||||
lock_elements_js = f.read()
|
||||
await page.evaluate(lock_elements_js)
|
||||
elements_locked = True
|
||||
logger.debug("Element dimensions locked before screenshot capture")
|
||||
file_read_time = time.time() - file_read_start
|
||||
|
||||
evaluate_start = time.time()
|
||||
await page.evaluate(lock_elements_js)
|
||||
evaluate_time = time.time() - evaluate_start
|
||||
|
||||
elements_locked = True
|
||||
lock_time = time.time() - lock_start
|
||||
logger.debug(f"{watch_info}Viewport element locking enabled - File read: {file_read_time:.3f}s, Browser evaluate: {evaluate_time:.2f}s, Total: {lock_time:.2f}s")
|
||||
|
||||
if page_height > page.viewport['height']:
|
||||
if page_height < step_size:
|
||||
step_size = page_height # In case the page is bigger than the default viewport but smaller than the proposed step size
|
||||
viewport_start = time.time()
|
||||
await page.setViewport({'width': page.viewport['width'], 'height': step_size})
|
||||
viewport_time = time.time() - viewport_start
|
||||
logger.debug(f"{watch_info}Viewport changed to {page.viewport['width']}x{step_size} (took {viewport_time:.2f}s)")
|
||||
|
||||
capture_start = time.time()
|
||||
chunk_times = []
|
||||
while y < min(page_height, SCREENSHOT_MAX_TOTAL_HEIGHT):
|
||||
# better than scrollTo incase they override it in the page
|
||||
await page.evaluate(
|
||||
@@ -82,7 +99,11 @@ async def capture_full_page(page, screenshot_format='JPEG'):
|
||||
if screenshot_type == 'jpeg':
|
||||
screenshot_kwargs['quality'] = screenshot_quality
|
||||
|
||||
chunk_start = time.time()
|
||||
screenshot_chunks.append(await page.screenshot(**screenshot_kwargs))
|
||||
chunk_time = time.time() - chunk_start
|
||||
chunk_times.append(chunk_time)
|
||||
logger.debug(f"{watch_info}Chunk {len(screenshot_chunks)} captured in {chunk_time:.2f}s")
|
||||
y += step_size
|
||||
|
||||
await page.setViewport({'width': original_viewport['width'], 'height': original_viewport['height']})
|
||||
@@ -93,26 +114,53 @@ async def capture_full_page(page, screenshot_format='JPEG'):
|
||||
with open(unlock_elements_js_path, 'r') as f:
|
||||
unlock_elements_js = f.read()
|
||||
await page.evaluate(unlock_elements_js)
|
||||
logger.debug("Element dimensions unlocked after screenshot capture")
|
||||
logger.debug(f"{watch_info}Element dimensions unlocked after screenshot capture")
|
||||
|
||||
capture_time = time.time() - capture_start
|
||||
total_capture_time = sum(chunk_times)
|
||||
logger.debug(f"{watch_info}All {len(screenshot_chunks)} chunks captured in {capture_time:.2f}s (total chunk time: {total_capture_time:.2f}s)")
|
||||
|
||||
if len(screenshot_chunks) > 1:
|
||||
# Always use spawn for thread safety - consistent behavior in tests and production
|
||||
from changedetectionio.content_fetchers.screenshot_handler import stitch_images_worker
|
||||
logger.debug(f"Screenshot stitching {len(screenshot_chunks)} chunks together")
|
||||
stitch_start = time.time()
|
||||
logger.debug(f"{watch_info}Starting stitching of {len(screenshot_chunks)} chunks")
|
||||
|
||||
# Always use spawn subprocess for ANY stitching (2+ chunks)
|
||||
# PIL allocates at C level and Python GC never releases it - subprocess exit forces OS to reclaim
|
||||
# Trade-off: 35MB resource_tracker vs 500MB+ PIL leak in main process
|
||||
from changedetectionio.content_fetchers.screenshot_handler import stitch_images_worker_raw_bytes
|
||||
import multiprocessing
|
||||
import struct
|
||||
|
||||
ctx = multiprocessing.get_context('spawn')
|
||||
parent_conn, child_conn = ctx.Pipe()
|
||||
p = ctx.Process(target=stitch_images_worker, args=(child_conn, screenshot_chunks, page_height, SCREENSHOT_MAX_TOTAL_HEIGHT))
|
||||
p = ctx.Process(target=stitch_images_worker_raw_bytes, args=(child_conn, page_height, SCREENSHOT_MAX_TOTAL_HEIGHT))
|
||||
p.start()
|
||||
|
||||
# Send via raw bytes (no pickle)
|
||||
parent_conn.send_bytes(struct.pack('I', len(screenshot_chunks)))
|
||||
for chunk in screenshot_chunks:
|
||||
parent_conn.send_bytes(chunk)
|
||||
|
||||
screenshot = parent_conn.recv_bytes()
|
||||
p.join()
|
||||
logger.debug(
|
||||
f"Screenshot (chunked/stitched) - Page height: {page_height} Capture height: {SCREENSHOT_MAX_TOTAL_HEIGHT} - Stitched together in {time.time() - start:.2f}s")
|
||||
|
||||
screenshot_chunks = None
|
||||
parent_conn.close()
|
||||
child_conn.close()
|
||||
del p, parent_conn, child_conn
|
||||
|
||||
stitch_time = time.time() - stitch_start
|
||||
total_time = time.time() - start
|
||||
setup_time = total_time - capture_time - stitch_time
|
||||
logger.debug(
|
||||
f"{watch_info}Screenshot complete - Page height: {page_height}px, Capture height: {SCREENSHOT_MAX_TOTAL_HEIGHT}px | "
|
||||
f"Setup: {setup_time:.2f}s, Capture: {capture_time:.2f}s, Stitching: {stitch_time:.2f}s, Total: {total_time:.2f}s")
|
||||
return screenshot
|
||||
|
||||
total_time = time.time() - start
|
||||
setup_time = total_time - capture_time
|
||||
logger.debug(
|
||||
f"Screenshot Page height: {page_height} Capture height: {SCREENSHOT_MAX_TOTAL_HEIGHT} - Stitched together in {time.time() - start:.2f}s")
|
||||
f"{watch_info}Screenshot complete - Page height: {page_height}px, Capture height: {SCREENSHOT_MAX_TOTAL_HEIGHT}px | "
|
||||
f"Setup: {setup_time:.2f}s, Single chunk: {capture_time:.2f}s, Total: {total_time:.2f}s")
|
||||
return screenshot_chunks[0]
|
||||
|
||||
|
||||
@@ -357,7 +405,7 @@ class fetcher(Fetcher):
|
||||
logger.error(f"Error fetching FavIcon info {str(e)}, continuing.")
|
||||
|
||||
if self.status_code != 200 and not ignore_status_codes:
|
||||
screenshot = await capture_full_page(page=self.page, screenshot_format=self.screenshot_format)
|
||||
screenshot = await capture_full_page(page=self.page, screenshot_format=self.screenshot_format, watch_uuid=watch_uuid, lock_viewport_elements=self.lock_viewport_elements)
|
||||
|
||||
raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, screenshot=screenshot)
|
||||
|
||||
@@ -387,7 +435,17 @@ class fetcher(Fetcher):
|
||||
|
||||
# Now take screenshot (scrolling may trigger layout changes, but measurements are already captured)
|
||||
logger.debug(f"Screenshot format {self.screenshot_format}")
|
||||
self.screenshot = await capture_full_page(page=self.page, screenshot_format=self.screenshot_format)
|
||||
self.screenshot = await capture_full_page(page=self.page, screenshot_format=self.screenshot_format, watch_uuid=watch_uuid, lock_viewport_elements=self.lock_viewport_elements)
|
||||
|
||||
# Force aggressive memory cleanup - pyppeteer base64 decode creates temporary buffers
|
||||
import gc
|
||||
gc.collect()
|
||||
# Release C-level memory from base64 decode back to OS
|
||||
try:
|
||||
import ctypes
|
||||
ctypes.CDLL('libc.so.6').malloc_trim(0)
|
||||
except Exception:
|
||||
pass
|
||||
self.xpath_data = await self.page.evaluate(XPATH_ELEMENT_JS, {
|
||||
"visualselector_xpath_selectors": visualselector_xpath_selectors,
|
||||
"max_height": MAX_TOTAL_HEIGHT
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/**
|
||||
* Lock Element Dimensions for Screenshot Capture
|
||||
* Lock Element Dimensions for Screenshot Capture (First Viewport Only)
|
||||
*
|
||||
* THE PROBLEM:
|
||||
* When taking full-page screenshots of tall pages, Chrome/Puppeteer/Playwright need to:
|
||||
@@ -10,40 +10,31 @@
|
||||
* However, changing the viewport height triggers CSS media queries like:
|
||||
* @media (min-height: 860px) { .ad { height: 250px; } }
|
||||
*
|
||||
* This causes elements (especially ads) to resize during screenshot capture, creating a mismatch:
|
||||
* - Screenshot shows element at NEW size (after media query triggered)
|
||||
* - xpath element coordinates measured at OLD size (before viewport change)
|
||||
* - Visual selector overlays don't align with screenshot
|
||||
*
|
||||
* EXAMPLE BUG:
|
||||
* - Initial viewport: 1280x800, ad height: 138px, article position: 279px ✓
|
||||
* - Viewport changes to 1280x3809 for screenshot
|
||||
* - Media query triggers: ad expands to 250px
|
||||
* - All content below shifts down by 112px (250-138)
|
||||
* - Article now at position: 391px (279+112)
|
||||
* - But xpath data says 279px → 112px mismatch! ✗
|
||||
* This causes elements (especially ads/headers) to resize during screenshot capture.
|
||||
*
|
||||
* THE SOLUTION:
|
||||
* Before changing viewport, lock ALL element dimensions with !important inline styles.
|
||||
* Inline styles with !important override media query CSS, preventing layout changes.
|
||||
* Lock element dimensions in the FIRST VIEWPORT ONLY with !important inline styles.
|
||||
* This prevents headers, navigation, and top ads from resizing when viewport changes.
|
||||
* We only lock the visible portion because:
|
||||
* - Most layout shifts happen in headers/navbars/top ads
|
||||
* - Locking only visible elements is 100x+ faster (100-200 elements vs 10,000+)
|
||||
* - Below-fold content shifts don't affect visual comparison accuracy
|
||||
*
|
||||
* WHAT THIS SCRIPT DOES:
|
||||
* 1. Iterates through every element on the page
|
||||
* 2. Captures current computed dimensions (width, height)
|
||||
* 3. Sets inline styles with !important to freeze those dimensions
|
||||
* 1. Gets current viewport height
|
||||
* 2. Finds elements within first viewport (top of page to bottom of screen)
|
||||
* 3. Locks their dimensions with !important inline styles
|
||||
* 4. Disables ResizeObserver API (for JS-based resizing)
|
||||
* 5. When viewport changes for screenshot, media queries can't resize anything
|
||||
* 6. Layout remains consistent → xpath coordinates match screenshot ✓
|
||||
*
|
||||
* USAGE:
|
||||
* Execute this script BEFORE calling capture_full_page() / screenshot functions.
|
||||
* The page must be fully loaded and settled at its initial viewport size.
|
||||
* No need to restore state afterward - page is closed after screenshot.
|
||||
* Only enabled for image_ssim_diff processor (visual comparison).
|
||||
* Default: OFF for performance.
|
||||
*
|
||||
* PERFORMANCE:
|
||||
* - Iterates all DOM elements (can be 1000s on complex pages)
|
||||
* - Typically completes in 50-200ms
|
||||
* - One-time cost before screenshot, well worth it for coordinate accuracy
|
||||
* - Only processes 100-300 elements (first viewport) vs 10,000+ (entire page)
|
||||
* - Typically completes in 10-50ms
|
||||
* - 100x+ faster than locking entire page
|
||||
*
|
||||
* @see https://github.com/dgtlmoon/changedetection.io/issues/XXXX
|
||||
*/
|
||||
@@ -52,11 +43,34 @@
|
||||
// Store original styles in a global WeakMap for later restoration
|
||||
window.__elementSizingRestore = new WeakMap();
|
||||
|
||||
// Lock ALL element dimensions to prevent media query layout changes
|
||||
document.querySelectorAll('*').forEach(el => {
|
||||
const computed = window.getComputedStyle(el);
|
||||
const rect = el.getBoundingClientRect();
|
||||
const start = performance.now();
|
||||
|
||||
// Get current viewport height (visible portion of page)
|
||||
const viewportHeight = window.innerHeight;
|
||||
|
||||
// Get all elements and filter to FIRST VIEWPORT ONLY
|
||||
// This dramatically reduces elements to process (100-300 vs 10,000+)
|
||||
const allElements = Array.from(document.querySelectorAll('*'));
|
||||
|
||||
// BATCH READ PHASE: Get bounding rects and filter to viewport
|
||||
const measurements = allElements.map(el => {
|
||||
const rect = el.getBoundingClientRect();
|
||||
const computed = window.getComputedStyle(el);
|
||||
|
||||
// Only lock elements in the first viewport (visible on initial page load)
|
||||
// rect.top < viewportHeight means element starts within visible area
|
||||
const inViewport = rect.top < viewportHeight && rect.top >= 0;
|
||||
const hasSize = rect.height > 0 && rect.width > 0;
|
||||
|
||||
return inViewport && hasSize ? { el, computed, rect } : null;
|
||||
}).filter(Boolean); // Remove null entries
|
||||
|
||||
const elapsed = performance.now() - start;
|
||||
console.log(`Locked first viewport elements: ${measurements.length} of ${allElements.length} total elements (viewport height: ${viewportHeight}px, took ${elapsed.toFixed(0)}ms)`);
|
||||
|
||||
// BATCH WRITE PHASE: Apply all inline styles without triggering layout
|
||||
// No interleaved reads means browser can optimize style application
|
||||
measurements.forEach(({el, computed, rect}) => {
|
||||
// Save original inline style values BEFORE locking
|
||||
const properties = ['height', 'min-height', 'max-height', 'width', 'min-width', 'max-width'];
|
||||
const originalStyles = {};
|
||||
@@ -89,5 +103,5 @@
|
||||
disconnect() {}
|
||||
};
|
||||
|
||||
console.log('✓ Element dimensions locked to prevent media query changes during screenshot');
|
||||
console.log(`✓ Element dimensions locked (${measurements.length} elements) to prevent media query changes during screenshot`);
|
||||
})();
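
For reference, a hedged sketch of the call order the two scripts assume around a chunked capture (Playwright-style API as in the fetcher above; the unlock script filename is an assumption, since only the lock script name appears in this diff):

import os

async def capture_with_locking(page, lock_viewport_elements=False):
    res_dir = os.path.join(os.path.dirname(__file__), 'res')
    if lock_viewport_elements:
        with open(os.path.join(res_dir, 'lock-elements-sizing.js')) as f:
            await page.evaluate(f.read())                   # freeze first-viewport sizes
    original = page.viewport_size
    await page.set_viewport_size({'width': original['width'], 'height': 4000})
    shot = await page.screenshot(type='jpeg', quality=72)   # one chunk of the stitched capture
    await page.set_viewport_size(original)                  # restore before unlocking
    if lock_viewport_elements:
        # 'unlock-elements-sizing.js' is an assumed filename - the unlock path is not shown in this hunk
        with open(os.path.join(res_dir, 'unlock-elements-sizing.js')) as f:
            await page.evaluate(f.read())
    return shot
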
@@ -8,92 +8,42 @@ from loguru import logger
|
||||
|
||||
from changedetectionio.content_fetchers import SCREENSHOT_MAX_HEIGHT_DEFAULT, SCREENSHOT_DEFAULT_QUALITY
|
||||
|
||||
# Cache font to avoid loading on every stitch
|
||||
_cached_font = None
|
||||
|
||||
def _get_caption_font():
|
||||
"""Get or create cached font for caption text."""
|
||||
global _cached_font
|
||||
if _cached_font is None:
|
||||
from PIL import ImageFont
|
||||
try:
|
||||
_cached_font = ImageFont.truetype("arial.ttf", 35)
|
||||
except IOError:
|
||||
_cached_font = ImageFont.load_default()
|
||||
return _cached_font
|
||||
|
||||
|
||||
def stitch_images_inline(chunks_bytes, original_page_height, capture_height):
|
||||
"""
|
||||
Stitch image chunks together inline (no multiprocessing).
|
||||
Optimized for small number of chunks (2-3) to avoid process creation overhead.
|
||||
|
||||
Args:
|
||||
chunks_bytes: List of JPEG image bytes
|
||||
original_page_height: Original page height in pixels
|
||||
capture_height: Maximum capture height
|
||||
|
||||
Returns:
|
||||
bytes: Stitched JPEG image
|
||||
"""
|
||||
import os
|
||||
import io
|
||||
from PIL import Image, ImageDraw
|
||||
|
||||
# Load images from byte chunks
|
||||
images = [Image.open(io.BytesIO(b)) for b in chunks_bytes]
|
||||
total_height = sum(im.height for im in images)
|
||||
max_width = max(im.width for im in images)
|
||||
|
||||
# Create stitched image
|
||||
stitched = Image.new('RGB', (max_width, total_height))
|
||||
y_offset = 0
|
||||
for im in images:
|
||||
stitched.paste(im, (0, y_offset))
|
||||
y_offset += im.height
|
||||
im.close() # Close immediately after pasting
|
||||
|
||||
# Draw caption only if page was trimmed
|
||||
if original_page_height > capture_height:
|
||||
draw = ImageDraw.Draw(stitched)
|
||||
caption_text = f"WARNING: Screenshot was {original_page_height}px but trimmed to {capture_height}px because it was too long"
|
||||
padding = 10
|
||||
font = _get_caption_font()
|
||||
|
||||
bbox = draw.textbbox((0, 0), caption_text, font=font)
|
||||
text_width = bbox[2] - bbox[0]
|
||||
text_height = bbox[3] - bbox[1]
|
||||
|
||||
# Draw white background rectangle
|
||||
draw.rectangle([(0, 0), (max_width, text_height + 2 * padding)], fill=(255, 255, 255))
|
||||
|
||||
# Draw text centered
|
||||
text_x = (max_width - text_width) // 2
|
||||
draw.text((text_x, padding), caption_text, font=font, fill=(255, 0, 0))
|
||||
|
||||
# Encode to JPEG
|
||||
output = io.BytesIO()
|
||||
stitched.save(output, format="JPEG", quality=int(os.getenv("SCREENSHOT_QUALITY", SCREENSHOT_DEFAULT_QUALITY)), optimize=True)
|
||||
result = output.getvalue()
|
||||
|
||||
# Cleanup
|
||||
stitched.close()
|
||||
|
||||
return result
|
||||
|
||||
|
||||
def stitch_images_worker(pipe_conn, chunks_bytes, original_page_height, capture_height):
|
||||
def stitch_images_worker_raw_bytes(pipe_conn, original_page_height, capture_height):
|
||||
"""
|
||||
Stitch image chunks together in a separate process.
|
||||
Used for any stitching of 2+ chunks to avoid blocking the main event loop.
|
||||
|
||||
Uses spawn multiprocessing to isolate PIL's C-level memory allocation.
|
||||
When the subprocess exits, the OS reclaims ALL memory including C-level allocations
|
||||
that Python's GC cannot release. This prevents the ~50MB per stitch from accumulating
|
||||
in the main process.
|
||||
|
||||
Trade-off: Adds 35MB resource_tracker subprocess, but prevents 500MB+ memory leak
|
||||
in main process (much better at scale: 35GB vs 500GB for 1000 instances).
|
||||
|
||||
Args:
|
||||
pipe_conn: Pipe connection to receive data and send result
|
||||
original_page_height: Original page height in pixels
|
||||
capture_height: Maximum capture height
|
||||
"""
|
||||
import os
|
||||
import io
|
||||
import struct
|
||||
from PIL import Image, ImageDraw, ImageFont
|
||||
|
||||
try:
|
||||
# Receive chunk count as 4-byte integer (no pickle!)
|
||||
count_bytes = pipe_conn.recv_bytes()
|
||||
chunk_count = struct.unpack('I', count_bytes)[0]
|
||||
|
||||
# Receive each chunk as raw bytes (no pickle!)
|
||||
chunks_bytes = []
|
||||
for _ in range(chunk_count):
|
||||
chunks_bytes.append(pipe_conn.recv_bytes())
|
||||
|
||||
# Load images from byte chunks
|
||||
images = [Image.open(io.BytesIO(b)) for b in chunks_bytes]
|
||||
del chunks_bytes
|
||||
|
||||
total_height = sum(im.height for im in images)
|
||||
max_width = max(im.width for im in images)
|
||||
|
||||
@@ -103,15 +53,14 @@ def stitch_images_worker(pipe_conn, chunks_bytes, original_page_height, capture_
|
||||
for im in images:
|
||||
stitched.paste(im, (0, y_offset))
|
||||
y_offset += im.height
|
||||
im.close() # Close immediately after pasting
|
||||
im.close()
|
||||
del images
|
||||
|
||||
# Draw caption only if page was trimmed
|
||||
if original_page_height > capture_height:
|
||||
draw = ImageDraw.Draw(stitched)
|
||||
caption_text = f"WARNING: Screenshot was {original_page_height}px but trimmed to {capture_height}px because it was too long"
|
||||
padding = 10
|
||||
|
||||
# Try to load font
|
||||
try:
|
||||
font = ImageFont.truetype("arial.ttf", 35)
|
||||
except IOError:
|
||||
@@ -120,23 +69,26 @@ def stitch_images_worker(pipe_conn, chunks_bytes, original_page_height, capture_
|
||||
bbox = draw.textbbox((0, 0), caption_text, font=font)
|
||||
text_width = bbox[2] - bbox[0]
|
||||
text_height = bbox[3] - bbox[1]
|
||||
|
||||
# Draw white background rectangle
|
||||
draw.rectangle([(0, 0), (max_width, text_height + 2 * padding)], fill=(255, 255, 255))
|
||||
|
||||
# Draw text centered
|
||||
text_x = (max_width - text_width) // 2
|
||||
draw.text((text_x, padding), caption_text, font=font, fill=(255, 0, 0))
|
||||
|
||||
# Encode and send image with optimization
|
||||
# Encode and send
|
||||
output = io.BytesIO()
|
||||
stitched.save(output, format="JPEG", quality=int(os.getenv("SCREENSHOT_QUALITY", SCREENSHOT_DEFAULT_QUALITY)), optimize=True)
|
||||
pipe_conn.send_bytes(output.getvalue())
|
||||
result_bytes = output.getvalue()
|
||||
|
||||
stitched.close()
|
||||
del stitched
|
||||
output.close()
|
||||
del output
|
||||
|
||||
pipe_conn.send_bytes(result_bytes)
|
||||
del result_bytes
|
||||
|
||||
except Exception as e:
|
||||
pipe_conn.send(f"error:{e}")
|
||||
logger.error(f"Error in stitch_images_worker_raw_bytes: {e}")
|
||||
error_msg = f"error:{e}".encode('utf-8')
|
||||
pipe_conn.send_bytes(error_msg)
|
||||
finally:
|
||||
pipe_conn.close()
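
The worker replies with raw bytes even on failure (a b"error:..." payload). A hedged helper sketch, not in this diff, showing how a caller might unwrap that reply, since the call sites above accept recv_bytes() as-is:

def recv_stitched_or_raise(parent_conn) -> bytes:
    # Surface the b"error:..." payload that stitch_images_worker_raw_bytes sends on failure
    payload = parent_conn.recv_bytes()
    if payload.startswith(b'error:'):
        raise RuntimeError(payload[len(b'error:'):].decode('utf-8', errors='replace'))
    return payload
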
@@ -14,7 +14,7 @@ from pathlib import Path
|
||||
from changedetectionio.strtobool import strtobool
|
||||
from threading import Event
|
||||
from changedetectionio.queue_handlers import RecheckPriorityQueue, NotificationQueue
|
||||
from changedetectionio import worker_handler
|
||||
from changedetectionio import worker_pool
|
||||
|
||||
from flask import (
|
||||
Flask,
|
||||
@@ -94,6 +94,14 @@ if os.getenv('FLASK_SERVER_NAME'):
|
||||
app.config['BABEL_TRANSLATION_DIRECTORIES'] = str(Path(__file__).parent / 'translations')
|
||||
app.config['BABEL_DEFAULT_LOCALE'] = 'en_GB'
|
||||
|
||||
# Session configuration
|
||||
# NOTE: Flask session (for locale, etc.) is separate from Flask-Login's remember-me cookie
|
||||
# - Flask session stores data like session['locale'] in a signed cookie
|
||||
# - Flask-Login's remember=True creates a separate authentication cookie
|
||||
# - Setting PERMANENT_SESSION_LIFETIME controls how long the Flask session cookie lasts
|
||||
from datetime import timedelta
|
||||
app.config['PERMANENT_SESSION_LIFETIME'] = timedelta(days=3650) # ~10 years (effectively unlimited)
|
||||
|
||||
#app.config["EXPLAIN_TEMPLATE_LOADING"] = True
|
||||
|
||||
|
||||
@@ -187,7 +195,7 @@ def _jinja2_filter_format_number_locale(value: float) -> str:
|
||||
|
||||
@app.template_global('is_checking_now')
|
||||
def _watch_is_checking_now(watch_obj, format="%Y-%m-%d %H:%M:%S"):
|
||||
return worker_handler.is_watch_running(watch_obj['uuid'])
|
||||
return worker_pool.is_watch_running(watch_obj['uuid'])
|
||||
|
||||
@app.template_global('get_watch_queue_position')
|
||||
def _get_watch_queue_position(watch_obj):
|
||||
@@ -198,13 +206,13 @@ def _get_watch_queue_position(watch_obj):
|
||||
@app.template_global('get_current_worker_count')
|
||||
def _get_current_worker_count():
|
||||
"""Get the current number of operational workers"""
|
||||
return worker_handler.get_worker_count()
|
||||
return worker_pool.get_worker_count()
|
||||
|
||||
@app.template_global('get_worker_status_info')
|
||||
def _get_worker_status_info():
|
||||
"""Get detailed worker status information for display"""
|
||||
status = worker_handler.get_worker_status()
|
||||
running_uuids = worker_handler.get_running_uuids()
|
||||
status = worker_pool.get_worker_status()
|
||||
running_uuids = worker_pool.get_running_uuids()
|
||||
|
||||
return {
|
||||
'count': status['worker_count'],
|
||||
@@ -393,7 +401,10 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
# so far just for read-only via tests, but this will be moved eventually to be the main source
|
||||
# (instead of the global var)
|
||||
app.config['DATASTORE'] = datastore_o
|
||||
|
||||
|
||||
# Store batch mode flag to skip background threads when running in batch mode
|
||||
app.config['batch_mode'] = config.get('batch_mode', False) if config else False
|
||||
|
||||
# Store the signal in the app config to ensure it's accessible everywhere
|
||||
app.config['watch_check_update_SIGNAL'] = watch_check_update
|
||||
|
||||
@@ -550,6 +561,9 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
|
||||
# Validate the locale against available languages
|
||||
if locale in language_codes:
|
||||
# Make session permanent so language preference persists across browser sessions
|
||||
# NOTE: This is the Flask session cookie (separate from Flask-Login's remember-me auth cookie)
|
||||
session.permanent = True
|
||||
session['locale'] = locale
|
||||
|
||||
# CRITICAL: Flask-Babel caches the locale in the request context (ctx.babel_locale)
|
||||
@@ -787,13 +801,15 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
|
||||
# watchlist UI buttons etc
|
||||
import changedetectionio.blueprint.ui as ui
|
||||
app.register_blueprint(ui.construct_blueprint(datastore, update_q, worker_handler, queuedWatchMetaData, watch_check_update))
|
||||
app.register_blueprint(ui.construct_blueprint(datastore, update_q, worker_pool, queuedWatchMetaData, watch_check_update))
|
||||
|
||||
import changedetectionio.blueprint.watchlist as watchlist
|
||||
app.register_blueprint(watchlist.construct_blueprint(datastore=datastore, update_q=update_q, queuedWatchMetaData=queuedWatchMetaData), url_prefix='')
|
||||
|
||||
# Initialize Socket.IO server conditionally based on settings
|
||||
socket_io_enabled = datastore.data['settings']['application']['ui'].get('socket_io_enabled', True)
|
||||
if socket_io_enabled and app.config.get('batch_mode'):
|
||||
socket_io_enabled = False
|
||||
if socket_io_enabled:
|
||||
from changedetectionio.realtime.socket_server import init_socketio
|
||||
global socketio_server
|
||||
@@ -822,10 +838,10 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
expected_workers = int(os.getenv("FETCH_WORKERS", datastore.data['settings']['requests']['workers']))
|
||||
|
||||
# Get basic status
|
||||
status = worker_handler.get_worker_status()
|
||||
status = worker_pool.get_worker_status()
|
||||
|
||||
# Perform health check
|
||||
health_result = worker_handler.check_worker_health(
|
||||
health_result = worker_pool.check_worker_health(
|
||||
expected_count=expected_workers,
|
||||
update_q=update_q,
|
||||
notification_q=notification_q,
|
||||
@@ -889,16 +905,31 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
# Can be overridden by ENV or use the default settings
|
||||
n_workers = int(os.getenv("FETCH_WORKERS", datastore.data['settings']['requests']['workers']))
|
||||
logger.info(f"Starting {n_workers} workers during app initialization")
|
||||
worker_handler.start_workers(n_workers, update_q, notification_q, app, datastore)
|
||||
worker_pool.start_workers(n_workers, update_q, notification_q, app, datastore)
|
||||
|
||||
# @todo handle ctrl break
|
||||
ticker_thread = threading.Thread(target=ticker_thread_check_time_launch_checks, daemon=True, name="TickerThread-ScheduleChecker").start()
|
||||
threading.Thread(target=notification_runner, daemon=True, name="NotificationRunner").start()
|
||||
# Skip background threads in batch mode (just process queue and exit)
|
||||
batch_mode = app.config.get('batch_mode', False)
|
||||
if not batch_mode:
|
||||
# @todo handle ctrl break
|
||||
ticker_thread = threading.Thread(target=ticker_thread_check_time_launch_checks, daemon=True, name="TickerThread-ScheduleChecker").start()
|
||||
|
||||
in_pytest = "pytest" in sys.modules or "PYTEST_CURRENT_TEST" in os.environ
|
||||
# Check for new release version, but not when running in test/build or pytest
|
||||
if not os.getenv("GITHUB_REF", False) and not strtobool(os.getenv('DISABLE_VERSION_CHECK', 'no')) and not in_pytest:
|
||||
threading.Thread(target=check_for_new_version, daemon=True, name="VersionChecker").start()
|
||||
# Start configurable number of notification workers (default 1)
|
||||
notification_workers = int(os.getenv("NOTIFICATION_WORKERS", "1"))
|
||||
for i in range(notification_workers):
|
||||
threading.Thread(
|
||||
target=notification_runner,
|
||||
args=(i,),
|
||||
daemon=True,
|
||||
name=f"NotificationRunner-{i}"
|
||||
).start()
|
||||
logger.info(f"Started {notification_workers} notification worker(s)")
|
||||
|
||||
in_pytest = "pytest" in sys.modules or "PYTEST_CURRENT_TEST" in os.environ
|
||||
# Check for new release version, but not when running in test/build or pytest
|
||||
if not os.getenv("GITHUB_REF", False) and not strtobool(os.getenv('DISABLE_VERSION_CHECK', 'no')) and not in_pytest:
|
||||
threading.Thread(target=check_for_new_version, daemon=True, name="VersionChecker").start()
|
||||
else:
|
||||
logger.info("Batch mode: Skipping ticker thread, notification runner, and version checker")
|
||||
|
||||
# Return the Flask app - the Socket.IO will be attached to it but initialized separately
|
||||
# This avoids circular dependencies
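
A hedged sketch of how batch mode might be enabled when constructing the app; the module path and datastore arguments here are assumptions, not the real CLI wiring:

from changedetectionio.store import ChangeDetectionStore
from changedetectionio.flask_app import changedetection_app   # assumed module location

datastore = ChangeDetectionStore(datastore_path='/tmp/changedetection-batch')
# batch_mode=True skips the ticker thread, notification runners, version checker
# and Socket.IO - the queue is processed once and the process exits.
app = changedetection_app(config={'batch_mode': True}, datastore_o=datastore)
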
@@ -933,14 +964,14 @@ def check_for_new_version():
|
||||
app.config.exit.wait(86400)
|
||||
|
||||
|
||||
def notification_runner():
|
||||
def notification_runner(worker_id=0):
|
||||
global notification_debug_log
|
||||
from datetime import datetime
|
||||
import json
|
||||
with app.app_context():
|
||||
while not app.config.exit.is_set():
|
||||
try:
|
||||
# At the moment only one thread runs (single runner)
|
||||
# Multiple workers can run concurrently (configurable via NOTIFICATION_WORKERS)
|
||||
n_object = notification_q.get(block=False)
|
||||
except queue.Empty:
|
||||
app.config.exit.wait(1)
|
||||
@@ -966,7 +997,7 @@ def notification_runner():
|
||||
sent_obj = process_notification(n_object, datastore)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Watch URL: {n_object['watch_url']} Error {str(e)}")
|
||||
logger.error(f"Notification worker {worker_id} - Watch URL: {n_object['watch_url']} Error {str(e)}")
|
||||
|
||||
# UUID won't be present when we submit a 'test' from the global settings
|
||||
if 'uuid' in n_object:
|
||||
@@ -1007,7 +1038,7 @@ def ticker_thread_check_time_launch_checks():
|
||||
now = time.time()
|
||||
if now - last_health_check > 60:
|
||||
expected_workers = int(os.getenv("FETCH_WORKERS", datastore.data['settings']['requests']['workers']))
|
||||
health_result = worker_handler.check_worker_health(
|
||||
health_result = worker_pool.check_worker_health(
|
||||
expected_count=expected_workers,
|
||||
update_q=update_q,
|
||||
notification_q=notification_q,
|
||||
@@ -1026,7 +1057,7 @@ def ticker_thread_check_time_launch_checks():
|
||||
continue
|
||||
|
||||
# Get a list of watches by UUID that are currently fetching data
|
||||
running_uuids = worker_handler.get_running_uuids()
|
||||
running_uuids = worker_pool.get_running_uuids()
|
||||
|
||||
# Build set of queued UUIDs once for O(1) lookup instead of O(n) per watch
|
||||
queued_uuids = {q_item.item['uuid'] for q_item in update_q.queue}
|
||||
@@ -1132,7 +1163,7 @@ def ticker_thread_check_time_launch_checks():
|
||||
priority = int(time.time())
|
||||
|
||||
# Into the queue with you
|
||||
queued_successfully = worker_handler.queue_item_async_safe(update_q,
|
||||
queued_successfully = worker_pool.queue_item_async_safe(update_q,
|
||||
queuedWatchMetaData.PrioritizedItem(priority=priority,
|
||||
item={'uuid': uuid})
|
||||
)
|
||||
|
||||
@@ -20,8 +20,9 @@ mtable = {'seconds': 1, 'minutes': 60, 'hours': 3600, 'days': 86400, 'weeks': 86
|
||||
|
||||
def _brotli_save(contents, filepath, mode=None, fallback_uncompressed=False):
|
||||
"""
|
||||
Save compressed data using native brotli.
|
||||
Testing shows no memory leak when using gc.collect() after compression.
|
||||
Save compressed data using native brotli with streaming compression.
|
||||
Uses chunked compression to minimize peak memory usage and malloc_trim()
|
||||
to force release of C-level memory back to the OS.
|
||||
|
||||
Args:
|
||||
contents: data to compress (str or bytes)
|
||||
@@ -37,27 +38,52 @@ def _brotli_save(contents, filepath, mode=None, fallback_uncompressed=False):
|
||||
"""
|
||||
import brotli
|
||||
import gc
|
||||
import ctypes
|
||||
|
||||
# Ensure contents are bytes
|
||||
if isinstance(contents, str):
|
||||
contents = contents.encode('utf-8')
|
||||
|
||||
try:
|
||||
logger.debug(f"Starting brotli compression of {len(contents)} bytes.")
|
||||
original_size = len(contents)
|
||||
logger.debug(f"Starting brotli streaming compression of {original_size} bytes.")
|
||||
|
||||
if mode is not None:
|
||||
compressed_data = brotli.compress(contents, mode=mode)
|
||||
else:
|
||||
compressed_data = brotli.compress(contents)
|
||||
# Create streaming compressor
|
||||
compressor = brotli.Compressor(quality=6, mode=mode if mode is not None else brotli.MODE_GENERIC)
|
||||
|
||||
# Stream compress in chunks to minimize memory usage
|
||||
chunk_size = 65536 # 64KB chunks
|
||||
total_compressed_size = 0
|
||||
|
||||
with open(filepath, 'wb') as f:
|
||||
f.write(compressed_data)
|
||||
# Process data in chunks
|
||||
offset = 0
|
||||
while offset < len(contents):
|
||||
chunk = contents[offset:offset + chunk_size]
|
||||
compressed_chunk = compressor.process(chunk)
|
||||
if compressed_chunk:
|
||||
f.write(compressed_chunk)
|
||||
total_compressed_size += len(compressed_chunk)
|
||||
offset += chunk_size
|
||||
|
||||
logger.debug(f"Finished brotli compression - From {len(contents)} to {len(compressed_data)} bytes.")
|
||||
# Finalize compression - critical for proper cleanup
|
||||
final_chunk = compressor.finish()
|
||||
if final_chunk:
|
||||
f.write(final_chunk)
|
||||
total_compressed_size += len(final_chunk)
|
||||
|
||||
# Force garbage collection to prevent memory buildup
|
||||
logger.debug(f"Finished brotli compression - From {original_size} to {total_compressed_size} bytes.")
|
||||
|
||||
# Cleanup: Delete compressor, force Python GC, then force C-level memory release
|
||||
del compressor
|
||||
gc.collect()
|
||||
|
||||
# Force release of C-level memory back to OS (since brotli is a C library)
|
||||
try:
|
||||
ctypes.CDLL('libc.so.6').malloc_trim(0)
|
||||
except Exception:
|
||||
pass # malloc_trim not available on all systems (e.g., macOS)
|
||||
|
||||
return filepath
|
||||
|
||||
except Exception as e:
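
A small round-trip check, assuming the standard brotli module API: the chunked Compressor output written above is a single valid brotli stream, so reading it back only needs a plain brotli.decompress():

import brotli

def read_brotli_file(filepath):
    # Counterpart to _brotli_save(): returns the original uncompressed bytes
    with open(filepath, 'rb') as f:
        return brotli.decompress(f.read())
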
@@ -5,51 +5,52 @@ import heapq
|
||||
import queue
|
||||
import threading
|
||||
|
||||
try:
|
||||
import janus
|
||||
except ImportError:
|
||||
logger.critical(f"CRITICAL: janus library is required. Install with: pip install janus")
|
||||
raise
|
||||
# Janus is no longer required - we use pure threading.Queue for multi-loop support
|
||||
# try:
|
||||
# import janus
|
||||
# except ImportError:
|
||||
# pass # Not needed anymore
|
||||
|
||||
|
||||
class RecheckPriorityQueue:
|
||||
"""
|
||||
Ultra-reliable priority queue using janus for async/sync bridging.
|
||||
|
||||
CRITICAL DESIGN NOTE: Both sync_q and async_q are required because:
|
||||
- sync_q: Used by Flask routes, ticker threads, and other synchronous code
|
||||
- async_q: Used by async workers (the actual fetchers/processors) and coroutines
|
||||
|
||||
DO NOT REMOVE EITHER INTERFACE - they bridge different execution contexts:
|
||||
- Synchronous code (Flask, threads) cannot use async methods without blocking
|
||||
- Async code cannot use sync methods without blocking the event loop
|
||||
- janus provides the only safe bridge between these two worlds
|
||||
|
||||
Attempting to unify to async-only would require:
|
||||
- Converting all Flask routes to async (major breaking change)
|
||||
- Using asyncio.run() in sync contexts (causes deadlocks)
|
||||
- Thread-pool wrapping (adds complexity and overhead)
|
||||
|
||||
Minimal implementation focused on reliability:
|
||||
- Pure janus for sync/async bridge
|
||||
- Thread-safe priority ordering
|
||||
- Bulletproof error handling with critical logging
|
||||
Thread-safe priority queue supporting multiple async event loops.
|
||||
|
||||
ARCHITECTURE:
|
||||
- Multiple async workers, each with its own event loop in its own thread
|
||||
- One shared queue accessed safely via threading primitives
|
||||
- Workers use run_in_executor to access queue without blocking their event loop
|
||||
|
||||
IMPLEMENTATION:
|
||||
- Pure threading.Queue for notifications (no event loop binding)
|
||||
- Heapq-based priority storage (thread-safe with RLock)
|
||||
- Async methods wrap sync methods via run_in_executor
|
||||
- Supports both sync and async access patterns
|
||||
|
||||
WHY NOT JANUS:
|
||||
- Janus binds to ONE event loop at creation time
|
||||
- Our architecture has 15+ workers, each with separate event loops
|
||||
- Workers in different threads/loops cannot share janus async interface
|
||||
- Pure threading approach works across all event loops
|
||||
"""
|
||||
|
||||
def __init__(self, maxsize: int = 0):
|
||||
try:
|
||||
self._janus_queue = janus.Queue(maxsize=maxsize)
|
||||
# BOTH interfaces required - see class docstring for why
|
||||
self.sync_q = self._janus_queue.sync_q # Flask routes, ticker thread
|
||||
self.async_q = self._janus_queue.async_q # Async workers
|
||||
|
||||
# Use pure threading.Queue for notification - no janus needed
|
||||
# This avoids event loop binding issues with multiple worker threads
|
||||
import asyncio
|
||||
self._notification_queue = queue.Queue(maxsize=maxsize if maxsize > 0 else 0)
|
||||
|
||||
# Priority storage - thread-safe
|
||||
self._priority_items = []
|
||||
self._lock = threading.RLock()
|
||||
|
||||
|
||||
# Condition variable for async wait support
|
||||
self._condition = threading.Condition(self._lock)
|
||||
|
||||
# Signals for UI updates
|
||||
self.queue_length_signal = signal('queue_length')
|
||||
|
||||
|
||||
logger.debug("RecheckPriorityQueue initialized successfully")
|
||||
except Exception as e:
|
||||
logger.critical(f"CRITICAL: Failed to initialize RecheckPriorityQueue: {str(e)}")
|
||||
@@ -59,24 +60,23 @@ class RecheckPriorityQueue:
|
||||
def put(self, item, block: bool = True, timeout: Optional[float] = None):
|
||||
"""Thread-safe sync put with priority ordering"""
|
||||
try:
|
||||
# Add to priority storage
|
||||
with self._lock:
|
||||
# Add to priority storage and notify waiters
|
||||
with self._condition:
|
||||
heapq.heappush(self._priority_items, item)
|
||||
|
||||
# Notify via janus sync queue
|
||||
self.sync_q.put(True, block=block, timeout=timeout)
|
||||
|
||||
self._notification_queue.put(True, block=False) # Notification only
|
||||
self._condition.notify_all() # Wake up any async waiters
|
||||
|
||||
# Emit signals
|
||||
self._emit_put_signals(item)
|
||||
|
||||
|
||||
logger.trace(f"Successfully queued item: {self._get_item_uuid(item)}")
|
||||
return True
|
||||
|
||||
|
||||
except Exception as e:
|
||||
logger.critical(f"CRITICAL: Failed to put item {self._get_item_uuid(item)}: {str(e)}")
|
||||
# Remove from priority storage if janus put failed
|
||||
# Remove from priority storage if put failed
|
||||
try:
|
||||
with self._lock:
|
||||
with self._condition:
|
||||
if item in self._priority_items:
|
||||
self._priority_items.remove(item)
|
||||
heapq.heapify(self._priority_items)
|
||||
@@ -86,10 +86,10 @@ class RecheckPriorityQueue:
|
||||
|
||||
def get(self, block: bool = True, timeout: Optional[float] = None):
|
||||
"""Thread-safe sync get with priority ordering"""
|
||||
import queue
|
||||
import queue as queue_module
|
||||
try:
|
||||
# Wait for notification
|
||||
self.sync_q.get(block=block, timeout=timeout)
|
||||
# Wait for notification (this doesn't return the actual item, just signals availability)
|
||||
self._notification_queue.get(block=block, timeout=timeout)
|
||||
|
||||
# Get highest priority item
|
||||
with self._lock:
|
||||
@@ -104,57 +104,53 @@ class RecheckPriorityQueue:
|
||||
logger.debug(f"Successfully retrieved item: {self._get_item_uuid(item)}")
|
||||
return item
|
||||
|
||||
except queue.Empty:
|
||||
except queue_module.Empty:
|
||||
# Queue is empty with timeout - expected behavior, re-raise without logging
|
||||
raise
|
||||
raise # noqa
|
||||
except Exception as e:
|
||||
# Re-raise without logging - caller (worker) will handle and log appropriately
|
||||
raise
|
||||
|
||||
# ASYNC INTERFACE (for workers)
|
||||
async def async_put(self, item):
|
||||
"""Pure async put with priority ordering"""
|
||||
async def async_put(self, item, executor=None):
|
||||
"""Async put with priority ordering - uses thread pool to avoid blocking
|
||||
|
||||
Args:
|
||||
item: Item to add to queue
|
||||
executor: Optional ThreadPoolExecutor. If None, uses default pool.
|
||||
"""
|
||||
import asyncio
|
||||
try:
|
||||
# Add to priority storage
|
||||
with self._lock:
|
||||
heapq.heappush(self._priority_items, item)
|
||||
|
||||
# Notify via janus async queue
|
||||
await self.async_q.put(True)
|
||||
|
||||
# Emit signals
|
||||
self._emit_put_signals(item)
|
||||
|
||||
# Use run_in_executor to call sync put without blocking event loop
|
||||
loop = asyncio.get_event_loop()
|
||||
result = await loop.run_in_executor(
|
||||
executor, # Use provided executor or default
|
||||
lambda: self.put(item, block=True, timeout=5.0)
|
||||
)
|
||||
|
||||
logger.debug(f"Successfully async queued item: {self._get_item_uuid(item)}")
|
||||
return True
|
||||
|
||||
return result
|
||||
|
||||
except Exception as e:
|
||||
logger.critical(f"CRITICAL: Failed to async put item {self._get_item_uuid(item)}: {str(e)}")
|
||||
# Remove from priority storage if janus put failed
|
||||
try:
|
||||
with self._lock:
|
||||
if item in self._priority_items:
|
||||
self._priority_items.remove(item)
|
||||
heapq.heapify(self._priority_items)
|
||||
except Exception as cleanup_e:
|
||||
logger.critical(f"CRITICAL: Failed to cleanup after async put failure: {str(e)}")
|
||||
return False
|
||||
|
||||
async def async_get(self):
|
||||
"""Pure async get with priority ordering"""
|
||||
async def async_get(self, executor=None):
|
||||
"""Async get with priority ordering - uses thread pool to avoid blocking
|
||||
|
||||
Args:
|
||||
executor: Optional ThreadPoolExecutor. If None, uses default pool.
|
||||
With many workers (30+), pass custom executor scaled to worker count.
|
||||
"""
|
||||
import asyncio
|
||||
try:
|
||||
# Wait for notification
|
||||
await self.async_q.get()
|
||||
|
||||
# Get highest priority item
|
||||
with self._lock:
|
||||
if not self._priority_items:
|
||||
logger.critical(f"CRITICAL: Async queue notification received but no priority items available")
|
||||
raise Exception("Priority queue inconsistency")
|
||||
item = heapq.heappop(self._priority_items)
|
||||
|
||||
# Emit signals
|
||||
self._emit_get_signals()
|
||||
# Use run_in_executor to call sync get without blocking event loop
|
||||
# This works across multiple event loops since it uses threading
|
||||
loop = asyncio.get_event_loop()
|
||||
item = await loop.run_in_executor(
|
||||
executor, # Use provided executor (scales with FETCH_WORKERS) or default
|
||||
lambda: self.get(block=True, timeout=1.0)
|
||||
)
|
||||
|
||||
logger.debug(f"Successfully async retrieved item: {self._get_item_uuid(item)}")
|
||||
return item
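
A hedged sketch of the worker side this design targets: each async worker runs in its own thread and event loop and drains the shared queue through a ThreadPoolExecutor, so a blocking get never stalls the loop. The queue wiring and the Empty handling here are illustrative assumptions.

import asyncio
import queue
import threading
from concurrent.futures import ThreadPoolExecutor

async def worker_loop(update_q, executor):
    while True:
        try:
            item = await update_q.async_get(executor=executor)
        except queue.Empty:
            await asyncio.sleep(0.1)            # nothing arrived within the 1s timeout
            continue
        print(f"processing watch {item.item['uuid']}")   # real workers fetch/diff here

def start_worker_thread(update_q, executor):
    threading.Thread(
        target=lambda: asyncio.run(worker_loop(update_q, executor)),
        daemon=True,
    ).start()
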
@@ -187,9 +183,9 @@ class RecheckPriorityQueue:
|
||||
return []
|
||||
|
||||
def close(self):
|
||||
"""Close the janus queue"""
|
||||
"""Close the queue"""
|
||||
try:
|
||||
self._janus_queue.close()
|
||||
# Nothing to close for threading.Queue
|
||||
logger.debug("RecheckPriorityQueue closed successfully")
|
||||
except Exception as e:
|
||||
logger.critical(f"CRITICAL: Failed to close RecheckPriorityQueue: {str(e)}")
|
||||
@@ -363,12 +359,11 @@ class NotificationQueue:
|
||||
|
||||
def __init__(self, maxsize: int = 0, datastore=None):
|
||||
try:
|
||||
self._janus_queue = janus.Queue(maxsize=maxsize)
|
||||
# BOTH interfaces required - see class docstring for why
|
||||
self.sync_q = self._janus_queue.sync_q # Flask routes, threads
|
||||
self.async_q = self._janus_queue.async_q # Async workers
|
||||
# Use pure threading.Queue to avoid event loop binding issues
|
||||
self._notification_queue = queue.Queue(maxsize=maxsize if maxsize > 0 else 0)
|
||||
self.notification_event_signal = signal('notification_event')
|
||||
self.datastore = datastore # For checking all_muted setting
|
||||
self._lock = threading.RLock()
|
||||
logger.debug("NotificationQueue initialized successfully")
|
||||
except Exception as e:
|
||||
logger.critical(f"CRITICAL: Failed to initialize NotificationQueue: {str(e)}")
|
||||
@@ -386,7 +381,8 @@ class NotificationQueue:
|
||||
logger.debug(f"Notification blocked - all notifications are muted: {item.get('uuid', 'unknown')}")
|
||||
return False
|
||||
|
||||
self.sync_q.put(item, block=block, timeout=timeout)
|
||||
with self._lock:
|
||||
self._notification_queue.put(item, block=block, timeout=timeout)
|
||||
self._emit_notification_signal(item)
|
||||
logger.debug(f"Successfully queued notification: {item.get('uuid', 'unknown')}")
|
||||
return True
|
||||
@@ -394,36 +390,49 @@ class NotificationQueue:
|
||||
logger.critical(f"CRITICAL: Failed to put notification {item.get('uuid', 'unknown')}: {str(e)}")
|
||||
return False
|
||||
|
||||
async def async_put(self, item: Dict[str, Any]):
|
||||
"""Pure async put with signal emission"""
|
||||
async def async_put(self, item: Dict[str, Any], executor=None):
|
||||
"""Async put with signal emission - uses thread pool
|
||||
|
||||
Args:
|
||||
item: Notification item to queue
|
||||
executor: Optional ThreadPoolExecutor
|
||||
"""
|
||||
import asyncio
|
||||
try:
|
||||
# Check if all notifications are muted
|
||||
if self.datastore and self.datastore.data['settings']['application'].get('all_muted', False):
|
||||
logger.debug(f"Notification blocked - all notifications are muted: {item.get('uuid', 'unknown')}")
|
||||
return False
|
||||
|
||||
await self.async_q.put(item)
|
||||
self._emit_notification_signal(item)
|
||||
loop = asyncio.get_event_loop()
|
||||
await loop.run_in_executor(executor, lambda: self.put(item, block=True, timeout=5.0))
|
||||
logger.debug(f"Successfully async queued notification: {item.get('uuid', 'unknown')}")
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.critical(f"CRITICAL: Failed to async put notification {item.get('uuid', 'unknown')}: {str(e)}")
|
||||
return False
|
||||
|
||||
|
||||
def get(self, block: bool = True, timeout: Optional[float] = None):
|
||||
"""Thread-safe sync get"""
|
||||
try:
|
||||
return self.sync_q.get(block=block, timeout=timeout)
|
||||
with self._lock:
|
||||
return self._notification_queue.get(block=block, timeout=timeout)
|
||||
except queue.Empty as e:
|
||||
raise e
|
||||
except Exception as e:
|
||||
logger.critical(f"CRITICAL: Failed to get notification: {str(e)}")
|
||||
raise e
|
||||
|
||||
async def async_get(self):
|
||||
"""Pure async get"""
|
||||
|
||||
async def async_get(self, executor=None):
|
||||
"""Async get - uses thread pool
|
||||
|
||||
Args:
|
||||
executor: Optional ThreadPoolExecutor
|
||||
"""
|
||||
import asyncio
|
||||
try:
|
||||
return await self.async_q.get()
|
||||
loop = asyncio.get_event_loop()
|
||||
return await loop.run_in_executor(executor, lambda: self.get(block=True, timeout=1.0))
|
||||
except queue.Empty as e:
|
||||
raise e
|
||||
except Exception as e:
|
||||
@@ -433,19 +442,20 @@ class NotificationQueue:
|
||||
def qsize(self) -> int:
|
||||
"""Get current queue size"""
|
||||
try:
|
||||
return self.sync_q.qsize()
|
||||
with self._lock:
|
||||
return self._notification_queue.qsize()
|
||||
except Exception as e:
|
||||
logger.critical(f"CRITICAL: Failed to get notification queue size: {str(e)}")
|
||||
return 0
|
||||
|
||||
|
||||
def empty(self) -> bool:
|
||||
"""Check if queue is empty"""
|
||||
return self.qsize() == 0
|
||||
|
||||
|
||||
def close(self):
|
||||
"""Close the janus queue"""
|
||||
"""Close the queue"""
|
||||
try:
|
||||
self._janus_queue.close()
|
||||
# Nothing to close for threading.Queue
|
||||
logger.debug("NotificationQueue closed successfully")
|
||||
except Exception as e:
|
||||
logger.critical(f"CRITICAL: Failed to close NotificationQueue: {str(e)}")
|
||||
|
||||
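A hedged illustration (function names and item fields are placeholders) of why the plain threading.Queue works across contexts: a Flask request thread calls the sync put(), while an async notification worker drains the same queue through async_get() and a thread pool, regardless of which event loop it runs on.

import asyncio
from concurrent.futures import ThreadPoolExecutor

notification_executor = ThreadPoolExecutor(max_workers=2)

def queue_from_flask_thread(notification_q, watch_uuid):
    # Ordinary thread context - no event loop required
    notification_q.put({'uuid': watch_uuid, 'notification_urls': []})

async def notification_worker(notification_q):
    while True:
        try:
            item = await notification_q.async_get(executor=notification_executor)
        except Exception:
            continue  # queue.Empty after the 1.0s timeout inside async_get()
        # ... build and send the notification for item ...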
@@ -37,9 +37,9 @@ def register_watch_operation_handlers(socketio, datastore):
|
||||
# Import here to avoid circular imports
|
||||
from changedetectionio.flask_app import update_q
|
||||
from changedetectionio import queuedWatchMetaData
|
||||
from changedetectionio import worker_handler
|
||||
from changedetectionio import worker_pool
|
||||
|
||||
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
worker_pool.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
logger.info(f"Socket.IO: Queued recheck for watch {uuid}")
|
||||
else:
|
||||
emit('operation_result', {'success': False, 'error': f'Unknown operation: {op}'})
|
||||
|
||||
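A small hedged sketch reusing only the calls visible in this hunk - queueing several watches for recheck via the renamed worker_pool module might look like this (the queue_rechecks helper itself is hypothetical):

from changedetectionio.flask_app import update_q
from changedetectionio import queuedWatchMetaData, worker_pool

def queue_rechecks(uuids):
    for uuid in uuids:
        worker_pool.queue_item_async_safe(
            update_q,
            queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid})
        )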
@@ -145,10 +145,10 @@ def handle_watch_update(socketio, **kwargs):
|
||||
# Emit the watch update to all connected clients
|
||||
from changedetectionio.flask_app import update_q
|
||||
from changedetectionio.flask_app import _jinja2_filter_datetime
|
||||
from changedetectionio import worker_handler
|
||||
from changedetectionio import worker_pool
|
||||
|
||||
# Get list of watches that are currently running
|
||||
running_uuids = worker_handler.get_running_uuids()
|
||||
running_uuids = worker_pool.get_running_uuids()
|
||||
|
||||
# Get list of watches in the queue (efficient single-lock method)
|
||||
queue_list = update_q.get_queued_uuids()
|
||||
@@ -252,7 +252,7 @@ def init_socketio(app, datastore):
|
||||
def event_checkbox_operations(data):
|
||||
from changedetectionio.blueprint.ui import _handle_operations
|
||||
from changedetectionio import queuedWatchMetaData
|
||||
from changedetectionio import worker_handler
|
||||
from changedetectionio import worker_pool
|
||||
from changedetectionio.flask_app import update_q, watch_check_update
|
||||
import threading
|
||||
|
||||
@@ -268,7 +268,7 @@ def init_socketio(app, datastore):
|
||||
uuids=data.get('uuids'),
|
||||
datastore=datastore,
|
||||
extra_data=data.get('extra_data'),
|
||||
worker_handler=worker_handler,
|
||||
worker_pool=worker_pool,
|
||||
update_q=update_q,
|
||||
queuedWatchMetaData=queuedWatchMetaData,
|
||||
watch_check_update=watch_check_update,
|
||||
|
||||
@@ -1,19 +1,25 @@
|
||||
{
|
||||
"name": "",
|
||||
"short_name": "",
|
||||
"name": "ChangeDetection.io",
|
||||
"short_name": "ChangeDetect",
|
||||
"description": "Self-hosted website change detection and monitoring",
|
||||
"icons": [
|
||||
{
|
||||
"src": "android-chrome-192x192.png",
|
||||
"sizes": "192x192",
|
||||
"type": "image/png"
|
||||
"type": "image/png",
|
||||
"purpose": "any maskable"
|
||||
},
|
||||
{
|
||||
"src": "android-chrome-256x256.png",
|
||||
"sizes": "256x256",
|
||||
"type": "image/png"
|
||||
"type": "image/png",
|
||||
"purpose": "any maskable"
|
||||
}
|
||||
],
|
||||
"theme_color": "#ffffff",
|
||||
"start_url": "/",
|
||||
"theme_color": "#5bbad5",
|
||||
"background_color": "#ffffff",
|
||||
"display": "standalone"
|
||||
"display": "standalone",
|
||||
"categories": ["utilities", "productivity"],
|
||||
"orientation": "any"
|
||||
}
|
||||
|
||||
@@ -222,6 +222,19 @@ code {
|
||||
color: var(--color-white);
|
||||
background: var(--color-text-watch-tag-list);
|
||||
@extend .inline-tag;
|
||||
|
||||
/* Remove default anchor styling when used as links */
|
||||
text-decoration: none;
|
||||
|
||||
&:hover {
|
||||
text-decoration: none;
|
||||
opacity: 0.8;
|
||||
cursor: pointer;
|
||||
}
|
||||
|
||||
&:visited {
|
||||
color: var(--color-white);
|
||||
}
|
||||
}
|
||||
|
||||
@media (min-width: 768px) {
|
||||
|
||||
File diff suppressed because it is too large
975
changedetectionio/store/__init__.py
Normal file
@@ -0,0 +1,975 @@
|
||||
import shutil
|
||||
|
||||
from changedetectionio.strtobool import strtobool
|
||||
|
||||
from changedetectionio.validate_url import is_safe_valid_url
|
||||
|
||||
from flask import (
|
||||
flash
|
||||
)
|
||||
from flask_babel import gettext
|
||||
|
||||
from ..blueprint.rss import RSS_CONTENT_FORMAT_DEFAULT
|
||||
from ..html_tools import TRANSLATE_WHITESPACE_TABLE
|
||||
from ..model import App, Watch, USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH
|
||||
from copy import deepcopy, copy
|
||||
from os import path, unlink
|
||||
from threading import Lock
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import secrets
|
||||
import sys
|
||||
import threading
|
||||
import time
|
||||
import uuid as uuid_builder
|
||||
from loguru import logger
|
||||
from blinker import signal
|
||||
|
||||
# Try to import orjson for faster JSON serialization
|
||||
try:
|
||||
import orjson
|
||||
|
||||
HAS_ORJSON = True
|
||||
except ImportError:
|
||||
HAS_ORJSON = False
|
||||
|
||||
from ..processors import get_custom_watch_obj_for_processor
|
||||
from ..processors.restock_diff import Restock
|
||||
|
||||
# Import the base class and helpers
|
||||
from .file_saving_datastore import FileSavingDataStore, load_all_watches, save_watch_atomic, save_json_atomic
|
||||
from .updates import DatastoreUpdatesMixin
|
||||
from .legacy_loader import has_legacy_datastore
|
||||
|
||||
# Because the server will run as a daemon and won't know the URL for notification links when firing off a notification
|
||||
BASE_URL_NOT_SET_TEXT = '("Base URL" not set - see settings - notifications)'
|
||||
|
||||
dictfilt = lambda x, y: dict([(i, x[i]) for i in x if i in set(y)])
|
||||
|
||||
|
||||
# Is there an existing library to ensure some data store (JSON etc) is in sync with CRUD methods?
|
||||
# Open a github issue if you know something :)
|
||||
# https://stackoverflow.com/questions/6190468/how-to-trigger-function-on-value-change
|
||||
class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
|
||||
__version_check = True
|
||||
|
||||
def __init__(self, datastore_path="/datastore", include_default_watches=True, version_tag="0.0.0"):
|
||||
# Initialize parent class
|
||||
super().__init__()
|
||||
|
||||
# Should only be active for docker
|
||||
# logging.basicConfig(filename='/dev/stdout', level=logging.INFO)
|
||||
self.datastore_path = datastore_path
|
||||
self.needs_write = False
|
||||
self.start_time = time.time()
|
||||
self.stop_thread = False
|
||||
self.save_version_copy_json_db(version_tag)
|
||||
self.reload_state(datastore_path=datastore_path, include_default_watches=include_default_watches, version_tag=version_tag)
|
||||
|
||||
def save_version_copy_json_db(self, version_tag):
|
||||
"""
|
||||
Create version-tagged backup of changedetection.json.
|
||||
|
||||
This is called on version upgrades to preserve a backup in case
|
||||
the new version has issues.
|
||||
"""
|
||||
import re
|
||||
|
||||
version_text = re.sub(r'\D+', '-', version_tag)
|
||||
db_path = os.path.join(self.datastore_path, "changedetection.json")
|
||||
db_path_version_backup = os.path.join(self.datastore_path, f"changedetection-{version_text}.json")
|
||||
|
||||
if not os.path.isfile(db_path_version_backup) and os.path.isfile(db_path):
|
||||
from shutil import copyfile
|
||||
logger.info(f"Backing up changedetection.json due to new version to '{db_path_version_backup}'.")
|
||||
copyfile(db_path, db_path_version_backup)
|
||||
|
||||
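A worked example of the backup naming above (version string is a placeholder): every run of non-digits in the version tag collapses to a dash, so the copy sits beside the live file.

import re
version_tag = "0.50.4"                              # placeholder version
version_text = re.sub(r'\D+', '-', version_tag)     # -> "0-50-4"
# Result: /datastore/changedetection-0-50-4.json, written once per new version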
def _load_settings(self):
|
||||
"""
|
||||
Load settings from storage.
|
||||
|
||||
File backend implementation: reads from changedetection.json
|
||||
|
||||
Returns:
|
||||
dict: Settings data loaded from storage
|
||||
"""
|
||||
changedetection_json = os.path.join(self.datastore_path, "changedetection.json")
|
||||
|
||||
logger.info(f"Loading settings from {changedetection_json}")
|
||||
|
||||
if HAS_ORJSON:
|
||||
with open(changedetection_json, 'rb') as f:
|
||||
return orjson.loads(f.read())
|
||||
else:
|
||||
with open(changedetection_json, 'r', encoding='utf-8') as f:
|
||||
return json.load(f)
|
||||
|
||||
def _apply_settings(self, settings_data):
|
||||
"""
|
||||
Apply loaded settings data to internal data structure.
|
||||
|
||||
Args:
|
||||
settings_data: Dictionary loaded from changedetection.json
|
||||
"""
|
||||
# Apply top-level fields
|
||||
if 'app_guid' in settings_data:
|
||||
self.__data['app_guid'] = settings_data['app_guid']
|
||||
if 'build_sha' in settings_data:
|
||||
self.__data['build_sha'] = settings_data['build_sha']
|
||||
if 'version_tag' in settings_data:
|
||||
self.__data['version_tag'] = settings_data['version_tag']
|
||||
|
||||
# Apply settings sections
|
||||
if 'settings' in settings_data:
|
||||
if 'headers' in settings_data['settings']:
|
||||
self.__data['settings']['headers'].update(settings_data['settings']['headers'])
|
||||
if 'requests' in settings_data['settings']:
|
||||
self.__data['settings']['requests'].update(settings_data['settings']['requests'])
|
||||
if 'application' in settings_data['settings']:
|
||||
self.__data['settings']['application'].update(settings_data['settings']['application'])
|
||||
|
||||
def _rehydrate_tags(self):
|
||||
"""Rehydrate tag entities from stored data."""
|
||||
for uuid, tag in self.__data['settings']['application']['tags'].items():
|
||||
self.__data['settings']['application']['tags'][uuid] = self.rehydrate_entity(
|
||||
uuid, tag, processor_override='restock_diff'
|
||||
)
|
||||
logger.info(f"Tag: {uuid} {tag['title']}")
|
||||
|
||||
|
||||
def _load_state(self):
|
||||
"""
|
||||
Load complete datastore state from storage.
|
||||
|
||||
Orchestrates loading of settings and watches using polymorphic methods.
|
||||
"""
|
||||
# Load settings
|
||||
settings_data = self._load_settings()
|
||||
self._apply_settings(settings_data)
|
||||
|
||||
# Load watches (polymorphic - parent class method)
|
||||
self._load_watches()
|
||||
|
||||
# Rehydrate tags
|
||||
self._rehydrate_tags()
|
||||
|
||||
def reload_state(self, datastore_path, include_default_watches, version_tag):
|
||||
"""
|
||||
Load datastore from storage or create new one.
|
||||
|
||||
Supports two scenarios:
|
||||
1. NEW format: changedetection.json exists → load and run updates if needed
|
||||
2. EMPTY: No changedetection.json → create new OR trigger migration from legacy
|
||||
|
||||
Note: Legacy url-watches.json migration happens in update_26, not here.
|
||||
"""
|
||||
logger.info(f"Datastore path is '{datastore_path}'")
|
||||
|
||||
# Initialize data structure
|
||||
self.__data = App.model()
|
||||
self.json_store_path = os.path.join(self.datastore_path, "changedetection.json")
|
||||
|
||||
# Base definition for all watchers (deepcopy part of #569)
|
||||
self.generic_definition = deepcopy(Watch.model(datastore_path=datastore_path, default={}))
|
||||
|
||||
# Load build SHA if available (Docker deployments)
|
||||
if path.isfile('changedetectionio/source.txt'):
|
||||
with open('changedetectionio/source.txt') as f:
|
||||
self.__data['build_sha'] = f.read()
|
||||
|
||||
# Check if datastore already exists
|
||||
changedetection_json = os.path.join(self.datastore_path, "changedetection.json")
|
||||
|
||||
if os.path.exists(changedetection_json):
|
||||
# Load existing datastore (changedetection.json + watch.json files)
|
||||
logger.info("Loading existing datastore")
|
||||
try:
|
||||
self._load_state()
|
||||
except Exception as e:
|
||||
logger.critical(f"Failed to load datastore: {e}")
|
||||
raise
|
||||
|
||||
# Run schema updates if needed
|
||||
# Pass current schema version from loaded datastore (defaults to 0 if not set)
|
||||
current_schema = self.__data['settings']['application'].get('schema_version', 0)
|
||||
self.run_updates(current_schema_version=current_schema)
|
||||
|
||||
else:
|
||||
# No datastore yet - check if this is a fresh install or legacy migration
|
||||
# Generate app_guid FIRST (required for all operations)
|
||||
if "pytest" in sys.modules or "PYTEST_CURRENT_TEST" in os.environ:
|
||||
self.__data['app_guid'] = "test-" + str(uuid_builder.uuid4())
|
||||
else:
|
||||
self.__data['app_guid'] = str(uuid_builder.uuid4())
|
||||
|
||||
# Generate RSS access token
|
||||
self.__data['settings']['application']['rss_access_token'] = secrets.token_hex(16)
|
||||
|
||||
# Generate API access token
|
||||
self.__data['settings']['application']['api_access_token'] = secrets.token_hex(16)
|
||||
|
||||
# Check if legacy datastore exists (url-watches.json)
|
||||
if has_legacy_datastore(self.datastore_path):
|
||||
# Legacy datastore detected - trigger migration
|
||||
logger.critical(f"Legacy datastore detected at {self.datastore_path}/url-watches.json")
|
||||
logger.critical("Migration will be triggered via update_26")
|
||||
|
||||
# Load the legacy datastore to get its schema_version
|
||||
from .legacy_loader import load_legacy_format
|
||||
legacy_path = os.path.join(self.datastore_path, "url-watches.json")
|
||||
with open(legacy_path) as f:
|
||||
self.__data = json.load(f)
|
||||
|
||||
if not self.__data:
|
||||
raise Exception("Failed to load legacy datastore from url-watches.json")
|
||||
|
||||
# update_26 will load the legacy data again and migrate to new format
|
||||
# Only run updates AFTER the legacy schema version (e.g., if legacy is at 25, only run 26+)
|
||||
self.run_updates()
|
||||
|
||||
|
||||
else:
|
||||
# Fresh install - create new datastore
|
||||
logger.critical(f"No datastore found, creating new datastore at {self.datastore_path}")
|
||||
|
||||
# Set schema version to latest (no updates needed)
|
||||
updates_available = self.get_updates_available()
|
||||
self.__data['settings']['application']['schema_version'] = updates_available.pop() if updates_available else 26
|
||||
|
||||
# Add default watches if requested
|
||||
if include_default_watches:
|
||||
self.add_watch(
|
||||
url='https://news.ycombinator.com/',
|
||||
tag='Tech news',
|
||||
extras={'fetch_backend': 'html_requests'}
|
||||
)
|
||||
self.add_watch(
|
||||
url='https://changedetection.io/CHANGELOG.txt',
|
||||
tag='changedetection.io',
|
||||
extras={'fetch_backend': 'html_requests'}
|
||||
)
|
||||
|
||||
# Create changedetection.json immediately
|
||||
try:
|
||||
self._save_settings()
|
||||
logger.info("Created changedetection.json for new datastore")
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to create initial changedetection.json: {e}")
|
||||
|
||||
# Set version tag
|
||||
self.__data['version_tag'] = version_tag
|
||||
|
||||
# Validate proxies.json if it exists
|
||||
_ = self.proxy_list # Just to test parsing
|
||||
|
||||
# Ensure app_guid exists (for datastores loaded from existing files)
|
||||
if 'app_guid' not in self.__data:
|
||||
if "pytest" in sys.modules or "PYTEST_CURRENT_TEST" in os.environ:
|
||||
self.__data['app_guid'] = "test-" + str(uuid_builder.uuid4())
|
||||
else:
|
||||
self.__data['app_guid'] = str(uuid_builder.uuid4())
|
||||
self.mark_settings_dirty()
|
||||
|
||||
# Ensure RSS access token exists
|
||||
if not self.__data['settings']['application'].get('rss_access_token'):
|
||||
secret = secrets.token_hex(16)
|
||||
self.__data['settings']['application']['rss_access_token'] = secret
|
||||
self.mark_settings_dirty()
|
||||
|
||||
# Ensure API access token exists
|
||||
if not self.__data['settings']['application'].get('api_access_token'):
|
||||
secret = secrets.token_hex(16)
|
||||
self.__data['settings']['application']['api_access_token'] = secret
|
||||
self.mark_settings_dirty()
|
||||
|
||||
# Handle password reset lockfile
|
||||
password_reset_lockfile = os.path.join(self.datastore_path, "removepassword.lock")
|
||||
if path.isfile(password_reset_lockfile):
|
||||
self.remove_password()
|
||||
unlink(password_reset_lockfile)
|
||||
|
||||
# Start the background save thread
|
||||
self.start_save_thread()
|
||||
|
||||
def rehydrate_entity(self, uuid, entity, processor_override=None):
|
||||
"""Set the dict back to the dict Watch object"""
|
||||
entity['uuid'] = uuid
|
||||
|
||||
if processor_override:
|
||||
watch_class = get_custom_watch_obj_for_processor(processor_override)
|
||||
entity['processor'] = processor_override
|
||||
else:
|
||||
watch_class = get_custom_watch_obj_for_processor(entity.get('processor'))
|
||||
|
||||
if entity.get('processor') != 'text_json_diff':
|
||||
logger.trace(f"Loading Watch object '{watch_class.__module__}.{watch_class.__name__}' for UUID {uuid}")
|
||||
|
||||
entity = watch_class(datastore_path=self.datastore_path, default=entity)
|
||||
return entity
|
||||
|
||||
# ============================================================================
|
||||
# FileSavingDataStore Abstract Method Implementations
|
||||
# ============================================================================
|
||||
|
||||
def _watch_exists(self, uuid):
|
||||
"""Check if watch exists in datastore."""
|
||||
return uuid in self.__data['watching']
|
||||
|
||||
def _get_watch_dict(self, uuid):
|
||||
"""Get watch as dictionary."""
|
||||
return dict(self.__data['watching'][uuid])
|
||||
|
||||
def _build_settings_data(self):
|
||||
"""
|
||||
Build settings data structure for saving.
|
||||
|
||||
Returns:
|
||||
dict: Settings data ready for serialization
|
||||
"""
|
||||
return {
|
||||
'note': 'Settings file - watches are stored in individual {uuid}/watch.json files',
|
||||
'app_guid': self.__data['app_guid'],
|
||||
'settings': self.__data['settings'],
|
||||
'build_sha': self.__data.get('build_sha'),
|
||||
'version_tag': self.__data.get('version_tag')
|
||||
}
|
||||
|
||||
def _save_settings(self):
|
||||
"""
|
||||
Save settings to storage.
|
||||
|
||||
File backend implementation: saves to changedetection.json
|
||||
Implementation of abstract method from FileSavingDataStore.
|
||||
Uses the generic save_json_atomic helper.
|
||||
|
||||
Raises:
|
||||
OSError: If disk is full or other I/O error
|
||||
"""
|
||||
settings_data = self._build_settings_data()
|
||||
changedetection_json = os.path.join(self.datastore_path, "changedetection.json")
|
||||
save_json_atomic(changedetection_json, settings_data, label="settings", max_size_mb=10)
|
||||
|
||||
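A rough, hedged sketch of the resulting changedetection.json, shown as a Python literal (values are placeholders; only the top-level keys come from _build_settings_data above):

settings_data = {
    'note': 'Settings file - watches are stored in individual {uuid}/watch.json files',
    'app_guid': '0f1b7a2c-0000-0000-0000-000000000000',   # placeholder
    'settings': {
        'headers': {},
        'requests': {},      # e.g. time_between_check, proxy, extra_proxies
        'application': {},   # e.g. tags, schema_version, api_access_token
    },
    'build_sha': None,
    'version_tag': '0.0.0',
}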
def _load_watches(self):
|
||||
"""
|
||||
Load all watches from storage.
|
||||
|
||||
File backend implementation: reads individual watch.json files
|
||||
Implementation of abstract method from FileSavingDataStore.
|
||||
Delegates to helper function and stores results in internal data structure.
|
||||
"""
|
||||
watching, watch_hashes = load_all_watches(
|
||||
self.datastore_path,
|
||||
self.rehydrate_entity,
|
||||
self._compute_hash
|
||||
)
|
||||
|
||||
# Store loaded data
|
||||
self.__data['watching'] = watching
|
||||
self._watch_hashes = watch_hashes
|
||||
|
||||
# Verify all watches have hashes
|
||||
missing_hashes = [uuid for uuid in watching.keys() if uuid not in watch_hashes]
|
||||
if missing_hashes:
|
||||
logger.error(f"WARNING: {len(missing_hashes)} watches missing hashes after load: {missing_hashes[:5]}")
|
||||
else:
|
||||
logger.debug(f"All {len(watching)} watches have valid hashes")
|
||||
|
||||
def _delete_watch(self, uuid):
|
||||
"""
|
||||
Delete a watch from storage.
|
||||
|
||||
File backend implementation: deletes entire {uuid}/ directory recursively.
|
||||
Implementation of abstract method from FileSavingDataStore.
|
||||
|
||||
Args:
|
||||
uuid: Watch UUID to delete
|
||||
"""
|
||||
watch_dir = os.path.join(self.datastore_path, uuid)
|
||||
if os.path.exists(watch_dir):
|
||||
shutil.rmtree(watch_dir)
|
||||
logger.info(f"Deleted watch directory: {watch_dir}")
|
||||
|
||||
# ============================================================================
|
||||
# Watch Management Methods
|
||||
# ============================================================================
|
||||
|
||||
def set_last_viewed(self, uuid, timestamp):
|
||||
logger.debug(f"Setting watch UUID: {uuid} last viewed to {int(timestamp)}")
|
||||
self.data['watching'][uuid].update({'last_viewed': int(timestamp)})
|
||||
self.mark_watch_dirty(uuid)
|
||||
|
||||
watch_check_update = signal('watch_check_update')
|
||||
if watch_check_update:
|
||||
watch_check_update.send(watch_uuid=uuid)
|
||||
|
||||
def remove_password(self):
|
||||
self.__data['settings']['application']['password'] = False
|
||||
self.mark_settings_dirty()
|
||||
|
||||
def update_watch(self, uuid, update_obj):
|
||||
|
||||
# It's possible that the watch could be deleted before update
|
||||
if not self.__data['watching'].get(uuid):
|
||||
return
|
||||
|
||||
with self.lock:
|
||||
|
||||
# In Python 3.9 we have the |= dict operator, but that still loses data on nested structures...
|
||||
for dict_key, d in self.generic_definition.items():
|
||||
if isinstance(d, dict):
|
||||
if update_obj is not None and dict_key in update_obj:
|
||||
self.__data['watching'][uuid][dict_key].update(update_obj[dict_key])
|
||||
del (update_obj[dict_key])
|
||||
|
||||
self.__data['watching'][uuid].update(update_obj)
|
||||
|
||||
self.mark_watch_dirty(uuid)
|
||||
|
||||
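A short worked example of why the per-key .update() above matters (the 'time_between_check' structure is used purely for illustration): a top-level merge replaces the nested dict wholesale and silently drops sibling keys.

from copy import deepcopy

existing = {'time_between_check': {'hours': 3, 'minutes': 30}}
update_obj = {'time_between_check': {'minutes': 5}}

shallow = deepcopy(existing)
shallow.update(update_obj)
# shallow['time_between_check'] == {'minutes': 5}            <- 'hours' lost

nested = deepcopy(existing)
nested['time_between_check'].update(update_obj['time_between_check'])
# nested['time_between_check'] == {'hours': 3, 'minutes': 5} <- both kept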
@property
|
||||
def threshold_seconds(self):
|
||||
seconds = 0
|
||||
for m, n in Watch.mtable.items():
|
||||
x = self.__data['settings']['requests']['time_between_check'].get(m)
|
||||
if x:
|
||||
seconds += x * n
|
||||
return seconds
|
||||
|
||||
@property
|
||||
def unread_changes_count(self):
|
||||
unread_changes_count = 0
|
||||
for uuid, watch in self.__data['watching'].items():
|
||||
if watch.history_n >= 2 and watch.viewed == False:
|
||||
unread_changes_count += 1
|
||||
|
||||
return unread_changes_count
|
||||
|
||||
@property
|
||||
def data(self):
|
||||
# Re #152, Return env base_url if not overridden
|
||||
# Re #148 - Some people have just {{ base_url }} in the body or title, but this may break some notification services
|
||||
# like 'Join', so it's always best to at least set something obvious so that they are not broken.
|
||||
|
||||
active_base_url = BASE_URL_NOT_SET_TEXT
|
||||
if self.__data['settings']['application'].get('base_url'):
|
||||
active_base_url = self.__data['settings']['application'].get('base_url')
|
||||
elif os.getenv('BASE_URL'):
|
||||
active_base_url = os.getenv('BASE_URL')
|
||||
|
||||
# I looked at various ways to do the following, but in the end just copying the dict seemed simplest/most reliable
|
||||
# even given the memory tradeoff - if you know a better way.. maybe return d|self.__data.. or something
|
||||
d = self.__data
|
||||
d['settings']['application']['active_base_url'] = active_base_url.strip('" ')
|
||||
return d
|
||||
|
||||
# Delete a single watch by UUID
|
||||
def delete(self, uuid):
|
||||
"""
|
||||
Delete a watch by UUID.
|
||||
|
||||
Uses abstracted storage method for backend-agnostic deletion.
|
||||
Supports 'all' to delete all watches (mainly for testing).
|
||||
|
||||
Args:
|
||||
uuid: Watch UUID to delete, or 'all' to delete all watches
|
||||
"""
|
||||
with self.lock:
|
||||
if uuid == 'all':
|
||||
# Delete all watches - capture UUIDs first before modifying dict
|
||||
all_uuids = list(self.__data['watching'].keys())
|
||||
|
||||
for watch_uuid in all_uuids:
|
||||
# Delete from storage using polymorphic method
|
||||
try:
|
||||
self._delete_watch(watch_uuid)
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to delete watch {watch_uuid} from storage: {e}")
|
||||
|
||||
# Clean up tracking data
|
||||
self._watch_hashes.pop(watch_uuid, None)
|
||||
self._dirty_watches.discard(watch_uuid)
|
||||
|
||||
# Send delete signal
|
||||
watch_delete_signal = signal('watch_deleted')
|
||||
if watch_delete_signal:
|
||||
watch_delete_signal.send(watch_uuid=watch_uuid)
|
||||
|
||||
# Clear the dict
|
||||
self.__data['watching'] = {}
|
||||
|
||||
# Mainly used for testing to allow all items to flush before running next test
|
||||
time.sleep(1)
|
||||
|
||||
else:
|
||||
# Delete single watch from storage using polymorphic method
|
||||
try:
|
||||
self._delete_watch(uuid)
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to delete watch {uuid} from storage: {e}")
|
||||
|
||||
# Remove from watching dict
|
||||
del self.data['watching'][uuid]
|
||||
|
||||
# Clean up tracking data
|
||||
self._watch_hashes.pop(uuid, None)
|
||||
self._dirty_watches.discard(uuid)
|
||||
|
||||
# Send delete signal
|
||||
watch_delete_signal = signal('watch_deleted')
|
||||
if watch_delete_signal:
|
||||
watch_delete_signal.send(watch_uuid=uuid)
|
||||
|
||||
self.needs_write_urgent = True
|
||||
|
||||
# Clone a watch by UUID
|
||||
def clone(self, uuid):
|
||||
url = self.data['watching'][uuid].get('url')
|
||||
extras = deepcopy(self.data['watching'][uuid])
|
||||
new_uuid = self.add_watch(url=url, extras=extras)
|
||||
watch = self.data['watching'][new_uuid]
|
||||
return new_uuid
|
||||
|
||||
def url_exists(self, url):
|
||||
|
||||
# Probably there should be a dict here...
|
||||
for watch in self.data['watching'].values():
|
||||
if watch['url'].lower() == url.lower():
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
# Remove a watch's data but keep the entry (URL etc)
|
||||
def clear_watch_history(self, uuid):
|
||||
self.__data['watching'][uuid].clear_watch()
|
||||
self.needs_write_urgent = True
|
||||
|
||||
def add_watch(self, url, tag='', extras=None, tag_uuids=None, save_immediately=True):
|
||||
|
||||
if extras is None:
|
||||
extras = {}
|
||||
|
||||
# In case these are copied across, assume it's a reference and deepcopy()
|
||||
apply_extras = deepcopy(extras)
|
||||
apply_extras['tags'] = [] if not apply_extras.get('tags') else apply_extras.get('tags')
|
||||
|
||||
# Was it a share link? try to fetch the data
|
||||
if (url.startswith("https://changedetection.io/share/")):
|
||||
import requests
|
||||
|
||||
try:
|
||||
r = requests.request(method="GET",
|
||||
url=url,
|
||||
# So we know to return the JSON instead of the human-friendly "help" page
|
||||
headers={'App-Guid': self.__data['app_guid']},
|
||||
timeout=5.0) # 5 second timeout to prevent blocking
|
||||
res = r.json()
|
||||
|
||||
# List of permissible attributes we accept from the wild internet
|
||||
for k in [
|
||||
'body',
|
||||
'browser_steps',
|
||||
'css_filter',
|
||||
'extract_text',
|
||||
'headers',
|
||||
'ignore_text',
|
||||
'include_filters',
|
||||
'method',
|
||||
'paused',
|
||||
'previous_md5',
|
||||
'processor',
|
||||
'subtractive_selectors',
|
||||
'tag',
|
||||
'tags',
|
||||
'text_should_not_be_present',
|
||||
'title',
|
||||
'trigger_text',
|
||||
'url',
|
||||
'use_page_title_in_list',
|
||||
'webdriver_js_execute_code',
|
||||
]:
|
||||
if res.get(k):
|
||||
if k != 'css_filter':
|
||||
apply_extras[k] = res[k]
|
||||
else:
|
||||
# We renamed the field and made it a list
|
||||
apply_extras['include_filters'] = [res['css_filter']]
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error fetching metadata for shared watch link {url} {str(e)}")
|
||||
flash(gettext("Error fetching metadata for {}").format(url), 'error')
|
||||
return False
|
||||
|
||||
if not is_safe_valid_url(url):
|
||||
flash(gettext('Watch protocol is not permitted or invalid URL format'), 'error')
|
||||
|
||||
return None
|
||||
|
||||
if tag and type(tag) == str:
|
||||
# Then it's probably a string of the actual tag by name, split and add it
|
||||
for t in tag.split(','):
|
||||
# for each stripped tag, add tag as UUID
|
||||
for a_t in t.split(','):
|
||||
tag_uuid = self.add_tag(a_t)
|
||||
apply_extras['tags'].append(tag_uuid)
|
||||
|
||||
# Or if UUIDs given directly
|
||||
if tag_uuids:
|
||||
for t in tag_uuids:
|
||||
apply_extras['tags'] = list(set(apply_extras['tags'] + [t.strip()]))
|
||||
|
||||
# Make any uuids unique
|
||||
if apply_extras.get('tags'):
|
||||
apply_extras['tags'] = list(set(apply_extras.get('tags')))
|
||||
|
||||
# If the processor also has its own Watch implementation
|
||||
watch_class = get_custom_watch_obj_for_processor(apply_extras.get('processor'))
|
||||
new_watch = watch_class(datastore_path=self.datastore_path, url=url)
|
||||
|
||||
new_uuid = new_watch.get('uuid')
|
||||
|
||||
logger.debug(f"Adding URL '{url}' - {new_uuid}")
|
||||
|
||||
for k in ['uuid', 'history', 'last_checked', 'last_changed', 'newest_history_key', 'previous_md5', 'viewed']:
|
||||
if k in apply_extras:
|
||||
del apply_extras[k]
|
||||
|
||||
if not apply_extras.get('date_created'):
|
||||
apply_extras['date_created'] = int(time.time())
|
||||
|
||||
new_watch.update(apply_extras)
|
||||
new_watch.ensure_data_dir_exists()
|
||||
self.__data['watching'][new_uuid] = new_watch
|
||||
|
||||
if save_immediately:
|
||||
# Save immediately using polymorphic method
|
||||
try:
|
||||
self.save_watch(new_uuid, force=True)
|
||||
logger.debug(f"Saved new watch {new_uuid}")
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to save new watch {new_uuid}: {e}")
|
||||
# Mark dirty for retry
|
||||
self.mark_watch_dirty(new_uuid)
|
||||
else:
|
||||
self.mark_watch_dirty(new_uuid)
|
||||
|
||||
logger.debug(f"Added '{url}'")
|
||||
|
||||
return new_uuid
|
||||
|
||||
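A hedged usage sketch of add_watch(), mirroring the default-watch calls in reload_state above (URL and tag are placeholders; 'datastore' is an initialised ChangeDetectionStore):

new_uuid = datastore.add_watch(
    url='https://example.com/pricing',
    tag='Examples',
    extras={'fetch_backend': 'html_requests'},
)
if new_uuid:
    logger.info(f"Created watch {new_uuid}")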
def _watch_resource_exists(self, watch_uuid, resource_name):
|
||||
"""
|
||||
Check if a watch-related resource exists.
|
||||
|
||||
File backend implementation: checks if file exists in watch directory.
|
||||
|
||||
Args:
|
||||
watch_uuid: Watch UUID
|
||||
resource_name: Name of resource (e.g., "last-screenshot.png")
|
||||
|
||||
Returns:
|
||||
bool: True if resource exists
|
||||
"""
|
||||
resource_path = os.path.join(self.datastore_path, watch_uuid, resource_name)
|
||||
return path.isfile(resource_path)
|
||||
|
||||
def visualselector_data_is_ready(self, watch_uuid):
|
||||
"""
|
||||
Check if visual selector data (screenshot + elements) is ready.
|
||||
|
||||
Returns:
|
||||
bool: True if both screenshot and elements data exist
|
||||
"""
|
||||
has_screenshot = self._watch_resource_exists(watch_uuid, "last-screenshot.png")
|
||||
has_elements = self._watch_resource_exists(watch_uuid, "elements.deflate")
|
||||
return has_screenshot and has_elements
|
||||
|
||||
# Old sync_to_json and save_datastore methods removed - now handled by FileSavingDataStore parent class
|
||||
|
||||
# Go through the datastore path and remove any snapshots that are not mentioned in the index
|
||||
# This usually is not used, but can be handy.
|
||||
def remove_unused_snapshots(self):
|
||||
logger.info("Removing snapshots from datastore that are not in the index..")
|
||||
|
||||
index = []
|
||||
for uuid in self.data['watching']:
|
||||
for id in self.data['watching'][uuid].history:
|
||||
index.append(self.data['watching'][uuid].history[str(id)])
|
||||
|
||||
import pathlib
|
||||
|
||||
# Only in the sub-directories
|
||||
for uuid in self.data['watching']:
|
||||
for item in pathlib.Path(self.datastore_path).rglob(uuid + "/*.txt"):
|
||||
if not str(item) in index:
|
||||
logger.info(f"Removing {item}")
|
||||
unlink(item)
|
||||
|
||||
@property
|
||||
def proxy_list(self):
|
||||
proxy_list = {}
|
||||
proxy_list_file = os.path.join(self.datastore_path, 'proxies.json')
|
||||
|
||||
# Load from external config file
|
||||
if path.isfile(proxy_list_file):
|
||||
if HAS_ORJSON:
|
||||
# orjson.loads() expects UTF-8 encoded bytes #3611
|
||||
with open(os.path.join(self.datastore_path, "proxies.json"), 'rb') as f:
|
||||
proxy_list = orjson.loads(f.read())
|
||||
else:
|
||||
with open(os.path.join(self.datastore_path, "proxies.json"), encoding='utf-8') as f:
|
||||
proxy_list = json.load(f)
|
||||
|
||||
# Mapping from UI config if available
|
||||
extras = self.data['settings']['requests'].get('extra_proxies')
|
||||
if extras:
|
||||
i = 0
|
||||
for proxy in extras:
|
||||
i += 1  # keep the ui-<n> prefix unique per extra proxy entry
|
||||
if proxy.get('proxy_name') and proxy.get('proxy_url'):
|
||||
k = "ui-" + str(i) + proxy.get('proxy_name')
|
||||
proxy_list[k] = {'label': proxy.get('proxy_name'), 'url': proxy.get('proxy_url')}
|
||||
|
||||
if proxy_list and strtobool(os.getenv('ENABLE_NO_PROXY_OPTION', 'True')):
|
||||
proxy_list["no-proxy"] = {'label': "No proxy", 'url': ''}
|
||||
|
||||
return proxy_list if len(proxy_list) else None
|
||||
|
||||
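A hedged example of an external /datastore/proxies.json the property above can read, shown as a Python literal (keys and URLs are placeholders; the per-entry 'label'/'url' shape mirrors how the UI-configured entries are built in the loop above):

proxies_json = {
    "proxy-one": {"label": "Proxy one", "url": "http://user:pass@proxy-one.example:3128"},
    "proxy-two": {"label": "Proxy two", "url": "socks5://proxy-two.example:1080"},
}
# With ENABLE_NO_PROXY_OPTION (default True), a "no-proxy" entry is appended automatically.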
def get_preferred_proxy_for_watch(self, uuid):
|
||||
"""
|
||||
Returns the preferred proxy by ID key
|
||||
:param uuid: UUID
|
||||
:return: proxy "key" id
|
||||
"""
|
||||
|
||||
if self.proxy_list is None:
|
||||
return None
|
||||
|
||||
# If it's a valid one
|
||||
watch = self.data['watching'].get(uuid)
|
||||
|
||||
if strtobool(os.getenv('ENABLE_NO_PROXY_OPTION', 'True')) and watch.get('proxy') == "no-proxy":
|
||||
return None
|
||||
|
||||
if watch.get('proxy') and watch.get('proxy') in list(self.proxy_list.keys()):
|
||||
return watch.get('proxy')
|
||||
|
||||
# not valid (including None), try the system one
|
||||
else:
|
||||
system_proxy_id = self.data['settings']['requests'].get('proxy')
|
||||
# Is not None and exists
|
||||
if self.proxy_list.get(system_proxy_id):
|
||||
return system_proxy_id
|
||||
|
||||
# Fallback - Did not resolve anything, or it doesn't exist, use the first available
|
||||
if system_proxy_id is None or not self.proxy_list.get(system_proxy_id):
|
||||
first_default = list(self.proxy_list)[0]
|
||||
return first_default
|
||||
|
||||
return None
|
||||
|
||||
@property
|
||||
def has_extra_headers_file(self):
|
||||
filepath = os.path.join(self.datastore_path, 'headers.txt')
|
||||
return os.path.isfile(filepath)
|
||||
|
||||
def get_all_base_headers(self):
|
||||
headers = {}
|
||||
# Global app settings
|
||||
headers.update(self.data['settings'].get('headers', {}))
|
||||
|
||||
return headers
|
||||
|
||||
def get_all_headers_in_textfile_for_watch(self, uuid):
|
||||
from ..model.App import parse_headers_from_text_file
|
||||
headers = {}
|
||||
|
||||
# Global in /datastore/headers.txt
|
||||
filepath = os.path.join(self.datastore_path, 'headers.txt')
|
||||
try:
|
||||
if os.path.isfile(filepath):
|
||||
headers.update(parse_headers_from_text_file(filepath))
|
||||
except Exception as e:
|
||||
logger.error(f"ERROR reading headers.txt at {filepath} {str(e)}")
|
||||
|
||||
watch = self.data['watching'].get(uuid)
|
||||
if watch:
|
||||
|
||||
# In /datastore/xyz-xyz/headers.txt
|
||||
filepath = os.path.join(watch.watch_data_dir, 'headers.txt')
|
||||
try:
|
||||
if os.path.isfile(filepath):
|
||||
headers.update(parse_headers_from_text_file(filepath))
|
||||
except Exception as e:
|
||||
logger.error(f"ERROR reading headers.txt at {filepath} {str(e)}")
|
||||
|
||||
# In /datastore/tag-name.txt
|
||||
tags = self.get_all_tags_for_watch(uuid=uuid)
|
||||
for tag_uuid, tag in tags.items():
|
||||
fname = "headers-" + re.sub(r'[\W_]', '', tag.get('title')).lower().strip() + ".txt"
|
||||
filepath = os.path.join(self.datastore_path, fname)
|
||||
try:
|
||||
if os.path.isfile(filepath):
|
||||
headers.update(parse_headers_from_text_file(filepath))
|
||||
except Exception as e:
|
||||
logger.error(f"ERROR reading headers.txt at {filepath} {str(e)}")
|
||||
|
||||
return headers
|
||||
|
||||
def get_tag_overrides_for_watch(self, uuid, attr):
|
||||
tags = self.get_all_tags_for_watch(uuid=uuid)
|
||||
ret = []
|
||||
|
||||
if tags:
|
||||
for tag_uuid, tag in tags.items():
|
||||
if attr in tag and tag[attr]:
|
||||
ret = [*ret, *tag[attr]]
|
||||
|
||||
return ret
|
||||
|
||||
def add_tag(self, title):
|
||||
# If name exists, return that
|
||||
n = title.strip().lower()
|
||||
logger.debug(f">>> Adding new tag - '{n}'")
|
||||
if not n:
|
||||
return False
|
||||
|
||||
for uuid, tag in self.__data['settings']['application'].get('tags', {}).items():
|
||||
if n == tag.get('title', '').lower().strip():
|
||||
logger.warning(f"Tag '{title}' already exists, skipping creation.")
|
||||
return uuid
|
||||
|
||||
# Eventually almost everything to do with a watch will apply as a Tag
|
||||
# So we use the same model as a Watch
|
||||
with self.lock:
|
||||
from ..model import Tag
|
||||
new_tag = Tag.model(datastore_path=self.datastore_path, default={
|
||||
'title': title.strip(),
|
||||
'date_created': int(time.time())
|
||||
})
|
||||
|
||||
new_uuid = new_tag.get('uuid')
|
||||
|
||||
self.__data['settings']['application']['tags'][new_uuid] = new_tag
|
||||
|
||||
self.mark_settings_dirty()
|
||||
return new_uuid
|
||||
|
||||
def get_all_tags_for_watch(self, uuid):
|
||||
"""This should be in Watch model but Watch doesn't have access to datastore, not sure how to solve that yet"""
|
||||
watch = self.data['watching'].get(uuid)
|
||||
|
||||
# Should return a dict of full tag info linked by UUID
|
||||
if watch:
|
||||
return dictfilt(self.__data['settings']['application']['tags'], watch.get('tags', []))
|
||||
|
||||
return {}
|
||||
|
||||
@property
|
||||
def extra_browsers(self):
|
||||
res = []
|
||||
p = list(filter(
|
||||
lambda s: (s.get('browser_name') and s.get('browser_connection_url')),
|
||||
self.__data['settings']['requests'].get('extra_browsers', [])))
|
||||
if p:
|
||||
for i in p:
|
||||
res.append(("extra_browser_" + i['browser_name'], i['browser_name']))
|
||||
|
||||
return res
|
||||
|
||||
def tag_exists_by_name(self, tag_name):
|
||||
# Check if any tag dictionary has a 'title' attribute matching the provided tag_name
|
||||
tags = self.__data['settings']['application']['tags'].values()
|
||||
return next((v for v in tags if v.get('title', '').lower() == tag_name.lower()),
|
||||
None)
|
||||
|
||||
def any_watches_have_processor_by_name(self, processor_name):
|
||||
for watch in self.data['watching'].values():
|
||||
if watch.get('processor') == processor_name:
|
||||
return True
|
||||
return False
|
||||
|
||||
def search_watches_for_url(self, query, tag_limit=None, partial=False):
|
||||
"""Search watches by URL, title, or error messages
|
||||
|
||||
Args:
|
||||
query (str): Search term to match against watch URLs, titles, and error messages
|
||||
tag_limit (str, optional): Optional tag name to limit search results
|
||||
partial (bool, optional): Substring matching
|
||||
|
||||
Returns:
|
||||
list: List of UUIDs of watches that match the search criteria
|
||||
"""
|
||||
matching_uuids = []
|
||||
query = query.lower().strip()
|
||||
tag = self.tag_exists_by_name(tag_limit) if tag_limit else False
|
||||
|
||||
for uuid, watch in self.data['watching'].items():
|
||||
# Filter by tag if requested
|
||||
if tag_limit:
|
||||
if not tag.get('uuid') in watch.get('tags', []):
|
||||
continue
|
||||
|
||||
# Search in URL, title, or error messages
|
||||
if partial:
|
||||
if ((watch.get('title') and query in watch.get('title').lower()) or
|
||||
query in watch.get('url', '').lower() or
|
||||
(watch.get('last_error') and query in watch.get('last_error').lower())):
|
||||
matching_uuids.append(uuid)
|
||||
else:
|
||||
if ((watch.get('title') and query == watch.get('title').lower()) or
|
||||
query == watch.get('url', '').lower() or
|
||||
(watch.get('last_error') and query == watch.get('last_error').lower())):
|
||||
matching_uuids.append(uuid)
|
||||
|
||||
return matching_uuids
|
||||
|
||||
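A brief usage sketch of the search helper above (query and tag values are placeholders, and the tag is assumed to exist):

uuids = datastore.search_watches_for_url('example.com', tag_limit='Tech news', partial=True)
for uuid in uuids:
    logger.info(f"{uuid} {datastore.data['watching'][uuid].get('url')}")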
def get_unique_notification_tokens_available(self):
|
||||
# Ask each type of watch if they have any extra notification token to add to the validation
|
||||
extra_notification_tokens = {}
|
||||
watch_processors_checked = set()
|
||||
|
||||
for watch_uuid, watch in self.__data['watching'].items():
|
||||
processor = watch.get('processor')
|
||||
if processor not in watch_processors_checked:
|
||||
extra_notification_tokens.update(watch.extra_notification_token_values())
|
||||
watch_processors_checked.add(processor)
|
||||
|
||||
return extra_notification_tokens
|
||||
|
||||
def get_unique_notification_token_placeholders_available(self):
|
||||
# The actual descriptions of the tokens; could be combined with get_unique_notification_tokens_available() instead of doing this twice
|
||||
extra_notification_tokens = []
|
||||
watch_processors_checked = set()
|
||||
|
||||
for watch_uuid, watch in self.__data['watching'].items():
|
||||
processor = watch.get('processor')
|
||||
if processor not in watch_processors_checked:
|
||||
extra_notification_tokens += watch.extra_notification_token_placeholder_info()
|
||||
watch_processors_checked.add(processor)
|
||||
|
||||
return extra_notification_tokens
|
||||
|
||||
def add_notification_url(self, notification_url):
|
||||
|
||||
logger.debug(f">>> Adding new notification_url - '{notification_url}'")
|
||||
|
||||
notification_urls = self.data['settings']['application'].get('notification_urls', [])
|
||||
|
||||
if notification_url in notification_urls:
|
||||
return notification_url
|
||||
|
||||
with self.lock:
|
||||
notification_urls = self.__data['settings']['application'].get('notification_urls', [])
|
||||
|
||||
if notification_url in notification_urls:
|
||||
return notification_url
|
||||
|
||||
# Append and update the datastore
|
||||
notification_urls.append(notification_url)
|
||||
self.__data['settings']['application']['notification_urls'] = notification_urls
|
||||
|
||||
self.mark_settings_dirty()
|
||||
return notification_url
|
||||
|
||||
# Schema update methods moved to store/updates.py (DatastoreUpdatesMixin)
|
||||
# This includes: get_updates_available(), run_updates(), and update_1() through update_26()
|
||||
100
changedetectionio/store/base.py
Normal file
@@ -0,0 +1,100 @@
|
||||
"""
|
||||
Base classes for the datastore.
|
||||
|
||||
This module defines the abstract interfaces that all datastore implementations must follow.
|
||||
"""
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from threading import Lock
|
||||
from loguru import logger
|
||||
|
||||
|
||||
class DataStore(ABC):
|
||||
"""
|
||||
Abstract base class for all datastore implementations.
|
||||
|
||||
Defines the core interface that all datastores must implement for:
|
||||
- Loading and saving data
|
||||
- Managing watches
|
||||
- Handling settings
|
||||
- Providing data access
|
||||
"""
|
||||
|
||||
lock = Lock()
|
||||
datastore_path = None
|
||||
|
||||
@abstractmethod
|
||||
def reload_state(self, datastore_path, include_default_watches, version_tag):
|
||||
"""
|
||||
Load data from persistent storage.
|
||||
|
||||
Args:
|
||||
datastore_path: Path to the datastore directory
|
||||
include_default_watches: Whether to create default watches if none exist
|
||||
version_tag: Application version string
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def add_watch(self, url, **kwargs):
|
||||
"""
|
||||
Add a new watch.
|
||||
|
||||
Args:
|
||||
url: URL to watch
|
||||
**kwargs: Additional watch parameters
|
||||
|
||||
Returns:
|
||||
UUID of the created watch
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def update_watch(self, uuid, update_obj):
|
||||
"""
|
||||
Update an existing watch.
|
||||
|
||||
Args:
|
||||
uuid: Watch UUID
|
||||
update_obj: Dictionary of fields to update
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def delete(self, uuid):
|
||||
"""
|
||||
Delete a watch.
|
||||
|
||||
Args:
|
||||
uuid: Watch UUID to delete
|
||||
"""
|
||||
pass
|
||||
|
||||
@property
|
||||
@abstractmethod
|
||||
def data(self):
|
||||
"""
|
||||
Access to the underlying data structure.
|
||||
|
||||
Returns:
|
||||
Dictionary containing all datastore data
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def force_save_all(self):
|
||||
"""
|
||||
Force immediate synchronous save of all data to storage.
|
||||
|
||||
This is the abstract method for forcing a complete save.
|
||||
Different backends implement this differently:
|
||||
- File backend: Mark all watches/settings dirty, then save
|
||||
- Redis backend: SAVE command or pipeline flush
|
||||
- SQL backend: COMMIT transaction
|
||||
|
||||
Used by:
|
||||
- Backup creation (ensure everything is saved before backup)
|
||||
- Shutdown (ensure all changes are persisted)
|
||||
- Manual save operations
|
||||
"""
|
||||
pass
|
||||
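A hedged sketch of how another backend might subclass this interface (a purely hypothetical in-memory store, not part of this change; it only illustrates the abstract surface):

import uuid as uuid_builder

class InMemoryDataStore(DataStore):
    def __init__(self):
        self._data = {'watching': {}, 'settings': {}}

    def reload_state(self, datastore_path, include_default_watches, version_tag):
        self.datastore_path = datastore_path  # nothing to load for an in-memory store

    def add_watch(self, url, **kwargs):
        new_uuid = str(uuid_builder.uuid4())
        self._data['watching'][new_uuid] = {'url': url, **kwargs}
        return new_uuid

    def update_watch(self, uuid, update_obj):
        self._data['watching'][uuid].update(update_obj)

    def delete(self, uuid):
        self._data['watching'].pop(uuid, None)

    @property
    def data(self):
        return self._data

    def force_save_all(self):
        pass  # nothing to persist for an in-memory store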
898
changedetectionio/store/file_saving_datastore.py
Normal file
@@ -0,0 +1,898 @@
|
||||
"""
|
||||
File-based datastore with individual watch persistence and dirty tracking.
|
||||
|
||||
This module provides the FileSavingDataStore abstract class that implements:
|
||||
- Individual watch.json file persistence
|
||||
- Hash-based change detection (only save what changed)
|
||||
- Periodic audit scan (catches unmarked changes)
|
||||
- Background save thread with batched parallel saves
|
||||
- Atomic file writes safe for NFS/NAS
|
||||
"""
|
||||
|
||||
import glob
|
||||
import hashlib
|
||||
import json
|
||||
import os
|
||||
import tempfile
|
||||
import time
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
from threading import Thread
|
||||
from loguru import logger
|
||||
|
||||
from .base import DataStore
|
||||
from .. import strtobool
|
||||
|
||||
# Try to import orjson for faster JSON serialization
|
||||
try:
|
||||
import orjson
|
||||
HAS_ORJSON = True
|
||||
except ImportError:
|
||||
HAS_ORJSON = False
|
||||
|
||||
# Fsync configuration: Force file data to disk for crash safety
|
||||
# Default False to match legacy behavior (write-and-rename without fsync)
|
||||
# Set to True for mission-critical deployments requiring crash consistency
|
||||
FORCE_FSYNC_DATA_IS_CRITICAL = bool(strtobool(os.getenv('FORCE_FSYNC_DATA_IS_CRITICAL', 'False')))
|
||||
|
||||
# Save interval configuration: How often the background thread saves dirty items
|
||||
# Default 10 seconds - increase for less frequent saves, decrease for more frequent
|
||||
DATASTORE_SCAN_DIRTY_SAVE_INTERVAL_SECONDS = int(os.getenv('DATASTORE_SCAN_DIRTY_SAVE_INTERVAL_SECONDS', '10'))
|
||||
|
||||
# Rolling audit configuration: Scans a fraction of watches each cycle
|
||||
# Default: Run audit every 10s, split into 5 shards
|
||||
# Full audit completes every 50s (10s × 5 shards)
|
||||
# With 56k watches: 56k / 5 = ~11k watches per cycle (~60ms vs 316ms for all)
|
||||
# Handles dynamic watch count - recalculates shard boundaries each cycle
|
||||
DATASTORE_AUDIT_INTERVAL_SECONDS = int(os.getenv('DATASTORE_AUDIT_INTERVAL_SECONDS', '10'))
|
||||
DATASTORE_AUDIT_SHARDS = int(os.getenv('DATASTORE_AUDIT_SHARDS', '5'))
|
||||
|
||||
|
||||
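A hedged sketch of how the rolling audit could shard the watch list (the actual scheduling code is not shown in this excerpt; the modulo-based selection below is an assumption):

def watches_for_audit_cycle(all_uuids, cycle_number, shards=DATASTORE_AUDIT_SHARDS):
    # Recalculated every cycle so newly added or removed watches are picked up
    uuids = sorted(all_uuids)
    shard = cycle_number % shards
    # With 56k watches and 5 shards this yields ~11.2k watches per 10s cycle,
    # so a full audit completes roughly every 50 seconds
    return uuids[shard::shards]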
# ============================================================================
|
||||
# Helper Functions for Atomic File Operations
|
||||
# ============================================================================
|
||||
|
||||
def save_json_atomic(file_path, data_dict, label="file", max_size_mb=10):
|
||||
"""
|
||||
Save JSON data to disk using atomic write pattern.
|
||||
|
||||
Generic helper for saving any JSON data (settings, watches, etc.) with:
|
||||
- Atomic write (temp file + rename)
|
||||
- Directory fsync for crash consistency (only for new files)
|
||||
- Size validation
|
||||
- Proper error handling
|
||||
|
||||
Args:
|
||||
file_path: Full path to target JSON file
|
||||
data_dict: Dictionary to serialize
|
||||
label: Human-readable label for error messages (e.g., "watch", "settings")
|
||||
max_size_mb: Maximum allowed file size in MB
|
||||
|
||||
Raises:
|
||||
ValueError: If serialized data exceeds max_size_mb
|
||||
OSError: If disk is full (ENOSPC) or other I/O error
|
||||
"""
|
||||
# Check if file already exists (before we start writing)
|
||||
# Directory fsync only needed for NEW files to persist the filename
|
||||
file_exists = os.path.exists(file_path)
|
||||
|
||||
# Ensure parent directory exists
|
||||
parent_dir = os.path.dirname(file_path)
|
||||
os.makedirs(parent_dir, exist_ok=True)
|
||||
|
||||
# Create temp file in same directory (required for NFS atomicity)
|
||||
fd, temp_path = tempfile.mkstemp(
|
||||
suffix='.tmp',
|
||||
prefix='json-',
|
||||
dir=parent_dir,
|
||||
text=False
|
||||
)
|
||||
|
||||
fd_closed = False
|
||||
try:
|
||||
# Serialize data
|
||||
t0 = time.time()
|
||||
if HAS_ORJSON:
|
||||
data = orjson.dumps(data_dict, option=orjson.OPT_INDENT_2)
|
||||
else:
|
||||
data = json.dumps(data_dict, indent=2, ensure_ascii=False).encode('utf-8')
|
||||
serialize_ms = (time.time() - t0) * 1000
|
||||
|
||||
# Safety check: validate size
|
||||
MAX_SIZE = max_size_mb * 1024 * 1024
|
||||
data_size = len(data)
|
||||
if data_size > MAX_SIZE:
|
||||
raise ValueError(
|
||||
f"{label.capitalize()} data is unexpectedly large: {data_size / 1024 / 1024:.2f}MB "
|
||||
f"(max: {max_size_mb}MB). This indicates a bug or data corruption."
|
||||
)
|
||||
|
||||
# Write to temp file
|
||||
t1 = time.time()
|
||||
os.write(fd, data)
|
||||
write_ms = (time.time() - t1) * 1000
|
||||
|
||||
# Optional fsync: Force file data to disk for crash safety
|
||||
# Only if FORCE_FSYNC_DATA_IS_CRITICAL=True (default: False, matches legacy behavior)
|
||||
t2 = time.time()
|
||||
if FORCE_FSYNC_DATA_IS_CRITICAL:
|
||||
os.fsync(fd)
|
||||
file_fsync_ms = (time.time() - t2) * 1000
|
||||
|
||||
os.close(fd)
|
||||
fd_closed = True
|
||||
|
||||
# Atomic rename
|
||||
t3 = time.time()
|
||||
os.replace(temp_path, file_path)
|
||||
rename_ms = (time.time() - t3) * 1000
|
||||
|
||||
# Sync directory to ensure filename metadata is durable
|
||||
# OPTIMIZATION: Only needed for NEW files. Existing files already have
|
||||
# directory entry persisted, so we only need file fsync for data durability.
|
||||
dir_fsync_ms = 0
|
||||
if not file_exists:
|
||||
try:
|
||||
dir_fd = os.open(parent_dir, os.O_RDONLY)
|
||||
try:
|
||||
t4 = time.time()
|
||||
os.fsync(dir_fd)
|
||||
dir_fsync_ms = (time.time() - t4) * 1000
|
||||
finally:
|
||||
os.close(dir_fd)
|
||||
except (OSError, AttributeError):
|
||||
# Windows doesn't support fsync on directories
|
||||
pass
|
||||
|
||||
# Log timing breakdown for slow saves
|
||||
# total_ms = serialize_ms + write_ms + file_fsync_ms + rename_ms + dir_fsync_ms
|
||||
# if total_ms: # Log if save took more than 10ms
|
||||
# file_status = "new" if not file_exists else "update"
|
||||
# logger.trace(
|
||||
# f"Save timing breakdown ({total_ms:.1f}ms total, {file_status}): "
|
||||
# f"serialize={serialize_ms:.1f}ms, write={write_ms:.1f}ms, "
|
||||
# f"file_fsync={file_fsync_ms:.1f}ms, rename={rename_ms:.1f}ms, "
|
||||
# f"dir_fsync={dir_fsync_ms:.1f}ms, using_orjson={HAS_ORJSON}"
|
||||
# )
|
||||
|
||||
except OSError as e:
|
||||
# Cleanup temp file
|
||||
if not fd_closed:
|
||||
try:
|
||||
os.close(fd)
|
||||
except:
|
||||
pass
|
||||
if os.path.exists(temp_path):
|
||||
try:
|
||||
os.unlink(temp_path)
|
||||
except:
|
||||
pass
|
||||
|
||||
# Provide helpful error messages
|
||||
if e.errno == 28: # ENOSPC
|
||||
raise OSError(f"Disk full: Cannot save {label}") from e
|
||||
elif e.errno == 122: # EDQUOT
|
||||
raise OSError(f"Disk quota exceeded: Cannot save {label}") from e
|
||||
else:
|
||||
raise OSError(f"I/O error saving {label}: {e}") from e
|
||||
|
||||
except Exception as e:
|
||||
# Cleanup temp file
|
||||
if not fd_closed:
|
||||
try:
|
||||
os.close(fd)
|
||||
except:
|
||||
pass
|
||||
if os.path.exists(temp_path):
|
||||
try:
|
||||
os.unlink(temp_path)
|
||||
except:
|
||||
pass
|
||||
raise e
|
||||
|
||||
|
||||
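A short usage sketch of the helper above, mirroring the _save_settings() call in store/__init__.py (the path and payload are placeholders):

settings_payload = {'note': 'example settings payload', 'settings': {}}
try:
    save_json_atomic('/datastore/changedetection.json', settings_payload, label="settings", max_size_mb=10)
except OSError as e:
    # e.g. "Disk full: Cannot save settings" (ENOSPC) or "Disk quota exceeded" (EDQUOT)
    logger.critical(f"Could not persist settings: {e}")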
def save_watch_atomic(watch_dir, uuid, watch_dict):
|
||||
"""
|
||||
Save a watch to disk using atomic write pattern.
|
||||
|
||||
Convenience wrapper around save_json_atomic for watches.
|
||||
|
||||
Args:
|
||||
watch_dir: Directory for this watch (e.g., /datastore/{uuid})
|
||||
uuid: Watch UUID (for logging)
|
||||
watch_dict: Dictionary representation of the watch
|
||||
|
||||
Raises:
|
||||
ValueError: If serialized data exceeds 10MB (indicates bug or corruption)
|
||||
OSError: If disk is full (ENOSPC) or other I/O error
|
||||
"""
|
||||
watch_json = os.path.join(watch_dir, "watch.json")
|
||||
save_json_atomic(watch_json, watch_dict, label=f"watch {uuid}", max_size_mb=10)
|
||||
|
||||
|
||||
def load_watch_from_file(watch_json, uuid, rehydrate_entity_func):
    """
    Load a watch from its JSON file.

    Args:
        watch_json: Path to the watch.json file
        uuid: Watch UUID
        rehydrate_entity_func: Function to convert dict to Watch object

    Returns:
        Tuple of (Watch object, raw_data_dict) or (None, None) if failed
        The raw_data_dict is needed to compute the hash before rehydration
    """
    try:
        # Check file size before reading
        file_size = os.path.getsize(watch_json)
        MAX_WATCH_SIZE = 10 * 1024 * 1024  # 10MB
        if file_size > MAX_WATCH_SIZE:
            logger.critical(
                f"CORRUPTED WATCH DATA: Watch {uuid} file is unexpectedly large: "
                f"{file_size / 1024 / 1024:.2f}MB (max: {MAX_WATCH_SIZE / 1024 / 1024}MB). "
                f"File: {watch_json}. This indicates a bug or data corruption. "
                f"Watch will be skipped."
            )
            return None, None

        if HAS_ORJSON:
            with open(watch_json, 'rb') as f:
                watch_data = orjson.loads(f.read())
        else:
            with open(watch_json, 'r', encoding='utf-8') as f:
                watch_data = json.load(f)

        if watch_data.get('time_schedule_limit'):
            del watch_data['time_schedule_limit']
        if watch_data.get('time_between_check'):
            del watch_data['time_between_check']

        # Return both the raw data and the rehydrated watch
        # Raw data is needed to compute hash before rehydration changes anything
        watch_obj = rehydrate_entity_func(uuid, watch_data)
        return watch_obj, watch_data

    except json.JSONDecodeError as e:
        logger.critical(
            f"CORRUPTED WATCH DATA: Failed to parse JSON for watch {uuid}. "
            f"File: {watch_json}. Error: {e}. "
            f"Watch will be skipped and may need manual recovery from backup."
        )
        return None, None
    except ValueError as e:
        # orjson raises ValueError for invalid JSON
        if "invalid json" in str(e).lower() or HAS_ORJSON:
            logger.critical(
                f"CORRUPTED WATCH DATA: Failed to parse JSON for watch {uuid}. "
                f"File: {watch_json}. Error: {e}. "
                f"Watch will be skipped and may need manual recovery from backup."
            )
            return None, None
        # Re-raise if it's not a JSON parsing error
        raise
    except FileNotFoundError:
        logger.error(f"Watch file not found: {watch_json} for watch {uuid}")
        return None, None
    except Exception as e:
        logger.error(f"Failed to load watch {uuid} from {watch_json}: {e}")
        return None, None


def load_all_watches(datastore_path, rehydrate_entity_func, compute_hash_func):
    """
    Load all watches from individual watch.json files.

    SYNCHRONOUS loading: Blocks until all watches are loaded.
    This ensures data consistency - web server won't accept requests
    until all watches are available. Progress logged every 100 watches.

    Args:
        datastore_path: Path to the datastore directory
        rehydrate_entity_func: Function to convert dict to Watch object
        compute_hash_func: Function to compute hash from raw watch dict

    Returns:
        Tuple of (watching_dict, hashes_dict)
        - watching_dict: uuid -> Watch object
        - hashes_dict: uuid -> hash string (computed from raw data)
    """
    start_time = time.time()
    logger.info("Loading watches from individual watch.json files...")

    watching = {}
    watch_hashes = {}

    if not os.path.exists(datastore_path):
        return watching, watch_hashes

    # Find all watch.json files using glob (faster than manual directory traversal)
    glob_start = time.time()
    watch_files = glob.glob(os.path.join(datastore_path, "*", "watch.json"))
    glob_time = time.time() - glob_start

    total = len(watch_files)
    logger.debug(f"Found {total} watch.json files in {glob_time:.3f}s")

    loaded = 0
    failed = 0

    for watch_json in watch_files:
        # Extract UUID from path: /datastore/{uuid}/watch.json
        uuid_dir = os.path.basename(os.path.dirname(watch_json))
        watch, raw_data = load_watch_from_file(watch_json, uuid_dir, rehydrate_entity_func)
        if watch and raw_data:
            watching[uuid_dir] = watch
            # Compute hash from rehydrated Watch object (as dict) to match how we compute on save
            # This ensures hash matches what audit will compute from dict(watch)
            watch_hashes[uuid_dir] = compute_hash_func(dict(watch))
            loaded += 1

            if loaded % 100 == 0:
                logger.info(f"Loaded {loaded}/{total} watches...")
        else:
            # load_watch_from_file already logged the specific error
            failed += 1

    elapsed = time.time() - start_time

    if failed > 0:
        logger.critical(
            f"LOAD COMPLETE: {loaded} watches loaded successfully, "
            f"{failed} watches FAILED to load (corrupted or invalid) "
            f"in {elapsed:.2f}s ({loaded/elapsed:.0f} watches/sec)"
        )
    else:
        logger.info(f"Loaded {loaded} watches from disk in {elapsed:.2f}s ({loaded/elapsed:.0f} watches/sec)")

    return watching, watch_hashes


# ============================================================================
# FileSavingDataStore Class
# ============================================================================


class FileSavingDataStore(DataStore):
    """
    Abstract datastore that provides file persistence with change tracking.

    Features:
    - Individual watch.json files (one per watch)
    - Dirty tracking: Only saves items that have changed
    - Hash-based change detection: Prevents unnecessary writes
    - Background save thread: Non-blocking persistence
    - Two-tier urgency: Standard (60s) and urgent (immediate) saves

    Subclasses must implement:
    - rehydrate_entity(): Convert dict to Watch object
    - Access to internal __data structure for watch management
    """

    needs_write = False
    needs_write_urgent = False
    stop_thread = False

    # Change tracking
    _dirty_watches = set()   # Watch UUIDs that need saving
    _dirty_settings = False  # Settings changed
    _watch_hashes = {}       # UUID -> SHA256 hash for change detection

    # Health monitoring
    _last_save_time = 0       # Timestamp of last successful save
    _last_audit_time = 0      # Timestamp of last audit scan
    _save_cycle_count = 0     # Number of save cycles completed
    _total_saves = 0          # Total watches saved (lifetime)
    _save_errors = 0          # Total save errors (lifetime)
    _audit_count = 0          # Number of audit scans completed
    _audit_found_changes = 0  # Total unmarked changes found by audits
    _audit_shard_index = 0    # Current shard being audited (rolling audit)

    def __init__(self):
        super().__init__()
        self.save_data_thread = None
        self._last_save_time = time.time()
        self._last_audit_time = time.time()

    def mark_watch_dirty(self, uuid):
        """
        Mark a watch as needing save.

        Args:
            uuid: Watch UUID
        """
        with self.lock:
            self._dirty_watches.add(uuid)
            dirty_count = len(self._dirty_watches)

            # Backpressure detection - warn if dirty set grows too large
            if dirty_count > 1000:
                logger.critical(
                    f"BACKPRESSURE WARNING: {dirty_count} watches pending save! "
                    f"Save thread may not be keeping up with write rate. "
                    f"This could indicate disk I/O bottleneck or save thread failure."
                )
            elif dirty_count > 500:
                logger.warning(
                    f"Dirty watch count high: {dirty_count} watches pending save. "
                    f"Monitoring for potential backpressure."
                )

            self.needs_write = True

    def mark_settings_dirty(self):
        """Mark settings as needing save."""
        with self.lock:
            self._dirty_settings = True
            self.needs_write = True

    def _compute_hash(self, watch_dict):
        """
        Compute SHA256 hash of watch for change detection.

        Args:
            watch_dict: Dictionary representation of watch

        Returns:
            Hex string of SHA256 hash
        """
        # Use orjson for deterministic serialization if available
        if HAS_ORJSON:
            json_bytes = orjson.dumps(watch_dict, option=orjson.OPT_SORT_KEYS)
        else:
            json_str = json.dumps(watch_dict, sort_keys=True, ensure_ascii=False)
            json_bytes = json_str.encode('utf-8')

        return hashlib.sha256(json_bytes).hexdigest()

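    # Illustrative sketch of why sorted-key serialization matters for change detection
    # (standalone example - the dicts below are made-up assumptions, not real watches):
    #
    #   import hashlib, json
    #   a = {"url": "https://example.com", "paused": False}
    #   b = {"paused": False, "url": "https://example.com"}  # same data, different key order
    #   h = lambda d: hashlib.sha256(json.dumps(d, sort_keys=True, ensure_ascii=False).encode('utf-8')).hexdigest()
    #   assert h(a) == h(b)  # deterministic serialization -> identical hash, so no spurious "dirty" save
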
    def save_watch(self, uuid, force=False, watch_dict=None, current_hash=None):
        """
        Save a single watch if it has changed (polymorphic method).

        Args:
            uuid: Watch UUID
            force: If True, skip hash check and save anyway
            watch_dict: Pre-computed watch dictionary (optimization)
            current_hash: Pre-computed hash (optimization)

        Returns:
            True if saved, False if skipped (unchanged)
        """
        if not self._watch_exists(uuid):
            logger.warning(f"Cannot save watch {uuid} - does not exist")
            return False

        # Get watch dict if not provided
        if watch_dict is None:
            watch_dict = self._get_watch_dict(uuid)

        # Compute hash if not provided
        if current_hash is None:
            current_hash = self._compute_hash(watch_dict)

        # Skip save if unchanged (unless forced)
        if not force and current_hash == self._watch_hashes.get(uuid):
            return False

        try:
            self._save_watch(uuid, watch_dict)
            self._watch_hashes[uuid] = current_hash
            logger.debug(f"Saved watch {uuid}")
            return True
        except Exception as e:
            logger.error(f"Failed to save watch {uuid}: {e}")
            raise

    def _save_watch(self, uuid, watch_dict):
        """
        Save a single watch to storage (polymorphic).

        Backend-specific implementation. Subclasses override for different storage:
        - File backend: Writes to {uuid}/watch.json
        - Redis backend: SET watch:{uuid}
        - SQL backend: UPDATE watches WHERE uuid=?

        Args:
            uuid: Watch UUID
            watch_dict: Dictionary representation of watch
        """
        # Default file implementation
        watch_dir = os.path.join(self.datastore_path, uuid)
        save_watch_atomic(watch_dir, uuid, watch_dict)

    def _save_settings(self):
        """
        Save settings to storage (polymorphic).

        Subclasses must implement for their backend.
        - File: changedetection.json
        - Redis: SET settings
        - SQL: UPDATE settings table
        """
        raise NotImplementedError("Subclass must implement _save_settings")

    def _load_watches(self):
        """
        Load all watches from storage (polymorphic).

        Subclasses must implement for their backend.
        - File: Read individual watch.json files
        - Redis: SCAN watch:* keys
        - SQL: SELECT * FROM watches
        """
        raise NotImplementedError("Subclass must implement _load_watches")

    def _delete_watch(self, uuid):
        """
        Delete a watch from storage (polymorphic).

        Subclasses must implement for their backend.
        - File: Delete {uuid}/ directory recursively
        - Redis: DEL watch:{uuid}
        - SQL: DELETE FROM watches WHERE uuid=?

        Args:
            uuid: Watch UUID to delete
        """
        raise NotImplementedError("Subclass must implement _delete_watch")

    def _save_dirty_items(self):
        """
        Save dirty watches and settings.

        This is the core optimization: instead of saving the entire datastore,
        we only save watches that were marked dirty and settings if changed.
        """
        start_time = time.time()

        # Capture dirty sets under lock
        with self.lock:
            dirty_watches = list(self._dirty_watches)
            dirty_settings = self._dirty_settings
            self._dirty_watches.clear()
            self._dirty_settings = False

        if not dirty_watches and not dirty_settings:
            return

        logger.trace(f"Saving {len(dirty_watches)} dirty watches, settings_dirty={dirty_settings}")

        # Save each dirty watch using the polymorphic save method
        saved_count = 0
        error_count = 0
        skipped_unchanged = 0

        # Process in batches of 50, using thread pool for parallel saves
        BATCH_SIZE = 50
        MAX_WORKERS = 20  # Number of parallel save threads

        def save_single_watch(uuid):
            """Helper function for thread pool execution."""
            try:
                # Check if watch still exists (might have been deleted)
                if not self._watch_exists(uuid):
                    # Watch was deleted, remove hash
                    self._watch_hashes.pop(uuid, None)
                    return {'status': 'deleted', 'uuid': uuid}

                # Pre-check hash to avoid unnecessary save_watch() calls
                watch_dict = self._get_watch_dict(uuid)
                current_hash = self._compute_hash(watch_dict)

                if current_hash == self._watch_hashes.get(uuid):
                    # Watch hasn't actually changed, skip
                    return {'status': 'unchanged', 'uuid': uuid}

                # Pass pre-computed values to avoid redundant serialization/hashing
                if self.save_watch(uuid, force=True, watch_dict=watch_dict, current_hash=current_hash):
                    return {'status': 'saved', 'uuid': uuid}
                else:
                    return {'status': 'skipped', 'uuid': uuid}
            except Exception as e:
                logger.error(f"Error saving watch {uuid}: {e}")
                return {'status': 'error', 'uuid': uuid, 'error': e}

        # Process dirty watches in batches
        for batch_start in range(0, len(dirty_watches), BATCH_SIZE):
            batch = dirty_watches[batch_start:batch_start + BATCH_SIZE]
            batch_num = (batch_start // BATCH_SIZE) + 1
            total_batches = (len(dirty_watches) + BATCH_SIZE - 1) // BATCH_SIZE

            if len(dirty_watches) > BATCH_SIZE:
                logger.trace(f"Save batch {batch_num}/{total_batches} ({len(batch)} watches)")

            # Use thread pool to save watches in parallel
            with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
                # Submit all save tasks
                future_to_uuid = {executor.submit(save_single_watch, uuid): uuid for uuid in batch}

                # Collect results as they complete
                for future in as_completed(future_to_uuid):
                    result = future.result()
                    status = result['status']

                    if status == 'saved':
                        saved_count += 1
                    elif status == 'unchanged':
                        skipped_unchanged += 1
                    elif status == 'error':
                        error_count += 1
                        # Re-mark for retry
                        with self.lock:
                            self._dirty_watches.add(result['uuid'])
                    # 'deleted' and 'skipped' don't need special handling

        # Save settings if changed
        if dirty_settings:
            try:
                self._save_settings()
                logger.debug("Saved settings")
            except Exception as e:
                logger.error(f"Failed to save settings: {e}")
                error_count += 1
                with self.lock:
                    self._dirty_settings = True

        # Update metrics
        elapsed = time.time() - start_time
        self._save_cycle_count += 1
        self._total_saves += saved_count
        self._save_errors += error_count
        self._last_save_time = time.time()

        # Log performance metrics
        if saved_count > 0:
            avg_time_per_watch = (elapsed / saved_count) * 1000  # milliseconds
            skipped_msg = f", {skipped_unchanged} unchanged" if skipped_unchanged > 0 else ""
            parallel_msg = f" [parallel: {MAX_WORKERS} workers]" if saved_count > 1 else ""
            logger.info(
                f"Successfully saved {saved_count} watches in {elapsed:.2f}s "
                f"(avg {avg_time_per_watch:.1f}ms per watch{skipped_msg}){parallel_msg}. "
                f"Total: {self._total_saves} saves, {self._save_errors} errors (lifetime)"
            )
        elif skipped_unchanged > 0:
            logger.debug(f"Save cycle: {skipped_unchanged} watches verified unchanged (hash match), nothing saved")

        if error_count > 0:
            logger.error(f"Save cycle completed with {error_count} errors")

        self.needs_write = False
        self.needs_write_urgent = False

    def _watch_exists(self, uuid):
        """
        Check if watch exists. Subclass must implement.

        Args:
            uuid: Watch UUID

        Returns:
            bool
        """
        raise NotImplementedError("Subclass must implement _watch_exists")

    def _get_watch_dict(self, uuid):
        """
        Get watch as dictionary. Subclass must implement.

        Args:
            uuid: Watch UUID

        Returns:
            Dictionary representation of watch
        """
        raise NotImplementedError("Subclass must implement _get_watch_dict")

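    # Minimal sketch of what a concrete backend has to provide for the dirty-save
    # machinery above to work (illustrative only - "MemoryBackedDataStore" is an
    # assumption, not a class that exists in this codebase):
    #
    #   class MemoryBackedDataStore(FileSavingDataStore):
    #       def _watch_exists(self, uuid):
    #           return uuid in self.data['watching']
    #
    #       def _get_watch_dict(self, uuid):
    #           return dict(self.data['watching'][uuid])
    #
    #       def _save_settings(self):
    #           ...  # e.g. write settings out to changedetection.json
    #
    #   # With those in place, mark_watch_dirty(uuid) plus the background thread
    #   # (or a direct _save_dirty_items() call) is enough to persist changes.
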
    def _audit_all_watches(self):
        """
        Rolling audit: Scans a fraction of watches to detect unmarked changes.

        Instead of scanning ALL watches at once, this scans 1/N shards per cycle.
        The shard rotates each cycle, completing a full audit every N cycles.

        Handles dynamic watch count - recalculates shard boundaries each cycle,
        so newly added watches will be audited in subsequent cycles.

        Benefits:
        - Lower CPU per cycle (56k / 5 = ~11k watches vs all 56k)
        - More frequent audits overall (every 50s vs every 10s)
        - Spreads load evenly across time
        """
        audit_start = time.time()

        # Get list of all watch UUIDs (read-only, no lock needed)
        try:
            all_uuids = list(self.data['watching'].keys())
        except (KeyError, AttributeError, RuntimeError):
            # Data structure not ready or being modified
            return

        if not all_uuids:
            return

        total_watches = len(all_uuids)

        # Calculate this cycle's shard boundaries
        # Example: 56,278 watches / 5 shards = 11,255 watches per shard
        # Shard 0: [0:11255], Shard 1: [11255:22510], etc.
        shard_size = (total_watches + DATASTORE_AUDIT_SHARDS - 1) // DATASTORE_AUDIT_SHARDS
        start_idx = self._audit_shard_index * shard_size
        end_idx = min(start_idx + shard_size, total_watches)

        # Handle wrap-around (shouldn't happen normally, but defensive)
        if start_idx >= total_watches:
            self._audit_shard_index = 0
            start_idx = 0
            end_idx = min(shard_size, total_watches)

        # Audit only this shard's watches
        shard_uuids = all_uuids[start_idx:end_idx]

        changes_found = 0
        errors = 0

        for uuid in shard_uuids:
            try:
                # Get current watch dict and compute hash
                watch_dict = self._get_watch_dict(uuid)
                current_hash = self._compute_hash(watch_dict)
                stored_hash = self._watch_hashes.get(uuid)

                # If hash changed and not already marked dirty, mark it
                if current_hash != stored_hash:
                    with self.lock:
                        if uuid not in self._dirty_watches:
                            self._dirty_watches.add(uuid)
                            changes_found += 1
                            logger.warning(
                                f"Audit detected unmarked change in watch {uuid[:8]}... "
                                f"current hash {current_hash[:8]} vs stored hash {str(stored_hash)[:8]} "
                                f"(hash changed but not marked dirty)"
                            )
                            self.needs_write = True
            except Exception as e:
                errors += 1
                logger.trace(f"Audit error for watch {uuid[:8]}...: {e}")

        audit_elapsed = (time.time() - audit_start) * 1000  # milliseconds

        # Advance to next shard (wrap around after last shard)
        self._audit_shard_index = (self._audit_shard_index + 1) % DATASTORE_AUDIT_SHARDS

        # Update metrics
        self._audit_count += 1
        self._audit_found_changes += changes_found
        self._last_audit_time = time.time()

        if changes_found > 0:
            logger.warning(
                f"Audit shard {self._audit_shard_index}/{DATASTORE_AUDIT_SHARDS} found {changes_found} "
                f"unmarked changes in {len(shard_uuids)}/{total_watches} watches ({audit_elapsed:.1f}ms)"
            )
        else:
            logger.trace(
                f"Audit shard {self._audit_shard_index}/{DATASTORE_AUDIT_SHARDS}: "
                f"{len(shard_uuids)}/{total_watches} watches checked, 0 changes ({audit_elapsed:.1f}ms)"
            )

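    # Worked example of the shard arithmetic above (numbers are illustrative):
    # with total_watches = 1,003 and DATASTORE_AUDIT_SHARDS = 5,
    # shard_size = (1003 + 5 - 1) // 5 = 201, so successive cycles cover
    #   shard 0 -> all_uuids[0:201]
    #   shard 1 -> all_uuids[201:402]
    #   shard 2 -> all_uuids[402:603]
    #   shard 3 -> all_uuids[603:804]
    #   shard 4 -> all_uuids[804:1003]   (last shard clamped by min())
    # i.e. every watch is re-checked once per DATASTORE_AUDIT_SHARDS audit cycles.
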
    def save_datastore(self):
        """
        Background thread that periodically saves dirty items and audits watches.

        Runs two independent cycles:
        1. Save dirty items every DATASTORE_SCAN_DIRTY_SAVE_INTERVAL_SECONDS (default 10s)
        2. Rolling audit: every DATASTORE_AUDIT_INTERVAL_SECONDS (default 10s)
           - Scans 1/DATASTORE_AUDIT_SHARDS watches per cycle (default 1/5)
           - Full audit completes every 50s (10s × 5 shards)
           - Automatically handles new/deleted watches

        Uses 0.5s sleep intervals for responsiveness to urgent saves.
        """
        while True:
            if self.stop_thread:
                # Graceful shutdown: flush any remaining dirty items before stopping
                if self.needs_write or self._dirty_watches or self._dirty_settings:
                    logger.warning("Datastore save thread stopping - flushing remaining dirty items...")
                    try:
                        self._save_dirty_items()
                        logger.info("Graceful shutdown complete - all data saved")
                    except Exception as e:
                        logger.critical(f"FAILED to save dirty items during shutdown: {e}")
                else:
                    logger.info("Datastore save thread stopping - no dirty items")
                return

            # Check if it's time to run audit scan (every N seconds)
            if time.time() - self._last_audit_time >= DATASTORE_AUDIT_INTERVAL_SECONDS:
                try:
                    self._audit_all_watches()
                except Exception as e:
                    logger.error(f"Error in audit cycle: {e}")

            # Save dirty items if needed
            if self.needs_write or self.needs_write_urgent:
                try:
                    self._save_dirty_items()
                except Exception as e:
                    logger.error(f"Error in save cycle: {e}")

            # Timer with early break for urgent saves
            # Each iteration is 0.5 seconds, so iterations = DATASTORE_SCAN_DIRTY_SAVE_INTERVAL_SECONDS * 2
            for i in range(DATASTORE_SCAN_DIRTY_SAVE_INTERVAL_SECONDS * 2):
                time.sleep(0.5)
                if self.stop_thread or self.needs_write_urgent:
                    break

    def start_save_thread(self):
        """Start the background save thread."""
        if not self.save_data_thread or not self.save_data_thread.is_alive():
            self.save_data_thread = Thread(target=self.save_datastore, daemon=True, name="DatastoreSaver")
            self.save_data_thread.start()
            logger.info("Datastore save thread started")

    def force_save_all(self):
        """
        Force immediate synchronous save of all changes to storage.

        File backend implementation of the abstract force_save_all() method.
        Marks all watches and settings as dirty, then saves immediately.

        Used by:
        - Backup creation (ensure everything is saved before backup)
        - Shutdown (ensure all changes are persisted)
        - Manual save operations
        """
        logger.info("Force saving all data to storage...")

        # Mark everything as dirty to ensure complete save
        for uuid in self.data['watching'].keys():
            self.mark_watch_dirty(uuid)
        self.mark_settings_dirty()

        # Save immediately (synchronous)
        self._save_dirty_items()

        logger.success("All data saved to storage")

    def get_health_status(self):
        """
        Get datastore health status for monitoring.

        Returns:
            dict with health metrics and status
        """
        now = time.time()
        time_since_last_save = now - self._last_save_time

        with self.lock:
            dirty_count = len(self._dirty_watches)

        is_thread_alive = self.save_data_thread and self.save_data_thread.is_alive()

        # Determine health status
        if not is_thread_alive:
            status = "CRITICAL"
            message = "Save thread is DEAD"
        elif time_since_last_save > 300:  # 5 minutes
            status = "WARNING"
            message = f"No save activity for {time_since_last_save:.0f}s"
        elif dirty_count > 1000:
            status = "WARNING"
            message = f"High backpressure: {dirty_count} watches pending"
        elif self._save_errors > 0 and (self._save_errors / max(self._total_saves, 1)) > 0.01:
            status = "WARNING"
            message = f"High error rate: {self._save_errors} errors"
        else:
            status = "HEALTHY"
            message = "Operating normally"

        return {
            "status": status,
            "message": message,
            "thread_alive": is_thread_alive,
            "dirty_watches": dirty_count,
            "dirty_settings": self._dirty_settings,
            "last_save_seconds_ago": int(time_since_last_save),
            "save_cycles": self._save_cycle_count,
            "total_saves": self._total_saves,
            "total_errors": self._save_errors,
            "error_rate_percent": round((self._save_errors / max(self._total_saves, 1)) * 100, 2)
        }

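    # Illustrative monitoring hook built on get_health_status() (sketch only - the
    # helper function below is an assumption, not part of this codebase):
    #
    #   def log_datastore_health(datastore):
    #       health = datastore.get_health_status()
    #       # e.g. {'status': 'HEALTHY', 'message': 'Operating normally',
    #       #       'thread_alive': True, 'dirty_watches': 3, ...}
    #       if health['status'] != 'HEALTHY':
    #           logger.warning(f"Datastore health: {health['status']} - {health['message']}")
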
66
changedetectionio/store/legacy_loader.py
Normal file
@@ -0,0 +1,66 @@
"""
|
||||
Legacy format loader for url-watches.json.
|
||||
|
||||
Provides functions to detect and load from the legacy monolithic JSON format.
|
||||
Used during migration (update_26) to transition to individual watch.json files.
|
||||
"""
|
||||
|
||||
import os
|
||||
import json
|
||||
from loguru import logger
|
||||
|
||||
# Try to import orjson for faster JSON serialization
|
||||
try:
|
||||
import orjson
|
||||
HAS_ORJSON = True
|
||||
except ImportError:
|
||||
HAS_ORJSON = False
|
||||
|
||||
|
||||
def has_legacy_datastore(datastore_path):
|
||||
"""
|
||||
Check if a legacy url-watches.json file exists.
|
||||
|
||||
This is used by update_26 to determine if migration is needed.
|
||||
|
||||
Args:
|
||||
datastore_path: Path to datastore directory
|
||||
|
||||
Returns:
|
||||
bool: True if url-watches.json exists
|
||||
"""
|
||||
url_watches_json = os.path.join(datastore_path, "url-watches.json")
|
||||
return os.path.exists(url_watches_json)
|
||||
|
||||
|
||||
def load_legacy_format(json_store_path):
|
||||
"""
|
||||
Load datastore from legacy url-watches.json format.
|
||||
|
||||
Args:
|
||||
json_store_path: Full path to url-watches.json file
|
||||
|
||||
Returns:
|
||||
dict: Loaded datastore data with 'watching', 'settings', etc.
|
||||
None: If file doesn't exist or loading failed
|
||||
"""
|
||||
logger.info(f"Loading from legacy format: {json_store_path}")
|
||||
|
||||
if not os.path.isfile(json_store_path):
|
||||
logger.warning(f"Legacy file not found: {json_store_path}")
|
||||
return None
|
||||
|
||||
try:
|
||||
if HAS_ORJSON:
|
||||
with open(json_store_path, 'rb') as f:
|
||||
data = orjson.loads(f.read())
|
||||
else:
|
||||
with open(json_store_path, 'r', encoding='utf-8') as f:
|
||||
data = json.load(f)
|
||||
|
||||
logger.info(f"Loaded {len(data.get('watching', {}))} watches from legacy format")
|
||||
return data
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to load legacy format: {e}")
|
||||
return None
|
||||
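# Example usage (illustrative sketch; the datastore path below is an assumption):
#
#   datastore_path = "/datastore"
#   if has_legacy_datastore(datastore_path):
#       legacy = load_legacy_format(os.path.join(datastore_path, "url-watches.json"))
#       if legacy:
#           print(f"{len(legacy.get('watching', {}))} watches ready to migrate")
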
686
changedetectionio/store/updates.py
Normal file
@@ -0,0 +1,686 @@
"""
|
||||
Schema update migrations for the datastore.
|
||||
|
||||
This module contains all schema version upgrade methods (update_1 through update_N).
|
||||
These are mixed into ChangeDetectionStore to keep the main store file focused.
|
||||
|
||||
IMPORTANT: Each update could be run even when they have a new install and the schema is correct.
|
||||
Therefore - each `update_n` should be very careful about checking if it needs to actually run.
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import tarfile
|
||||
import time
|
||||
from loguru import logger
|
||||
from copy import deepcopy
|
||||
|
||||
from ..html_tools import TRANSLATE_WHITESPACE_TABLE
|
||||
from ..processors.restock_diff import Restock
|
||||
from ..blueprint.rss import RSS_CONTENT_FORMAT_DEFAULT
|
||||
from ..model import USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH
|
||||
from .file_saving_datastore import save_watch_atomic
|
||||
|
||||
|
||||
def create_backup_tarball(datastore_path, update_number):
|
||||
"""
|
||||
Create a tarball backup of the entire datastore structure before running an update.
|
||||
|
||||
Includes:
|
||||
- All {uuid}/watch.json files
|
||||
- changedetection.json (settings, if it exists)
|
||||
- url-watches.json (legacy format, if it exists)
|
||||
- Directory structure preserved
|
||||
|
||||
Args:
|
||||
datastore_path: Path to datastore directory
|
||||
update_number: Update number being applied
|
||||
|
||||
Returns:
|
||||
str: Path to created tarball, or None if backup failed
|
||||
|
||||
Restoration:
|
||||
To restore from a backup:
|
||||
cd /path/to/datastore
|
||||
tar -xzf before-update-N-timestamp.tar.gz
|
||||
This will restore all watch.json files and settings to their pre-update state.
|
||||
"""
|
||||
timestamp = int(time.time())
|
||||
backup_filename = f"before-update-{update_number}-{timestamp}.tar.gz"
|
||||
backup_path = os.path.join(datastore_path, backup_filename)
|
||||
|
||||
try:
|
||||
logger.info(f"Creating backup tarball: {backup_filename}")
|
||||
|
||||
with tarfile.open(backup_path, "w:gz") as tar:
|
||||
# Backup changedetection.json if it exists (new format)
|
||||
changedetection_json = os.path.join(datastore_path, "changedetection.json")
|
||||
if os.path.isfile(changedetection_json):
|
||||
tar.add(changedetection_json, arcname="changedetection.json")
|
||||
logger.debug("Added changedetection.json to backup")
|
||||
|
||||
# Backup url-watches.json if it exists (legacy format)
|
||||
url_watches_json = os.path.join(datastore_path, "url-watches.json")
|
||||
if os.path.isfile(url_watches_json):
|
||||
tar.add(url_watches_json, arcname="url-watches.json")
|
||||
logger.debug("Added url-watches.json to backup")
|
||||
|
||||
# Backup all watch directories with their watch.json files
|
||||
# This preserves the UUID directory structure
|
||||
watch_count = 0
|
||||
for entry in os.listdir(datastore_path):
|
||||
entry_path = os.path.join(datastore_path, entry)
|
||||
|
||||
# Skip if not a directory
|
||||
if not os.path.isdir(entry_path):
|
||||
continue
|
||||
|
||||
# Skip hidden directories and backup directories
|
||||
if entry.startswith('.') or entry.startswith('before-update-'):
|
||||
continue
|
||||
|
||||
# Check if this directory has a watch.json (indicates it's a watch UUID directory)
|
||||
watch_json = os.path.join(entry_path, "watch.json")
|
||||
if os.path.isfile(watch_json):
|
||||
# Add the watch.json file preserving directory structure
|
||||
tar.add(watch_json, arcname=f"{entry}/watch.json")
|
||||
watch_count += 1
|
||||
|
||||
if watch_count % 100 == 0:
|
||||
logger.debug(f"Backed up {watch_count} watch.json files...")
|
||||
|
||||
logger.success(f"Backup created: {backup_filename} ({watch_count} watches)")
|
||||
return backup_path
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to create backup tarball: {e}")
|
||||
# Try to clean up partial backup
|
||||
if os.path.exists(backup_path):
|
||||
try:
|
||||
os.unlink(backup_path)
|
||||
except:
|
||||
pass
|
||||
return None
|
||||
|
||||
|
||||
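# Illustrative sketch of inspecting a backup produced by create_backup_tarball()
# (standalone example; the datastore path is an assumption):
#
#   import tarfile
#   backup_path = create_backup_tarball("/datastore", update_number=26)
#   if backup_path:
#       with tarfile.open(backup_path, "r:gz") as tar:
#           names = tar.getnames()  # e.g. ['changedetection.json', '<uuid>/watch.json', ...]
#           print(f"{len(names)} entries archived in {backup_path}")
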
class DatastoreUpdatesMixin:
    """
    Mixin class containing all schema update methods.

    This class is inherited by ChangeDetectionStore to provide schema migration functionality.
    Each update_N method upgrades the schema from version N-1 to version N.
    """

    def get_updates_available(self):
        """
        Discover all available update methods.

        Returns:
            list: Sorted list of update version numbers (e.g., [1, 2, 3, ..., 26])
        """
        import inspect
        updates_available = []
        for i, o in inspect.getmembers(self, predicate=inspect.ismethod):
            m = re.search(r'update_(\d+)$', i)
            if m:
                updates_available.append(int(m.group(1)))
        updates_available.sort()

        return updates_available

    def run_updates(self, current_schema_version=None):
        """
        Run all pending schema updates sequentially.

        Args:
            current_schema_version: Optional current schema version. If provided, only run updates
                                    greater than this version. If None, uses the schema version from
                                    the datastore. If no schema version exists in datastore and it appears
                                    to be a fresh install, sets to latest update number (no updates needed).

        IMPORTANT: Each update could be run even when they have a new install and the schema is correct.
        Therefore - each `update_n` should be very careful about checking if it needs to actually run.

        Process:
        1. Get list of available updates
        2. For each update > current schema version:
           - Create backup of datastore
           - Run update method
           - Update schema version
           - Mark settings and watches dirty
        3. If any update fails, stop processing
        4. Save all changes immediately
        """
        updates_available = self.get_updates_available()

        # Determine current schema version
        if current_schema_version is None:
            # Check if schema_version exists in datastore
            current_schema_version = self.data['settings']['application'].get('schema_version')

            if current_schema_version is None:
                # No schema version found - could be a fresh install or very old datastore
                # If this is a fresh/new config with no watches, assume it's up-to-date
                # and set to latest update number (no updates needed)
                if len(self.data['watching']) == 0:
                    # Get the highest update number from available update methods
                    latest_update = updates_available[-1] if updates_available else 0
                    logger.info(f"No schema version found and no watches exist - assuming fresh install, setting schema_version to {latest_update}")
                    self.data['settings']['application']['schema_version'] = latest_update
                    self.mark_settings_dirty()
                    return  # No updates needed for fresh install
                else:
                    # Has watches but no schema version - likely old datastore, run all updates
                    logger.warning("No schema version found but watches exist - running all updates from version 0")
                    current_schema_version = 0

        logger.info(f"Current schema version: {current_schema_version}")

        updates_ran = []

        for update_n in updates_available:
            if update_n > current_schema_version:
                logger.critical(f"Applying update_{update_n}")

                # Create tarball backup of entire datastore structure
                # This includes all watch.json files, settings, and preserves directory structure
                backup_path = create_backup_tarball(self.datastore_path, update_n)
                if backup_path:
                    logger.info(f"Backup created at: {backup_path}")
                else:
                    logger.warning("Backup creation failed, but continuing with update")

                try:
                    update_method = getattr(self, f"update_{update_n}")()
                except Exception as e:
                    logger.error(f"Error while trying update_{update_n}")
                    logger.error(e)
                    # Don't run any more updates
                    return
                else:
                    # Bump the version, important
                    self.data['settings']['application']['schema_version'] = update_n
                    self.mark_settings_dirty()

                    # CRITICAL: Mark all watches as dirty so changes are persisted
                    # Most updates modify watches, and in the new individual watch.json structure,
                    # we need to ensure those changes are saved
                    logger.info(f"Marking all {len(self.data['watching'])} watches as dirty after update_{update_n} (so that it saves them to disk)")
                    for uuid in self.data['watching'].keys():
                        self.mark_watch_dirty(uuid)

                    # Save changes immediately after each update (more resilient than batching)
                    logger.critical(f"Saving all changes after update_{update_n}")
                    try:
                        self._save_dirty_items()
                        logger.success(f"Update {update_n} changes saved successfully")
                    except Exception as e:
                        logger.error(f"Failed to save update_{update_n} changes: {e}")
                        # Don't raise - update already ran, but changes might not be persisted
                        # The update will try to run again on next startup

                    # Track which updates ran
                    updates_ran.append(update_n)

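    # Sketch of how the regex-based discovery in get_updates_available() behaves
    # (illustrative only; the method name list below is a made-up assumption):
    #
    #   import re
    #   method_names = ['update_1', 'update_12', 'update_26', 'update_26_helper', 'run_updates']
    #   found = sorted(int(m.group(1)) for n in method_names
    #                  if (m := re.search(r'update_(\d+)$', n)))
    #   # found == [1, 12, 26] -- the trailing '$' keeps names like 'update_26_helper' out
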
    # ============================================================================
    # Individual Update Methods
    # ============================================================================

    def update_1(self):
        """Convert minutes to seconds on settings and each watch."""
        if self.data['settings']['requests'].get('minutes_between_check'):
            self.data['settings']['requests']['time_between_check']['minutes'] = self.data['settings']['requests']['minutes_between_check']
            # Remove the default 'hours' that is set from the model
            self.data['settings']['requests']['time_between_check']['hours'] = None

        for uuid, watch in self.data['watching'].items():
            if 'minutes_between_check' in watch:
                # Only upgrade individual watch time if it was set
                if watch.get('minutes_between_check', False):
                    self.data['watching'][uuid]['time_between_check']['minutes'] = watch['minutes_between_check']

    def update_2(self):
        """
        Move the history list to a flat text file index.
        Better than SQLite because this list is only appended to, and works across NAS / NFS type setups.
        """
        # @todo test running this on a newly updated one (when this already ran)
        for uuid, watch in self.data['watching'].items():
            history = []

            if watch.get('history', False):
                for d, p in watch['history'].items():
                    d = int(d)  # Used to be keyed as str, we'll fix this now too
                    history.append("{},{}\n".format(d, p))

            if len(history):
                target_path = os.path.join(self.datastore_path, uuid)
                if os.path.exists(target_path):
                    with open(os.path.join(target_path, "history.txt"), "w") as f:
                        f.writelines(history)
                else:
                    logger.warning(f"Datastore history directory {target_path} does not exist, skipping history import.")

            # No longer needed, dynamically pulled from the disk when needed.
            # But we should set it back to an empty dict so we don't break if this schema runs on an earlier version.
            # In the distant future we can remove this entirely
            self.data['watching'][uuid]['history'] = {}

    def update_3(self):
        """We incorrectly stored last_changed when there was not a change, and then confused the output list table."""
        # see https://github.com/dgtlmoon/changedetection.io/pull/835
        return

    def update_4(self):
        """`last_changed` not needed, we pull that information from the history.txt index."""
        for uuid, watch in self.data['watching'].items():
            try:
                # Remove it from the struct
                del(watch['last_changed'])
            except:
                continue
        return

    def update_5(self):
        """
        If the watch notification body, title look the same as the global one, unset it, so the watch defaults back to using the main settings.
        In other words - the watch notification_title and notification_body are not needed if they are the same as the default one.
        """
        current_system_body = self.data['settings']['application']['notification_body'].translate(TRANSLATE_WHITESPACE_TABLE)
        current_system_title = self.data['settings']['application']['notification_title'].translate(TRANSLATE_WHITESPACE_TABLE)
        for uuid, watch in self.data['watching'].items():
            try:
                watch_body = watch.get('notification_body', '')
                if watch_body and watch_body.translate(TRANSLATE_WHITESPACE_TABLE) == current_system_body:
                    # Looks the same as the default one, so unset it
                    watch['notification_body'] = None

                watch_title = watch.get('notification_title', '')
                if watch_title and watch_title.translate(TRANSLATE_WHITESPACE_TABLE) == current_system_title:
                    # Looks the same as the default one, so unset it
                    watch['notification_title'] = None
            except Exception as e:
                continue
        return

    def update_7(self):
        """
        We incorrectly used common header overrides that should only apply to Requests.
        These are now handled in content_fetcher::html_requests and shouldn't be passed to Playwright/Selenium.
        """
        # These were hard-coded in early versions
        for v in ['User-Agent', 'Accept', 'Accept-Encoding', 'Accept-Language']:
            if self.data['settings']['headers'].get(v):
                del self.data['settings']['headers'][v]

    def update_8(self):
        """Convert filters to a list of filters css_filter -> include_filters."""
        for uuid, watch in self.data['watching'].items():
            try:
                existing_filter = watch.get('css_filter', '')
                if existing_filter:
                    watch['include_filters'] = [existing_filter]
            except:
                continue
        return

    def update_9(self):
        """Convert old static notification tokens to jinja2 tokens."""
        # Each watch
        # only { } not {{ or }}
        r = r'(?<!{){(?!{)(\w+)(?<!})}(?!})'
        for uuid, watch in self.data['watching'].items():
            try:
                n_body = watch.get('notification_body', '')
                if n_body:
                    watch['notification_body'] = re.sub(r, r'{{\1}}', n_body)

                n_title = watch.get('notification_title')
                if n_title:
                    watch['notification_title'] = re.sub(r, r'{{\1}}', n_title)

                n_urls = watch.get('notification_urls')
                if n_urls:
                    for i, url in enumerate(n_urls):
                        watch['notification_urls'][i] = re.sub(r, r'{{\1}}', url)

            except:
                continue

        # System wide
        n_body = self.data['settings']['application'].get('notification_body')
        if n_body:
            self.data['settings']['application']['notification_body'] = re.sub(r, r'{{\1}}', n_body)

        n_title = self.data['settings']['application'].get('notification_title')
        if n_title:
            self.data['settings']['application']['notification_title'] = re.sub(r, r'{{\1}}', n_title)

        n_urls = self.data['settings']['application'].get('notification_urls')
        if n_urls:
            for i, url in enumerate(n_urls):
                self.data['settings']['application']['notification_urls'][i] = re.sub(r, r'{{\1}}', url)

        return

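    # Worked example of the token conversion in update_9 (illustrative values only):
    #
    #   import re
    #   r = r'(?<!{){(?!{)(\w+)(?<!})}(?!})'
    #   re.sub(r, r'{{\1}}', 'Change at {watch_url} ({base_url})')
    #   # -> 'Change at {{watch_url}} ({{base_url}})'
    #   re.sub(r, r'{{\1}}', 'Already jinja2: {{watch_url}}')
    #   # -> unchanged, the lookarounds skip tokens that are already doubled
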
    def update_10(self):
        """Some setups may have missed the correct default, so it shows the wrong config in the UI, although it will default to system-wide."""
        for uuid, watch in self.data['watching'].items():
            try:
                if not watch.get('fetch_backend', ''):
                    watch['fetch_backend'] = 'system'
            except:
                continue
        return

    def update_12(self):
        """Create tag objects and their references from existing tag text."""
        i = 0
        for uuid, watch in self.data['watching'].items():
            # Split out and convert old tag string
            tag = watch.get('tag')
            if tag:
                tag_uuids = []
                for t in tag.split(','):
                    tag_uuids.append(self.add_tag(title=t))

                self.data['watching'][uuid]['tags'] = tag_uuids

    def update_13(self):
        """#1775 - Update 11 did not update the records correctly when adding 'date_created' values for sorting."""
        i = 0
        for uuid, watch in self.data['watching'].items():
            if not watch.get('date_created'):
                self.data['watching'][uuid]['date_created'] = i
            i += 1
        return

    def update_14(self):
        """#1774 - protect xpath1 against migration."""
        for awatch in self.data["watching"]:
            if self.data["watching"][awatch]['include_filters']:
                for num, selector in enumerate(self.data["watching"][awatch]['include_filters']):
                    if selector.startswith('/'):
                        self.data["watching"][awatch]['include_filters'][num] = 'xpath1:' + selector
                    if selector.startswith('xpath:'):
                        self.data["watching"][awatch]['include_filters'][num] = selector.replace('xpath:', 'xpath1:', 1)

    def update_15(self):
        """Use more obvious default time setting."""
        for uuid in self.data["watching"]:
            if self.data["watching"][uuid]['time_between_check'] == self.data['settings']['requests']['time_between_check']:
                # What the old logic was, which was pretty confusing
                self.data["watching"][uuid]['time_between_check_use_default'] = True
            elif all(value is None or value == 0 for value in self.data["watching"][uuid]['time_between_check'].values()):
                self.data["watching"][uuid]['time_between_check_use_default'] = True
            else:
                # Something custom here
                self.data["watching"][uuid]['time_between_check_use_default'] = False

    def update_16(self):
        """Correctly set datatype for older installs where 'tag' was string and update_12 did not catch it."""
        for uuid, watch in self.data['watching'].items():
            if isinstance(watch.get('tags'), str):
                self.data['watching'][uuid]['tags'] = []

    def update_17(self):
        """Migrate old 'in_stock' values to the new Restock."""
        for uuid, watch in self.data['watching'].items():
            if 'in_stock' in watch:
                watch['restock'] = Restock({'in_stock': watch.get('in_stock')})
                del watch['in_stock']

    def update_18(self):
        """Migrate old restock settings."""
        for uuid, watch in self.data['watching'].items():
            if not watch.get('restock_settings'):
                # So we enable price following by default
                self.data['watching'][uuid]['restock_settings'] = {'follow_price_changes': True}

            # Migrate and clean off old value
            self.data['watching'][uuid]['restock_settings']['in_stock_processing'] = 'in_stock_only' if watch.get(
                'in_stock_only') else 'all_changes'

            if self.data['watching'][uuid].get('in_stock_only'):
                del (self.data['watching'][uuid]['in_stock_only'])

    def update_19(self):
        """Compress old elements.json to elements.deflate, saving disk, this compression is pretty fast."""
        import zlib

        for uuid, watch in self.data['watching'].items():
            json_path = os.path.join(self.datastore_path, uuid, "elements.json")
            deflate_path = os.path.join(self.datastore_path, uuid, "elements.deflate")

            if os.path.exists(json_path):
                with open(json_path, "rb") as f_j:
                    with open(deflate_path, "wb") as f_d:
                        logger.debug(f"Compressing {str(json_path)} to {str(deflate_path)}..")
                        f_d.write(zlib.compress(f_j.read()))
                os.unlink(json_path)

    def update_20(self):
        """Migrate extract_title_as_title to use_page_title_in_list."""
        for uuid, watch in self.data['watching'].items():
            if self.data['watching'][uuid].get('extract_title_as_title'):
                self.data['watching'][uuid]['use_page_title_in_list'] = self.data['watching'][uuid].get('extract_title_as_title')
                del self.data['watching'][uuid]['extract_title_as_title']

        if self.data['settings']['application'].get('extract_title_as_title'):
            self.data['settings']['application']['ui']['use_page_title_in_list'] = self.data['settings']['application'].get('extract_title_as_title')

    def update_21(self):
        """Migrate timezone to scheduler_timezone_default."""
        if self.data['settings']['application'].get('timezone'):
            self.data['settings']['application']['scheduler_timezone_default'] = self.data['settings']['application'].get('timezone')
            del self.data['settings']['application']['timezone']

    def update_23(self):
        """Some notification formats got the wrong name type."""

        def re_run(formats):
            sys_n_format = self.data['settings']['application'].get('notification_format')
            key_exists_as_value = next((k for k, v in formats.items() if v == sys_n_format), None)
            if key_exists_as_value:  # key of "Plain text"
                logger.success(f"['settings']['application']['notification_format'] '{sys_n_format}' -> '{key_exists_as_value}'")
                self.data['settings']['application']['notification_format'] = key_exists_as_value

            for uuid, watch in self.data['watching'].items():
                n_format = self.data['watching'][uuid].get('notification_format')
                key_exists_as_value = next((k for k, v in formats.items() if v == n_format), None)
                if key_exists_as_value and key_exists_as_value != USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH:  # key of "Plain text"
                    logger.success(f"['watching'][{uuid}]['notification_format'] '{n_format}' -> '{key_exists_as_value}'")
                    self.data['watching'][uuid]['notification_format'] = key_exists_as_value  # should be 'text' or whatever

            for uuid, tag in self.data['settings']['application']['tags'].items():
                n_format = self.data['settings']['application']['tags'][uuid].get('notification_format')
                key_exists_as_value = next((k for k, v in formats.items() if v == n_format), None)
                if key_exists_as_value and key_exists_as_value != USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH:  # key of "Plain text"
                    logger.success(
                        f"['settings']['application']['tags'][{uuid}]['notification_format'] '{n_format}' -> '{key_exists_as_value}'")
                    self.data['settings']['application']['tags'][uuid][
                        'notification_format'] = key_exists_as_value  # should be 'text' or whatever

        from ..notification import valid_notification_formats
        formats = deepcopy(valid_notification_formats)
        re_run(formats)
        # And in previous versions, it was "text" instead of Plain text, Markdown instead of "Markdown to HTML"
        formats['text'] = 'Text'
        formats['markdown'] = 'Markdown'
        re_run(formats)

    def update_24(self):
        """RSS types should be inline with the same names as notification types."""
        rss_format = self.data['settings']['application'].get('rss_content_format')
        if not rss_format or 'text' in rss_format:
            # might have been 'plaintext', 'plain text' or something
            self.data['settings']['application']['rss_content_format'] = RSS_CONTENT_FORMAT_DEFAULT
        elif 'html' in rss_format:
            self.data['settings']['application']['rss_content_format'] = 'htmlcolor'
        else:
            # safe fallback to text
            self.data['settings']['application']['rss_content_format'] = RSS_CONTENT_FORMAT_DEFAULT

    def update_25(self):
        """Different processors now hold their own history.txt."""
        for uuid, watch in self.data['watching'].items():
            processor = self.data['watching'][uuid].get('processor')
            if processor != 'text_json_diff':
                old_history_txt = os.path.join(self.datastore_path, uuid, "history.txt")
                target_history_name = f"history-{processor}.txt"
                new_history_txt = os.path.join(self.datastore_path, uuid, target_history_name)
                if os.path.isfile(old_history_txt) and not os.path.isfile(new_history_txt):
                    logger.debug(f"Renaming history index {old_history_txt} to {new_history_txt}...")
                    shutil.move(old_history_txt, new_history_txt)

def migrate_legacy_db_format(self):
|
||||
"""
|
||||
Migration: Individual watch persistence (COPY-based, safe rollback).
|
||||
|
||||
Loads legacy url-watches.json format and migrates to:
|
||||
- {uuid}/watch.json (per watch)
|
||||
- changedetection.json (settings only)
|
||||
|
||||
IMPORTANT:
|
||||
- A tarball backup (before-update-26-timestamp.tar.gz) is created before migration
|
||||
- url-watches.json is LEFT INTACT for rollback safety
|
||||
- Users can roll back by simply downgrading to the previous version
|
||||
- Or restore from tarball: tar -xzf before-update-26-*.tar.gz
|
||||
|
||||
This is a dedicated migration release - users upgrade at their own pace.
|
||||
"""
|
||||
logger.critical("=" * 80)
|
||||
logger.critical("Running migration: Individual watch persistence (update_26)")
|
||||
logger.critical("COPY-based migration: url-watches.json will remain intact for rollback")
|
||||
logger.critical("=" * 80)
|
||||
|
||||
# Check if already migrated
|
||||
changedetection_json = os.path.join(self.datastore_path, "changedetection.json")
|
||||
if os.path.exists(changedetection_json):
|
||||
logger.info("Migration already completed (changedetection.json exists), skipping")
|
||||
return
|
||||
|
||||
# Check if we need to load legacy data
|
||||
from .legacy_loader import has_legacy_datastore, load_legacy_format
|
||||
|
||||
if not has_legacy_datastore(self.datastore_path):
|
||||
logger.info("No legacy datastore found, nothing to migrate")
|
||||
return
|
||||
|
||||
# Load legacy data from url-watches.json
|
||||
logger.critical("Loading legacy datastore from url-watches.json...")
|
||||
legacy_path = os.path.join(self.datastore_path, "url-watches.json")
|
||||
legacy_data = load_legacy_format(legacy_path)
|
||||
|
||||
if not legacy_data:
|
||||
raise Exception("Failed to load legacy datastore from url-watches.json")
|
||||
|
||||
# Populate settings from legacy data
|
||||
logger.info("Populating settings from legacy data...")
|
||||
watch_count = len(self.data['watching'])
|
||||
logger.success(f"Loaded {watch_count} watches from legacy format")
|
||||
|
||||
# Phase 1: Save all watches to individual files
|
||||
logger.critical(f"Phase 1/4: Saving {watch_count} watches to individual watch.json files...")
|
||||
|
||||
saved_count = 0
|
||||
for uuid, watch in self.data['watching'].items():
|
||||
try:
|
||||
watch_dict = dict(watch)
|
||||
watch_dir = os.path.join(self.datastore_path, uuid)
|
||||
save_watch_atomic(watch_dir, uuid, watch_dict)
|
||||
saved_count += 1
|
||||
|
||||
if saved_count % 100 == 0:
|
||||
logger.info(f" Progress: {saved_count}/{watch_count} watches migrated...")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to save watch {uuid}: {e}")
|
||||
raise Exception(
|
||||
f"Migration failed: Could not save watch {uuid}. "
|
||||
f"url-watches.json remains intact, safe to retry. Error: {e}"
|
||||
)
|
||||
|
||||
logger.critical(f"Phase 1 complete: Saved {saved_count} watches")
|
||||
|
||||
# Phase 2: Verify all files exist
|
||||
logger.critical("Phase 2/4: Verifying all watch.json files were created...")
|
||||
|
||||
missing = []
|
||||
for uuid in self.data['watching'].keys():
|
||||
watch_json = os.path.join(self.datastore_path, uuid, "watch.json")
|
||||
if not os.path.isfile(watch_json):
|
||||
missing.append(uuid)
|
||||
|
||||
if missing:
|
||||
raise Exception(
|
||||
f"Migration failed: {len(missing)} watch files missing: {missing[:5]}... "
|
||||
f"url-watches.json remains intact, safe to retry."
|
||||
)
|
||||
|
||||
logger.critical(f"Phase 2 complete: Verified {watch_count} watch files")
|
||||
|
||||
# Phase 3: Create new settings file
|
||||
logger.critical("Phase 3/4: Creating changedetection.json...")
|
||||
|
||||
try:
|
||||
self._save_settings()
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to create changedetection.json: {e}")
|
||||
raise Exception(
|
||||
f"Migration failed: Could not create changedetection.json. "
|
||||
f"url-watches.json remains intact, safe to retry. Error: {e}"
|
||||
)
|
||||
|
||||
# Phase 4: Verify settings file exists
|
||||
logger.critical("Phase 4/4: Verifying changedetection.json exists...")
|
||||
|
||||
if not os.path.isfile(changedetection_json):
|
||||
raise Exception(
|
||||
"Migration failed: changedetection.json not found after save. "
|
||||
"url-watches.json remains intact, safe to retry."
|
||||
)
|
||||
|
||||
logger.critical("Phase 4 complete: Verified changedetection.json exists")
|
||||
|
||||
# Success! Now reload from new format
|
||||
logger.critical("Reloading datastore from new format...")
|
||||
self._load_state() # Includes load_watches
|
||||
logger.success("Datastore reloaded from new format successfully")
|
||||
|
||||
|
||||
# Verify all watches have hashes after migration
|
||||
missing_hashes = [uuid for uuid in self.data['watching'].keys() if uuid not in self._watch_hashes]
|
||||
if missing_hashes:
|
||||
logger.error(f"WARNING: {len(missing_hashes)} watches missing hashes after migration: {missing_hashes[:5]}")
|
||||
else:
|
||||
logger.success(f"All {len(self.data['watching'])} watches have valid hashes after migration")
|
||||
|
||||
# Set schema version to latest available update
|
||||
# This prevents re-running updates and re-marking all watches as dirty
|
||||
updates_available = self.get_updates_available()
|
||||
latest_schema = updates_available[-1] if updates_available else 26
|
||||
self.data['settings']['application']['schema_version'] = latest_schema
|
||||
self.mark_settings_dirty()
|
||||
logger.info(f"Set schema_version to {latest_schema} (migration complete, all watches already saved)")
|
||||
|
||||
logger.critical("=" * 80)
|
||||
logger.critical("MIGRATION COMPLETED SUCCESSFULLY!")
|
||||
logger.critical("=" * 80)
|
||||
logger.info("")
|
||||
logger.info("New format:")
|
||||
logger.info(f" - {watch_count} individual watch.json files created")
|
||||
logger.info(f" - changedetection.json created (settings only)")
|
||||
logger.info("")
|
||||
logger.info("Rollback safety:")
|
||||
logger.info(" - url-watches.json preserved for rollback")
|
||||
logger.info(" - To rollback: downgrade to previous version and restart")
|
||||
logger.info(" - No manual file operations needed")
|
||||
logger.info("")
|
||||
logger.info("Optional cleanup (after testing new version):")
|
||||
logger.info(f" - rm {os.path.join(self.datastore_path, 'url-watches.json')}")
|
||||
logger.info("")
|
||||
|
||||
def update_26(self):
|
||||
self.migrate_legacy_db_format()
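For reference, a minimal standalone sketch (not part of the store module; the datastore path is an assumption) of how the post-migration layout described above could be checked on disk:

# Illustrative only: the migration leaves one <uuid>/watch.json per watch
# plus a top-level changedetection.json holding the settings.
import json
import os

def verify_new_format(datastore_path):
    settings_file = os.path.join(datastore_path, "changedetection.json")
    assert os.path.isfile(settings_file), "changedetection.json missing after migration"
    watch_files = [
        os.path.join(datastore_path, entry, "watch.json")
        for entry in os.listdir(datastore_path)
        if os.path.isfile(os.path.join(datastore_path, entry, "watch.json"))
    ]
    for path in watch_files:
        with open(path, "r", encoding="utf-8") as f:
            json.load(f)  # raises if a watch file is corrupt
    return len(watch_files)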
|
||||
@@ -27,7 +27,7 @@
|
||||
<link rel="apple-touch-icon" sizes="180x180" href="{{url_for('static_content', group='favicons', filename='apple-touch-icon.png')}}">
|
||||
<link rel="icon" type="image/png" sizes="32x32" href="{{url_for('static_content', group='favicons', filename='favicon-32x32.png')}}">
|
||||
<link rel="icon" type="image/png" sizes="16x16" href="{{url_for('static_content', group='favicons', filename='favicon-16x16.png')}}">
|
||||
<link rel="manifest" href="{{url_for('static_content', group='favicons', filename='site.webmanifest')}}">
|
||||
<link rel="manifest" href="{{url_for('static_content', group='favicons', filename='site.webmanifest')}}" crossorigin="use-credentials">
|
||||
<link rel="mask-icon" href="{{url_for('static_content', group='favicons', filename='safari-pinned-tab.svg')}}" color="#5bbad5">
|
||||
<link rel="shortcut icon" href="{{url_for('static_content', group='favicons', filename='favicon.ico')}}">
|
||||
<meta name="msapplication-TileColor" content="#da532c">
|
||||
@@ -265,6 +265,9 @@
|
||||
</a>
|
||||
{% endfor %}
|
||||
</div>
|
||||
<div>
|
||||
<a href="{{ url_for('ui.delete_locale_language_session_var_if_it_exists', redirect=request.path) }}" >{{ _('Auto-detect from browser') }}</a>
|
||||
</div>
|
||||
<div>
|
||||
{{ _('Language support is in beta, please help us improve by opening a PR on GitHub with any updates.') }}
|
||||
</div>
|
||||
|
||||
changedetectionio/test_cli_opts.sh (new executable file, 265 lines)
@@ -0,0 +1,265 @@
|
||||
#!/bin/bash
|
||||
# Test script for CLI options - Parallel execution
|
||||
# Tests -u, -uN, -r, -b flags
|
||||
|
||||
set -u # Exit on undefined variables
|
||||
|
||||
# Color output
|
||||
RED='\033[0;31m'
|
||||
GREEN='\033[0;32m'
|
||||
YELLOW='\033[1;33m'
|
||||
NC='\033[0m' # No Color
|
||||
|
||||
# Test results directory (for parallel safety)
|
||||
TEST_RESULTS_DIR="/tmp/cli-test-results-$$"
|
||||
mkdir -p "$TEST_RESULTS_DIR"
|
||||
|
||||
# Cleanup function
|
||||
cleanup() {
|
||||
echo ""
|
||||
echo "=== Cleaning up test directories ==="
|
||||
rm -rf /tmp/cli-test-* 2>/dev/null || true
|
||||
rm -rf "$TEST_RESULTS_DIR" 2>/dev/null || true
|
||||
# Kill any hanging processes
|
||||
pkill -f "changedetection.py.*cli-test" 2>/dev/null || true
|
||||
}
|
||||
trap cleanup EXIT
|
||||
|
||||
# Helper to record test result
|
||||
record_result() {
|
||||
local test_num=$1
|
||||
local status=$2 # pass or fail
|
||||
local message=$3
|
||||
|
||||
echo "$status|$message" > "$TEST_RESULTS_DIR/test_${test_num}.result"
|
||||
}
|
||||
|
||||
# Run a test in background
|
||||
run_test() {
|
||||
local test_num=$1
|
||||
local test_name=$2
|
||||
local test_func=$3
|
||||
|
||||
(
|
||||
echo -e "${YELLOW}[Test $test_num]${NC} $test_name"
|
||||
if $test_func "$test_num"; then
|
||||
record_result "$test_num" "pass" "$test_name"
|
||||
echo -e "${GREEN}✓ PASS${NC}: $test_name"
|
||||
else
|
||||
record_result "$test_num" "fail" "$test_name"
|
||||
echo -e "${RED}✗ FAIL${NC}: $test_name"
|
||||
fi
|
||||
) &
|
||||
}
|
||||
|
||||
# =============================================================================
|
||||
# Test Functions (each runs independently)
|
||||
# =============================================================================
|
||||
|
||||
test_help_flag() {
|
||||
local test_id=$1
|
||||
timeout 3 python3 changedetection.py --help 2>&1 | grep -q "Add URLs on startup"
|
||||
}
|
||||
|
||||
test_version_flag() {
|
||||
local test_id=$1
|
||||
timeout 3 python3 changedetection.py --version 2>&1 | grep -qE "changedetection.io [0-9]+\.[0-9]+"
|
||||
}
|
||||
|
||||
test_single_url() {
|
||||
local test_id=$1
|
||||
local dir="/tmp/cli-test-single-${test_id}-$$"
|
||||
timeout 10 python3 changedetection.py -d "$dir" -C -u https://example.com -b &>/dev/null
|
||||
# Count watch directories (UUID directories containing watch.json)
|
||||
[ "$(find "$dir" -mindepth 2 -maxdepth 2 -name 'watch.json' | wc -l)" -eq 1 ]
|
||||
}
|
||||
|
||||
test_multiple_urls() {
|
||||
local test_id=$1
|
||||
local dir="/tmp/cli-test-multi-${test_id}-$$"
|
||||
timeout 12 python3 changedetection.py -d "$dir" -C \
|
||||
-u https://example.com \
|
||||
-u https://github.com \
|
||||
-u https://httpbin.org \
|
||||
-b &>/dev/null
|
||||
# Count watch directories (UUID directories containing watch.json)
|
||||
[ "$(find "$dir" -mindepth 2 -maxdepth 2 -name 'watch.json' | wc -l)" -eq 3 ]
|
||||
}
|
||||
|
||||
test_url_with_options() {
|
||||
local test_id=$1
|
||||
local dir="/tmp/cli-test-opts-${test_id}-$$"
|
||||
timeout 10 python3 changedetection.py -d "$dir" -C \
|
||||
-u https://example.com \
|
||||
-u0 '{"title":"Test Site","processor":"text_json_diff"}' \
|
||||
-b &>/dev/null
|
||||
# Check that at least one watch.json contains the title "Test Site"
|
||||
python3 -c "
|
||||
import json, glob, sys
|
||||
watch_files = glob.glob('$dir/*/watch.json')
|
||||
for wf in watch_files:
|
||||
with open(wf) as f:
|
||||
data = json.load(f)
|
||||
if data.get('title') == 'Test Site':
|
||||
sys.exit(0)
|
||||
sys.exit(1)
|
||||
"
|
||||
}
|
||||
|
||||
test_multiple_urls_with_options() {
|
||||
local test_id=$1
|
||||
local dir="/tmp/cli-test-multi-opts-${test_id}-$$"
|
||||
timeout 12 python3 changedetection.py -d "$dir" -C \
|
||||
-u https://example.com \
|
||||
-u0 '{"title":"Site One"}' \
|
||||
-u https://github.com \
|
||||
-u1 '{"title":"Site Two"}' \
|
||||
-b &>/dev/null
|
||||
# Check that we have 2 watches and both titles are present
|
||||
python3 -c "
|
||||
import json, glob, sys
|
||||
watch_files = glob.glob('$dir/*/watch.json')
|
||||
if len(watch_files) != 2:
|
||||
sys.exit(1)
|
||||
titles = []
|
||||
for wf in watch_files:
|
||||
with open(wf) as f:
|
||||
data = json.load(f)
|
||||
titles.append(data.get('title'))
|
||||
sys.exit(0 if 'Site One' in titles and 'Site Two' in titles else 1)
|
||||
"
|
||||
}
|
||||
|
||||
test_batch_mode_exit() {
|
||||
local test_id=$1
|
||||
local dir="/tmp/cli-test-batch-${test_id}-$$"
|
||||
local start=$(date +%s)
|
||||
timeout 15 python3 changedetection.py -d "$dir" -C \
|
||||
-u https://example.com \
|
||||
-b &>/dev/null
|
||||
local end=$(date +%s)
|
||||
local elapsed=$((end - start))
|
||||
[ $elapsed -lt 14 ]
|
||||
}
|
||||
|
||||
test_recheck_all() {
|
||||
local test_id=$1
|
||||
local dir="/tmp/cli-test-recheck-all-${test_id}-$$"
|
||||
# Create a watch using CLI, then recheck it
|
||||
timeout 10 python3 changedetection.py -d "$dir" -C -u https://example.com -b &>/dev/null
|
||||
# Now recheck all watches
|
||||
timeout 10 python3 changedetection.py -d "$dir" -r all -b 2>&1 | grep -q "Queuing"
|
||||
}
|
||||
|
||||
test_recheck_specific() {
|
||||
local test_id=$1
|
||||
local dir="/tmp/cli-test-recheck-uuid-${test_id}-$$"
|
||||
# Create 2 watches using CLI
|
||||
timeout 12 python3 changedetection.py -d "$dir" -C \
|
||||
-u https://example.com \
|
||||
-u https://github.com \
|
||||
-b &>/dev/null
|
||||
# Get the UUIDs that were created
|
||||
local uuids=$(find "$dir" -mindepth 2 -maxdepth 2 -name 'watch.json' -exec dirname {} \; | xargs -n1 basename | tr '\n' ',' | sed 's/,$//')
|
||||
# Now recheck specific UUIDs
|
||||
timeout 10 python3 changedetection.py -d "$dir" -r "$uuids" -b 2>&1 | grep -q "Queuing"
|
||||
}
|
||||
|
||||
test_combined_operations() {
|
||||
local test_id=$1
|
||||
local dir="/tmp/cli-test-combined-${test_id}-$$"
|
||||
timeout 12 python3 changedetection.py -d "$dir" -C \
|
||||
-u https://example.com \
|
||||
-u https://github.com \
|
||||
-r all \
|
||||
-b &>/dev/null
|
||||
# Count watch directories (UUID directories containing watch.json)
|
||||
[ "$(find "$dir" -mindepth 2 -maxdepth 2 -name 'watch.json' | wc -l)" -eq 2 ]
|
||||
}
|
||||
|
||||
test_invalid_json() {
|
||||
local test_id=$1
|
||||
local dir="/tmp/cli-test-invalid-${test_id}-$$"
|
||||
timeout 5 python3 changedetection.py -d "$dir" -C \
|
||||
-u https://example.com \
|
||||
-u0 'invalid json here' \
|
||||
2>&1 | grep -qi "invalid json\|json decode error"
|
||||
}
|
||||
|
||||
test_create_directory() {
|
||||
local test_id=$1
|
||||
local dir="/tmp/cli-test-create-${test_id}-$$/nested/path"
|
||||
timeout 10 python3 changedetection.py -d "$dir" -C \
|
||||
-u https://example.com \
|
||||
-b &>/dev/null
|
||||
[ -d "$dir" ]
|
||||
}
|
||||
|
||||
# =============================================================================
|
||||
# Main Test Execution
|
||||
# =============================================================================
|
||||
|
||||
echo "=========================================="
|
||||
echo " CLI Options Test Suite (Parallel)"
|
||||
echo "=========================================="
|
||||
echo ""
|
||||
|
||||
# Launch all tests in parallel
|
||||
run_test 1 "Help flag (--help) shows usage without initialization" test_help_flag
|
||||
run_test 2 "Version flag (--version) displays version" test_version_flag
|
||||
run_test 3 "Add single URL with -u flag" test_single_url
|
||||
run_test 4 "Add multiple URLs with multiple -u flags" test_multiple_urls
|
||||
run_test 5 "Add URL with JSON options using -u0" test_url_with_options
|
||||
run_test 6 "Add multiple URLs with different options (-u0, -u1)" test_multiple_urls_with_options
|
||||
run_test 7 "Batch mode (-b) exits automatically after processing" test_batch_mode_exit
|
||||
run_test 8 "Recheck all watches with -r all" test_recheck_all
|
||||
run_test 9 "Recheck specific watches with -r UUID" test_recheck_specific
|
||||
run_test 10 "Combined: Add URLs and recheck all with -u and -r all" test_combined_operations
|
||||
run_test 11 "Invalid JSON in -u0 option should show error" test_invalid_json
|
||||
run_test 12 "Create datastore directory with -C flag" test_create_directory
|
||||
|
||||
# Wait for all tests to complete
|
||||
echo ""
|
||||
echo "Waiting for all tests to complete..."
|
||||
wait
|
||||
|
||||
# Collect results
|
||||
echo ""
|
||||
echo "=========================================="
|
||||
echo " Test Summary"
|
||||
echo "=========================================="
|
||||
|
||||
TESTS_RUN=0
|
||||
TESTS_PASSED=0
|
||||
TESTS_FAILED=0
|
||||
|
||||
for result_file in "$TEST_RESULTS_DIR"/test_*.result; do
|
||||
if [ -f "$result_file" ]; then
|
||||
TESTS_RUN=$((TESTS_RUN + 1))
|
||||
status=$(cut -d'|' -f1 < "$result_file")
|
||||
if [ "$status" = "pass" ]; then
|
||||
TESTS_PASSED=$((TESTS_PASSED + 1))
|
||||
else
|
||||
TESTS_FAILED=$((TESTS_FAILED + 1))
|
||||
fi
|
||||
fi
|
||||
done
|
||||
|
||||
echo "Tests run: $TESTS_RUN"
|
||||
echo -e "${GREEN}Tests passed: $TESTS_PASSED${NC}"
|
||||
if [ $TESTS_FAILED -gt 0 ]; then
|
||||
echo -e "${RED}Tests failed: $TESTS_FAILED${NC}"
|
||||
else
|
||||
echo -e "${GREEN}Tests failed: $TESTS_FAILED${NC}"
|
||||
fi
|
||||
echo "=========================================="
|
||||
echo ""
|
||||
|
||||
# Exit with appropriate code
|
||||
if [ $TESTS_FAILED -gt 0 ]; then
|
||||
echo -e "${RED}Some tests failed!${NC}"
|
||||
exit 1
|
||||
else
|
||||
echo -e "${GREEN}All tests passed!${NC}"
|
||||
exit 0
|
||||
fi
|
||||
@@ -5,13 +5,11 @@ from threading import Thread
|
||||
|
||||
import pytest
|
||||
import arrow
|
||||
from changedetectionio import changedetection_app
|
||||
from changedetectionio import store
|
||||
import os
|
||||
import sys
|
||||
from loguru import logger
|
||||
|
||||
from changedetectionio.flask_app import init_app_secret
|
||||
from changedetectionio.flask_app import init_app_secret, changedetection_app
|
||||
from changedetectionio.tests.util import live_server_setup, new_live_server_setup
|
||||
|
||||
# https://github.com/pallets/flask/blob/1.1.2/examples/tutorial/tests/test_auth.py
|
||||
@@ -31,6 +29,24 @@ def reportlog(pytestconfig):
|
||||
logger.remove(handler_id)
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def live_server_options():
|
||||
"""Configure live_server to run in threaded mode for concurrent requests.
|
||||
|
||||
CRITICAL: Without threaded=True, the test server is single-threaded and will
|
||||
serialize all requests. This breaks tests that verify concurrent worker behavior.
|
||||
|
||||
With threaded=True:
|
||||
- Multiple workers can fetch from test server concurrently
|
||||
- Test endpoints with time.sleep(delay) don't block other requests
|
||||
- Real concurrency testing is possible
|
||||
"""
|
||||
return {
|
||||
'threaded': True, # Enable multi-threading for concurrent requests
|
||||
'port': 0, # Use random available port
|
||||
}
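A rough, self-contained illustration (not part of the test suite; the endpoint URL and delay parameter are assumptions) of why the threaded server matters: two slow requests overlap instead of serializing.

# Illustrative only: with threaded=True both requests finish in roughly the
# per-request delay; a single-threaded server would make them take ~2x as long.
import threading
import time
import urllib.request

def fetch(url, durations, idx):
    start = time.time()
    urllib.request.urlopen(url).read()
    durations[idx] = time.time() - start

durations = {}
urls = ["http://127.0.0.1:5000/test-endpoint?delay=2"] * 2  # hypothetical endpoint
threads = [threading.Thread(target=fetch, args=(u, durations, i)) for i, u in enumerate(urls)]
for t in threads:
    t.start()
for t in threads:
    t.join()
print(durations)  # both close to 2s when the server handles requests concurrently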
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def environment(mocker):
|
||||
"""Mock arrow.now() to return a fixed datetime for testing jinja2 time extension."""
|
||||
@@ -189,6 +205,54 @@ def prepare_test_function(live_server, datastore_path):
|
||||
logger.debug(f"prepare_test_function: Reloaded datastore at {hex(id(datastore))}")
|
||||
logger.debug(f"prepare_test_function: Path {datastore.datastore_path}")
|
||||
|
||||
# Add test helper methods to the app for worker management
|
||||
def set_workers(count):
|
||||
"""Set the number of workers for testing - brutal shutdown, no delays"""
|
||||
from changedetectionio import worker_pool
|
||||
from changedetectionio.flask_app import update_q, notification_q
|
||||
|
||||
current_count = worker_pool.get_worker_count()
|
||||
|
||||
# Special case: Setting to 0 means shutdown all workers brutally
|
||||
if count == 0:
|
||||
logger.debug(f"Brutally shutting down all {current_count} workers")
|
||||
worker_pool.shutdown_workers()
|
||||
return {
|
||||
'status': 'success',
|
||||
'message': f'Shutdown all {current_count} workers',
|
||||
'previous_count': current_count,
|
||||
'current_count': 0
|
||||
}
|
||||
|
||||
# Adjust worker count (no delays, no verification)
|
||||
result = worker_pool.adjust_async_worker_count(
|
||||
count,
|
||||
update_q=update_q,
|
||||
notification_q=notification_q,
|
||||
app=live_server.app,
|
||||
datastore=datastore
|
||||
)
|
||||
|
||||
return result
|
||||
|
||||
def check_all_workers_alive(expected_count):
|
||||
"""Check that all expected workers are alive"""
|
||||
from changedetectionio import worker_pool
|
||||
from changedetectionio.flask_app import update_q, notification_q
|
||||
result = worker_pool.check_worker_health(
|
||||
expected_count,
|
||||
update_q=update_q,
|
||||
notification_q=notification_q,
|
||||
app=live_server.app,
|
||||
datastore=datastore
|
||||
)
|
||||
assert result['status'] == 'healthy', f"Workers not healthy: {result['message']}"
|
||||
return result
|
||||
|
||||
# Attach helper methods to app for easy test access
|
||||
live_server.app.set_workers = set_workers
|
||||
live_server.app.check_all_workers_alive = check_all_workers_alive
|
||||
|
||||
yield
|
||||
|
||||
# Cleanup: Clear watches and queue after test
|
||||
@@ -264,8 +328,8 @@ def app(request, datastore_path):
|
||||
|
||||
# Shutdown workers gracefully before loguru cleanup
|
||||
try:
|
||||
from changedetectionio import worker_handler
|
||||
worker_handler.shutdown_workers()
|
||||
from changedetectionio import worker_pool
|
||||
worker_pool.shutdown_workers()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
changedetectionio/tests/test_api_notification_urls_validation.py (new file, 166 lines)
@@ -0,0 +1,166 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
"""
|
||||
Test notification_urls validation in Watch and Tag API endpoints.
|
||||
Ensures that invalid AppRise URLs are rejected when setting notification_urls.
|
||||
|
||||
Valid AppRise notification URLs use specific protocols like:
|
||||
- posts://example.com - POST to HTTP endpoint
|
||||
- gets://example.com - GET to HTTP endpoint
|
||||
- mailto://user@example.com - Email
|
||||
- slack://token/channel - Slack
|
||||
- discord://webhook_id/webhook_token - Discord
|
||||
- etc.
|
||||
|
||||
Invalid notification URLs:
|
||||
- https://example.com - Plain HTTPS is NOT a valid AppRise notification protocol
|
||||
- ftp://example.com - FTP is NOT a valid AppRise notification protocol
|
||||
- Plain URLs without proper AppRise protocol prefix
|
||||
"""
|
||||
|
||||
from flask import url_for
|
||||
import json
|
||||
|
||||
|
||||
def test_watch_notification_urls_validation(client, live_server, measure_memory_usage, datastore_path):
|
||||
"""Test that Watch PUT/POST endpoints validate notification_urls."""
|
||||
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
|
||||
|
||||
# Test 1: Create a watch with valid notification URLs
|
||||
valid_urls = ["posts://example.com/notify1", "posts://example.com/notify2"]
|
||||
res = client.post(
|
||||
url_for("createwatch"),
|
||||
data=json.dumps({
|
||||
"url": "https://example.com",
|
||||
"notification_urls": valid_urls
|
||||
}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key}
|
||||
)
|
||||
assert res.status_code == 201, "Should accept valid notification URLs on watch creation"
|
||||
watch_uuid = res.json['uuid']
|
||||
|
||||
# Verify the notification URLs were saved
|
||||
res = client.get(
|
||||
url_for("watch", uuid=watch_uuid),
|
||||
headers={'x-api-key': api_key}
|
||||
)
|
||||
assert res.status_code == 200
|
||||
assert set(res.json['notification_urls']) == set(valid_urls), "Valid notification URLs should be saved"
|
||||
|
||||
# Test 2: Try to create a watch with invalid notification URLs (https:// is not valid)
|
||||
invalid_urls = ["https://example.com/webhook"]
|
||||
res = client.post(
|
||||
url_for("createwatch"),
|
||||
data=json.dumps({
|
||||
"url": "https://example.com",
|
||||
"notification_urls": invalid_urls
|
||||
}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key}
|
||||
)
|
||||
assert res.status_code == 400, "Should reject https:// notification URLs (not a valid AppRise protocol)"
|
||||
assert b"is not a valid AppRise URL" in res.data, "Should provide AppRise validation error message"
|
||||
|
||||
# Test 2b: Also test other invalid protocols
|
||||
invalid_urls_ftp = ["ftp://not-apprise-url"]
|
||||
res = client.post(
|
||||
url_for("createwatch"),
|
||||
data=json.dumps({
|
||||
"url": "https://example.com",
|
||||
"notification_urls": invalid_urls_ftp
|
||||
}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key}
|
||||
)
|
||||
assert res.status_code == 400, "Should reject ftp:// notification URLs"
|
||||
assert b"is not a valid AppRise URL" in res.data, "Should provide AppRise validation error message"
|
||||
|
||||
# Test 3: Update watch with valid notification URLs
|
||||
new_valid_urls = ["posts://newserver.com"]
|
||||
res = client.put(
|
||||
url_for("watch", uuid=watch_uuid),
|
||||
data=json.dumps({"notification_urls": new_valid_urls}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key}
|
||||
)
|
||||
assert res.status_code == 200, "Should accept valid notification URLs on watch update"
|
||||
|
||||
# Verify the notification URLs were updated
|
||||
res = client.get(
|
||||
url_for("watch", uuid=watch_uuid),
|
||||
headers={'x-api-key': api_key}
|
||||
)
|
||||
assert res.status_code == 200
|
||||
assert res.json['notification_urls'] == new_valid_urls, "Valid notification URLs should be updated"
|
||||
|
||||
# Test 4: Try to update watch with invalid notification URLs (plain https:// not valid)
|
||||
invalid_https_url = ["https://example.com/webhook"]
|
||||
res = client.put(
|
||||
url_for("watch", uuid=watch_uuid),
|
||||
data=json.dumps({"notification_urls": invalid_https_url}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key}
|
||||
)
|
||||
assert res.status_code == 400, "Should reject https:// notification URLs on watch update"
|
||||
assert b"is not a valid AppRise URL" in res.data, "Should provide AppRise validation error message"
|
||||
|
||||
# Test 5: Update watch with non-list notification_urls (caught by OpenAPI schema validation)
|
||||
res = client.put(
|
||||
url_for("watch", uuid=watch_uuid),
|
||||
data=json.dumps({"notification_urls": "not-a-list"}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key}
|
||||
)
|
||||
assert res.status_code == 400, "Should reject non-list notification_urls"
|
||||
assert b"OpenAPI validation failed" in res.data or b"Request body validation error" in res.data
|
||||
|
||||
# Test 6: Verify original URLs are preserved after failed update
|
||||
res = client.get(
|
||||
url_for("watch", uuid=watch_uuid),
|
||||
headers={'x-api-key': api_key}
|
||||
)
|
||||
assert res.status_code == 200
|
||||
assert res.json['notification_urls'] == new_valid_urls, "URLs should remain unchanged after validation failure"
|
||||
|
||||
|
||||
def test_tag_notification_urls_validation(client, live_server, measure_memory_usage, datastore_path):
|
||||
"""Test that Tag PUT endpoint validates notification_urls."""
|
||||
from changedetectionio.model import Tag
|
||||
|
||||
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
|
||||
datastore = live_server.app.config['DATASTORE']
|
||||
|
||||
# Create a tag
|
||||
tag_uuid = datastore.add_tag(title="Test Tag")
|
||||
assert tag_uuid is not None
|
||||
|
||||
# Test 1: Update tag with valid notification URLs
|
||||
valid_urls = ["posts://example.com/tag-notify"]
|
||||
res = client.put(
|
||||
url_for("tag", uuid=tag_uuid),
|
||||
data=json.dumps({"notification_urls": valid_urls}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key}
|
||||
)
|
||||
assert res.status_code == 200, "Should accept valid notification URLs on tag update"
|
||||
|
||||
# Verify the notification URLs were saved
|
||||
tag = datastore.data['settings']['application']['tags'][tag_uuid]
|
||||
assert tag['notification_urls'] == valid_urls, "Valid notification URLs should be saved to tag"
|
||||
|
||||
# Test 2: Try to update tag with invalid notification URLs (https:// not valid)
|
||||
invalid_urls = ["https://example.com/webhook"]
|
||||
res = client.put(
|
||||
url_for("tag", uuid=tag_uuid),
|
||||
data=json.dumps({"notification_urls": invalid_urls}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key}
|
||||
)
|
||||
assert res.status_code == 400, "Should reject https:// notification URLs on tag update"
|
||||
assert b"is not a valid AppRise URL" in res.data, "Should provide AppRise validation error message"
|
||||
|
||||
# Test 3: Update tag with non-list notification_urls (caught by OpenAPI schema validation)
|
||||
res = client.put(
|
||||
url_for("tag", uuid=tag_uuid),
|
||||
data=json.dumps({"notification_urls": "not-a-list"}),
|
||||
headers={'content-type': 'application/json', 'x-api-key': api_key}
|
||||
)
|
||||
assert res.status_code == 400, "Should reject non-list notification_urls"
|
||||
assert b"OpenAPI validation failed" in res.data or b"Request body validation error" in res.data
|
||||
|
||||
# Test 4: Verify original URLs are preserved after failed update
|
||||
tag = datastore.data['settings']['application']['tags'][tag_uuid]
|
||||
assert tag['notification_urls'] == valid_urls, "URLs should remain unchanged after validation failure"
|
||||
@@ -53,11 +53,21 @@ def test_backup(client, live_server, measure_memory_usage, datastore_path):
|
||||
|
||||
backup = ZipFile(io.BytesIO(res.data))
|
||||
l = backup.namelist()
|
||||
uuid4hex = re.compile('^[a-f0-9]{8}-?[a-f0-9]{4}-?4[a-f0-9]{3}-?[89ab][a-f0-9]{3}-?[a-f0-9]{12}.*txt', re.I)
|
||||
newlist = list(filter(uuid4hex.match, l)) # Read Note below
|
||||
|
||||
# Check for UUID-based txt files (history and snapshot)
|
||||
uuid4hex_txt = re.compile('^[a-f0-9]{8}-?[a-f0-9]{4}-?4[a-f0-9]{3}-?[89ab][a-f0-9]{3}-?[a-f0-9]{12}.*txt', re.I)
|
||||
txt_files = list(filter(uuid4hex_txt.match, l))
|
||||
# Should be two txt files in the archive (history and the snapshot)
|
||||
assert len(newlist) == 2
|
||||
assert len(txt_files) == 2
|
||||
|
||||
# Check for watch.json files (new format)
|
||||
uuid4hex_json = re.compile('^[a-f0-9]{8}-?[a-f0-9]{4}-?4[a-f0-9]{3}-?[89ab][a-f0-9]{3}-?[a-f0-9]{12}/watch\.json$', re.I)
|
||||
json_files = list(filter(uuid4hex_json.match, l))
|
||||
# Should be one watch.json file in the archive (the imported watch)
|
||||
assert len(json_files) == 1, f"Expected 1 watch.json file, found {len(json_files)}: {json_files}"
|
||||
|
||||
# Check for changedetection.json (settings file)
|
||||
assert 'changedetection.json' in l, "changedetection.json should be in backup"
|
||||
|
||||
# Get the latest one
|
||||
res = client.get(
|
||||
|
||||
@@ -59,11 +59,29 @@ def test_consistent_history(client, live_server, measure_memory_usage, datastore
|
||||
# Wait for the sync DB save to happen
|
||||
time.sleep(2)
|
||||
|
||||
json_db_file = os.path.join(live_server.app.config['DATASTORE'].datastore_path, 'url-watches.json')
|
||||
# Check which format is being used
|
||||
datastore_path = live_server.app.config['DATASTORE'].datastore_path
|
||||
changedetection_json = os.path.join(datastore_path, 'changedetection.json')
|
||||
url_watches_json = os.path.join(datastore_path, 'url-watches.json')
|
||||
|
||||
json_obj = None
|
||||
with open(json_db_file, 'r', encoding='utf-8') as f:
|
||||
json_obj = json.load(f)
|
||||
json_obj = {'watching': {}}
|
||||
|
||||
if os.path.exists(changedetection_json):
|
||||
# New format: individual watch.json files
|
||||
logger.info("Testing with new format (changedetection.json + individual watch.json)")
|
||||
|
||||
# Load each watch.json file
|
||||
for uuid in live_server.app.config['DATASTORE'].data['watching'].keys():
|
||||
watch_json_file = os.path.join(datastore_path, uuid, 'watch.json')
|
||||
assert os.path.isfile(watch_json_file), f"watch.json should exist at {watch_json_file}"
|
||||
|
||||
with open(watch_json_file, 'r', encoding='utf-8') as f:
|
||||
json_obj['watching'][uuid] = json.load(f)
|
||||
else:
|
||||
# Legacy format: url-watches.json
|
||||
logger.info("Testing with legacy format (url-watches.json)")
|
||||
with open(url_watches_json, 'r', encoding='utf-8') as f:
|
||||
json_obj = json.load(f)
|
||||
|
||||
# assert the right amount of watches was found in the JSON
|
||||
assert len(json_obj['watching']) == len(workers), "Correct number of watches was found in the JSON"
|
||||
@@ -88,7 +106,7 @@ def test_consistent_history(client, live_server, measure_memory_usage, datastore
|
||||
|
||||
# Find the snapshot one
|
||||
for fname in files_in_watch_dir:
|
||||
if fname != 'history.txt' and 'html' not in fname:
|
||||
if fname != 'history.txt' and fname != 'watch.json' and 'html' not in fname:
|
||||
if strtobool(os.getenv("TEST_WITH_BROTLI")):
|
||||
assert fname.endswith('.br'), "Forced TEST_WITH_BROTLI then it should be a .br filename"
|
||||
|
||||
@@ -105,11 +123,23 @@ def test_consistent_history(client, live_server, measure_memory_usage, datastore
|
||||
assert json_obj['watching'][w]['title'], "Watch should have a title set"
|
||||
assert contents.startswith(watch_title + "x"), f"Snapshot contents in file {fname} should start with '{watch_title}x', got '{contents}'"
|
||||
|
||||
assert len(files_in_watch_dir) == 3, "Should be just three files in the dir, html.br snapshot, history.txt and the extracted text snapshot"
|
||||
# With new format, we also have watch.json, so 4 files total
|
||||
if os.path.exists(changedetection_json):
|
||||
assert len(files_in_watch_dir) == 4, "Should be four files in the dir with new format: watch.json, html.br snapshot, history.txt and the extracted text snapshot"
|
||||
else:
|
||||
assert len(files_in_watch_dir) == 3, "Should be just three files in the dir with legacy format: html.br snapshot, history.txt and the extracted text snapshot"
|
||||
|
||||
json_db_file = os.path.join(live_server.app.config['DATASTORE'].datastore_path, 'url-watches.json')
|
||||
with open(json_db_file, 'r', encoding='utf-8') as f:
|
||||
assert '"default"' not in f.read(), "'default' probably shouldnt be here, it came from when the 'default' Watch vars were accidently being saved"
|
||||
# Check that 'default' Watch vars aren't accidentally being saved
|
||||
if os.path.exists(changedetection_json):
|
||||
# New format: check all individual watch.json files
|
||||
for uuid in json_obj['watching'].keys():
|
||||
watch_json_file = os.path.join(datastore_path, uuid, 'watch.json')
|
||||
with open(watch_json_file, 'r', encoding='utf-8') as f:
|
||||
assert '"default"' not in f.read(), f"'default' probably shouldnt be here in {watch_json_file}, it came from when the 'default' Watch vars were accidently being saved"
|
||||
else:
|
||||
# Legacy format: check url-watches.json
|
||||
with open(url_watches_json, 'r', encoding='utf-8') as f:
|
||||
assert '"default"' not in f.read(), "'default' probably shouldnt be here, it came from when the 'default' Watch vars were accidently being saved"
|
||||
|
||||
|
||||
def test_check_text_history_view(client, live_server, measure_memory_usage, datastore_path):
|
||||
|
||||
@@ -160,7 +160,7 @@ def test_invalid_locale(client, live_server, measure_memory_usage, datastore_pat
|
||||
def test_language_persistence_in_session(client, live_server, measure_memory_usage, datastore_path):
|
||||
"""
|
||||
Test that the language preference persists across multiple requests
|
||||
within the same session.
|
||||
within the same session, and that auto-detect properly clears the preference.
|
||||
"""
|
||||
|
||||
# Establish session cookie
|
||||
@@ -184,6 +184,34 @@ def test_language_persistence_in_session(client, live_server, measure_memory_usa
|
||||
assert res.status_code == 200
|
||||
assert b"Annulla" in res.data, "Italian text should persist across requests"
|
||||
|
||||
# Verify locale is in session
|
||||
with client.session_transaction() as sess:
|
||||
assert sess.get('locale') == 'it', "Locale should be set in session"
|
||||
|
||||
# Call auto-detect to clear the locale
|
||||
res = client.get(
|
||||
url_for("ui.delete_locale_language_session_var_if_it_exists"),
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
assert res.status_code == 200
|
||||
# Verify the flash message appears (in English since we cleared the locale)
|
||||
assert b"Language set to auto-detect from browser" in res.data, "Should show flash message"
|
||||
|
||||
# Verify locale was removed from session
|
||||
with client.session_transaction() as sess:
|
||||
assert 'locale' not in sess, "Locale should be removed from session after auto-detect"
|
||||
|
||||
# Now requests should use browser default (English in test environment)
|
||||
res = client.get(
|
||||
url_for("watchlist.index"),
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
assert res.status_code == 200
|
||||
assert b"Cancel" in res.data, "Should show English after auto-detect clears Italian"
|
||||
assert b"Annulla" not in res.data, "Should not show Italian after auto-detect"
|
||||
|
||||
|
||||
def test_set_language_with_redirect(client, live_server, measure_memory_usage, datastore_path):
|
||||
"""
|
||||
|
||||
changedetectionio/tests/test_queue_handler.py (new file, 52 lines)
@@ -0,0 +1,52 @@
|
||||
import os
|
||||
import time
|
||||
from flask import url_for
|
||||
from .util import set_original_response, wait_for_all_checks, wait_for_notification_endpoint_output
|
||||
from ..notification import valid_notification_formats
|
||||
from loguru import logger
|
||||
|
||||
def test_queue_system(client, live_server, measure_memory_usage, datastore_path):
|
||||
"""Test that multiple workers can process queue concurrently without blocking each other"""
|
||||
# (pytest) Werkzeug's threaded server uses a ThreadPoolExecutor with a default limit of min(32, os.cpu_count() + 4) threads.
|
||||
items = os.cpu_count() + 3
|
||||
delay = 10
|
||||
# Auto-queue is off here.
|
||||
live_server.app.config['DATASTORE'].data['settings']['application']['all_paused'] = True
|
||||
|
||||
test_urls = [
|
||||
f"{url_for('test_endpoint', _external=True)}?delay={delay}&id={i}&content=hello+test+content+{i}"
|
||||
for i in range(0, items)
|
||||
]
|
||||
|
||||
# Import the generated test URLs so they can be queued
|
||||
res = client.post(
|
||||
url_for("imports.import_page"),
|
||||
data={"urls": "\r\n".join(test_urls)},
|
||||
follow_redirects=True
|
||||
)
|
||||
assert f"{items} Imported".encode('utf-8') in res.data
|
||||
|
||||
client.application.set_workers(items)
|
||||
|
||||
start = time.time()
|
||||
res = client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
||||
time.sleep(delay/2)
|
||||
|
||||
# Verify all workers are idle (no UUIDs being processed)
|
||||
from changedetectionio import worker_pool
|
||||
running_uuids = worker_pool.get_running_uuids()
|
||||
logger.debug( f"Should be atleast some workers running - {len(running_uuids)} UUIDs still being processed: {running_uuids}")
|
||||
assert len(running_uuids) != 0, f"Should be at least some workers running - {len(running_uuids)} UUIDs still being processed: {running_uuids}"
|
||||
|
||||
wait_for_all_checks(client)
|
||||
|
||||
# All workers should be done in roughly the per-job delay plus a small margin (they also need time to 'see' something is in the queue)
|
||||
total_time = (time.time() - start)
|
||||
logger.debug(f"All workers finished {items} items in less than {delay} seconds per job. {total_time}s total")
|
||||
# if there was a bug in queue handler not running parallel, this would blow out to items*delay seconds
|
||||
assert total_time < delay + 10, f"All workers finished {items} items in less than {delay} seconds per job, total time {total_time}s"
|
||||
|
||||
# Verify all workers are idle (no UUIDs being processed)
|
||||
from changedetectionio import worker_pool
|
||||
running_uuids = worker_pool.get_running_uuids()
|
||||
assert len(running_uuids) == 0, f"Expected all workers to be idle, but {len(running_uuids)} UUIDs still being processed: {running_uuids}"
|
||||
@@ -142,10 +142,14 @@ def test_body_in_request(client, live_server, measure_memory_usage, datastore_pa
|
||||
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
||||
wait_for_all_checks(client)
|
||||
watches_with_body = 0
|
||||
with open(os.path.join(datastore_path, 'url-watches.json'), encoding='utf-8') as f:
|
||||
app_struct = json.load(f)
|
||||
for uuid in app_struct['watching']:
|
||||
if app_struct['watching'][uuid]['body']==body_value:
|
||||
|
||||
# Read individual watch.json files
|
||||
for uuid in client.application.config.get('DATASTORE').data['watching'].keys():
|
||||
watch_json_file = os.path.join(datastore_path, uuid, 'watch.json')
|
||||
assert os.path.exists(watch_json_file), f"watch.json should exist at {watch_json_file}"
|
||||
with open(watch_json_file, 'r', encoding='utf-8') as f:
|
||||
watch_data = json.load(f)
|
||||
if watch_data.get('body') == body_value:
|
||||
watches_with_body += 1
|
||||
|
||||
# Should be only one with body set
|
||||
@@ -225,10 +229,14 @@ def test_method_in_request(client, live_server, measure_memory_usage, datastore_
|
||||
wait_for_all_checks(client)
|
||||
|
||||
watches_with_method = 0
|
||||
with open(os.path.join(datastore_path, 'url-watches.json'), encoding='utf-8') as f:
|
||||
app_struct = json.load(f)
|
||||
for uuid in app_struct['watching']:
|
||||
if app_struct['watching'][uuid]['method'] == 'PATCH':
|
||||
|
||||
# Read individual watch.json files
|
||||
for uuid in client.application.config.get('DATASTORE').data['watching'].keys():
|
||||
watch_json_file = os.path.join(datastore_path, uuid, 'watch.json')
|
||||
assert os.path.exists(watch_json_file), f"watch.json should exist at {watch_json_file}"
|
||||
with open(watch_json_file, 'r', encoding='utf-8') as f:
|
||||
watch_data = json.load(f)
|
||||
if watch_data.get('method') == 'PATCH':
|
||||
watches_with_method += 1
|
||||
|
||||
# Should be only one with method set to PATCH
|
||||
|
||||
@@ -140,38 +140,13 @@ def delete_all_watches(client=None):
|
||||
def wait_for_all_checks(client=None):
|
||||
"""
|
||||
Waits until the queue is empty and workers are idle.
|
||||
Much faster than the original with adaptive timing.
|
||||
Delegates to worker_pool.wait_for_all_checks for shared logic.
|
||||
"""
|
||||
from changedetectionio.flask_app import update_q as global_update_q
|
||||
from changedetectionio import worker_handler
|
||||
empty_since = None
|
||||
attempt = 0
|
||||
max_attempts = 150 # Still reasonable upper bound
|
||||
|
||||
while attempt < max_attempts:
|
||||
# Start with fast checks, slow down if needed
|
||||
if attempt < 10:
|
||||
time.sleep(0.2) # Very fast initial checks
|
||||
elif attempt < 30:
|
||||
time.sleep(0.4) # Medium speed
|
||||
else:
|
||||
time.sleep(0.8) # Slower for persistent issues
|
||||
|
||||
q_length = global_update_q.qsize()
|
||||
running_uuids = worker_handler.get_running_uuids()
|
||||
any_workers_busy = len(running_uuids) > 0
|
||||
|
||||
if q_length == 0 and not any_workers_busy:
|
||||
if empty_since is None:
|
||||
empty_since = time.time()
|
||||
# Brief stabilization period for async workers
|
||||
elif time.time() - empty_since >= 0.3:
|
||||
break
|
||||
else:
|
||||
empty_since = None
|
||||
|
||||
attempt += 1
|
||||
time.sleep(0.3)
|
||||
from changedetectionio import worker_pool
|
||||
time.sleep(0.05)
|
||||
# Use the shared wait logic from worker_pool
|
||||
return worker_pool.wait_for_all_checks(global_update_q, timeout=150)
|
||||
|
||||
def wait_for_watch_history(client, min_history_count=2, timeout=10):
|
||||
"""
|
||||
|
||||
changedetectionio/translations/README.md (new file, 101 lines)
@@ -0,0 +1,101 @@
|
||||
# Translation Guide
|
||||
|
||||
## Updating Translations
|
||||
|
||||
To maintain consistency and minimize unnecessary changes in translation files, run these commands:
|
||||
|
||||
```bash
|
||||
python setup.py extract_messages # Extract translatable strings
|
||||
python setup.py update_catalog # Update all language files
|
||||
python setup.py compile_catalog # Compile to binary .mo files
|
||||
```
|
||||
|
||||
## Configuration
|
||||
|
||||
All translation settings are configured in **`../../setup.cfg`** (single source of truth).
|
||||
|
||||
The configuration below is shown for reference - **edit `setup.cfg` to change settings**:
|
||||
|
||||
```ini
|
||||
[extract_messages]
|
||||
# Extract translatable strings from source code
|
||||
mapping_file = babel.cfg
|
||||
output_file = changedetectionio/translations/messages.pot
|
||||
input_paths = changedetectionio
|
||||
keywords = _ _l gettext
|
||||
# Options to reduce unnecessary changes in .pot files
|
||||
sort_by_file = true # Keeps entries ordered by file path
|
||||
width = 120 # Consistent line width (prevents rewrapping)
|
||||
add_location = file # Show file path only (not line numbers)
|
||||
|
||||
[update_catalog]
|
||||
# Update existing .po files with new strings from .pot
|
||||
# Note: 'locale' is omitted - Babel auto-discovers all catalogs in output_dir
|
||||
input_file = changedetectionio/translations/messages.pot
|
||||
output_dir = changedetectionio/translations
|
||||
domain = messages
|
||||
# Options for consistent formatting
|
||||
width = 120 # Consistent line width
|
||||
no_fuzzy_matching = true # Avoids incorrect automatic matches
|
||||
|
||||
[compile_catalog]
|
||||
# Compile .po files to .mo binary format
|
||||
directory = changedetectionio/translations
|
||||
domain = messages
|
||||
```
|
||||
|
||||
**Key formatting options:**
|
||||
- `sort_by_file = true` - Orders entries by file path (consistent ordering)
|
||||
- `width = 120` - Fixed line width prevents text rewrapping
|
||||
- `add_location = file` - Shows file path only, not line numbers (reduces git churn)
|
||||
- `no_fuzzy_matching = true` - Prevents incorrect automatic fuzzy matches
|
||||
|
||||
## Why Use These Commands?
|
||||
|
||||
Running pybabel commands directly without consistent options causes:
|
||||
- ❌ Entries get reordered differently each time
|
||||
- ❌ Text gets rewrapped at different widths
|
||||
- ❌ Line numbers change every edit (if not configured)
|
||||
- ❌ Large diffs that make code review difficult
|
||||
|
||||
Using `python setup.py` commands ensures:
|
||||
- ✅ Consistent ordering (by file path, not alphabetically)
|
||||
- ✅ Consistent line width (120 characters, no rewrapping)
|
||||
- ✅ File-only locations (no line number churn)
|
||||
- ✅ No fuzzy matching (prevents incorrect auto-translations)
|
||||
- ✅ Minimal diffs (only actual changes show up)
|
||||
- ✅ Easier code review and git history
|
||||
|
||||
These commands read settings from `../../setup.cfg` automatically.
|
||||
|
||||
## Supported Languages
|
||||
|
||||
- `cs` - Czech (Čeština)
|
||||
- `de` - German (Deutsch)
|
||||
- `en_GB` - English (UK)
|
||||
- `en_US` - English (US)
|
||||
- `fr` - French (Français)
|
||||
- `it` - Italian (Italiano)
|
||||
- `ko` - Korean (한국어)
|
||||
- `zh` - Chinese Simplified (中文简体)
|
||||
- `zh_Hant_TW` - Chinese Traditional (繁體中文)
|
||||
|
||||
## Adding a New Language
|
||||
|
||||
1. Initialize the new language catalog:
|
||||
```bash
|
||||
pybabel init -i changedetectionio/translations/messages.pot -d changedetectionio/translations -l NEW_LANG_CODE
|
||||
```
|
||||
2. Compile it:
|
||||
```bash
|
||||
python setup.py compile_catalog
|
||||
```
|
||||
|
||||
Babel will auto-discover the new language on subsequent translation updates.
|
||||
|
||||
## Translation Notes
|
||||
|
||||
From CLAUDE.md:
|
||||
- Always use "monitor" or "watcher" terminology (not "clock")
|
||||
- Use the most brief wording suitable
|
||||
- When finding issues in one language, check ALL languages for the same issue
|
||||
Binary .mo translation files not shown; large .po catalog diffs suppressed.
@@ -53,11 +53,11 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
|
||||
watch = None
|
||||
|
||||
try:
|
||||
# Use sync interface via run_in_executor since each worker has its own event loop
|
||||
loop = asyncio.get_event_loop()
|
||||
# Use async interface with custom executor to avoid thread pool exhaustion
|
||||
# With 30+ workers, we need executor sized to match (see worker_pool.py)
|
||||
queued_item_data = await asyncio.wait_for(
|
||||
loop.run_in_executor(executor, q.get, True, 1.0), # block=True, timeout=1.0
|
||||
timeout=1.5
|
||||
q.async_get(executor=executor),
|
||||
timeout=1.0
|
||||
)
|
||||
|
||||
except asyncio.TimeoutError:
|
||||
@@ -89,25 +89,27 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
|
||||
continue
|
||||
|
||||
uuid = queued_item_data.item.get('uuid')
|
||||
# RACE CONDITION FIX: Check if this UUID is already being processed by another worker
|
||||
from changedetectionio import worker_handler
|
||||
|
||||
# RACE CONDITION FIX: Atomically claim this UUID for processing
|
||||
from changedetectionio import worker_pool
|
||||
from changedetectionio.queuedWatchMetaData import PrioritizedItem
|
||||
if worker_handler.is_watch_running_by_another_worker(uuid, worker_id):
|
||||
logger.trace(f"Worker {worker_id} detected UUID {uuid} already being processed by another worker - deferring")
|
||||
|
||||
# Try to claim the UUID atomically - prevents duplicate processing
|
||||
if not worker_pool.claim_uuid_for_processing(uuid, worker_id):
|
||||
# Already being processed by another worker
|
||||
logger.trace(f"Worker {worker_id} detected UUID {uuid} already being processed - deferring")
|
||||
|
||||
# Sleep to avoid tight loop and give the other worker time to finish
|
||||
await asyncio.sleep(DEFER_SLEEP_TIME_ALREADY_QUEUED)
|
||||
|
||||
# Re-queue with lower priority so it gets checked again after current processing finishes
|
||||
deferred_priority = max(1000, queued_item_data.priority * 10)
|
||||
deferred_item = PrioritizedItem(priority=deferred_priority, item=queued_item_data.item)
|
||||
worker_handler.queue_item_async_safe(q, deferred_item, silent=True)
|
||||
worker_pool.queue_item_async_safe(q, deferred_item, silent=True)
|
||||
logger.debug(f"Worker {worker_id} re-queued UUID {uuid} for subsequent check")
|
||||
continue
|
||||
|
||||
fetch_start_time = round(time.time())
|
||||
|
||||
# Mark this UUID as being processed by this worker
|
||||
worker_handler.set_uuid_processing(uuid, worker_id=worker_id, processing=True)
|
||||
|
||||
try:
|
||||
if uuid in list(datastore.data['watching'].keys()) and datastore.data['watching'][uuid].get('url'):
|
||||
@@ -163,8 +165,10 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
|
||||
except ProcessorException as e:
|
||||
if e.screenshot:
|
||||
watch.save_screenshot(screenshot=e.screenshot)
|
||||
e.screenshot = None # Free memory immediately
|
||||
if e.xpath_data:
|
||||
watch.save_xpath_data(data=e.xpath_data)
|
||||
e.xpath_data = None # Free memory immediately
|
||||
datastore.update_watch(uuid=uuid, update_obj={'last_error': e.message})
|
||||
process_changedetection_results = False
|
||||
|
||||
@@ -184,9 +188,11 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
|
||||
|
||||
if e.screenshot:
|
||||
watch.save_screenshot(screenshot=e.screenshot, as_error=True)
|
||||
e.screenshot = None # Free memory immediately
|
||||
|
||||
if e.xpath_data:
|
||||
watch.save_xpath_data(data=e.xpath_data)
|
||||
e.xpath_data = None # Free memory immediately
|
||||
|
||||
process_changedetection_results = False
|
||||
|
||||
@@ -205,8 +211,10 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
|
||||
|
||||
if e.screenshot:
|
||||
watch.save_screenshot(screenshot=e.screenshot, as_error=True)
|
||||
e.screenshot = None # Free memory immediately
|
||||
if e.xpath_data:
|
||||
watch.save_xpath_data(data=e.xpath_data, as_error=True)
|
||||
e.xpath_data = None # Free memory immediately
|
||||
if e.page_text:
|
||||
watch.save_error_text(contents=e.page_text)
|
||||
|
||||
@@ -223,9 +231,11 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
|
||||
# Filter wasn't found, but we should still update the visual selector so that the user has a chance to set it up again
|
||||
if e.screenshot:
|
||||
watch.save_screenshot(screenshot=e.screenshot)
|
||||
e.screenshot = None # Free memory immediately
|
||||
|
||||
if e.xpath_data:
|
||||
watch.save_xpath_data(data=e.xpath_data)
|
||||
e.xpath_data = None # Free memory immediately
|
||||
|
||||
# Only when enabled, send the notification
|
||||
if watch.get('filter_failure_notification_send', False):
|
||||
@@ -317,6 +327,7 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
|
||||
err_text = "Error running JS Actions - Page request - "+e.message
|
||||
if e.screenshot:
|
||||
watch.save_screenshot(screenshot=e.screenshot, as_error=True)
|
||||
e.screenshot = None # Free memory immediately
|
||||
datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
|
||||
'last_check_status': e.status_code})
|
||||
process_changedetection_results = False
|
||||
@@ -328,6 +339,7 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
|
||||
|
||||
if e.screenshot:
|
||||
watch.save_screenshot(screenshot=e.screenshot, as_error=True)
|
||||
e.screenshot = None # Free memory immediately
|
||||
|
||||
datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
|
||||
'last_check_status': e.status_code,
|
||||
@@ -369,9 +381,17 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
|
||||
if changed_detected or not watch.history_n:
|
||||
if update_handler.screenshot:
|
||||
watch.save_screenshot(screenshot=update_handler.screenshot)
|
||||
# Free screenshot memory immediately after saving
|
||||
update_handler.screenshot = None
|
||||
if hasattr(update_handler, 'fetcher') and hasattr(update_handler.fetcher, 'screenshot'):
|
||||
update_handler.fetcher.screenshot = None
|
||||
|
||||
if update_handler.xpath_data:
|
||||
watch.save_xpath_data(data=update_handler.xpath_data)
|
||||
# Free xpath data memory
|
||||
update_handler.xpath_data = None
|
||||
if hasattr(update_handler, 'fetcher') and hasattr(update_handler.fetcher, 'xpath_data'):
|
||||
update_handler.fetcher.xpath_data = None
|
||||
|
||||
# Ensure unique timestamp for history
|
||||
if watch.newest_history_key and int(fetch_start_time) == int(watch.newest_history_key):
|
||||
@@ -438,6 +458,20 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
|
||||
update_handler.fetcher.clear_content()
|
||||
logger.debug(f"Cleared fetcher content for UUID {uuid}")
|
||||
|
||||
# Explicitly delete update_handler to free all references
|
||||
if update_handler:
|
||||
del update_handler
|
||||
update_handler = None
|
||||
|
||||
# Force aggressive memory cleanup after clearing
|
||||
import gc
|
||||
gc.collect()
|
||||
try:
|
||||
import ctypes
|
||||
ctypes.CDLL('libc.so.6').malloc_trim(0)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Worker {worker_id} unexpected error processing {uuid}: {e}")
|
||||
logger.error(f"Worker {worker_id} traceback:", exc_info=True)
|
||||
@@ -455,8 +489,8 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
|
||||
except Exception as e:
|
||||
logger.error(f"Exception while cleaning/quit after calling browser: {e}")
|
||||
try:
|
||||
# Mark UUID as no longer being processed by this worker
|
||||
worker_handler.set_uuid_processing(uuid, worker_id=worker_id, processing=False)
|
||||
# Release UUID from processing (thread-safe)
|
||||
worker_pool.release_uuid_from_processing(uuid, worker_id=worker_id)
|
||||
|
||||
# Send completion signal
|
||||
if watch:
|
||||
@@ -17,6 +17,7 @@ worker_threads = [] # List of WorkerThread objects
|
||||
|
||||
# Track currently processing UUIDs for async workers - maps {uuid: worker_id}
|
||||
currently_processing_uuids = {}
|
||||
_uuid_processing_lock = threading.Lock() # Protects currently_processing_uuids
|
||||
|
||||
# Configuration - async workers only
|
||||
USE_ASYNC_WORKERS = True
|
||||
@@ -90,7 +91,8 @@ class WorkerThread:
|
||||
self.thread.start()
|
||||
|
||||
def stop(self):
|
||||
"""Stop the worker thread"""
|
||||
"""Stop the worker thread brutally - no waiting"""
|
||||
# Try to stop the event loop if it exists
|
||||
if self.loop and self.running:
|
||||
try:
|
||||
# Signal the loop to stop
|
||||
@@ -98,8 +100,7 @@ class WorkerThread:
|
||||
except RuntimeError:
|
||||
pass
|
||||
|
||||
if self.thread and self.thread.is_alive():
|
||||
self.thread.join(timeout=2.0)
|
||||
# Don't wait - thread is daemon and will die when needed
|
||||
|
||||
|
||||
def start_async_workers(n_workers, update_q, notification_q, app, datastore):
|
||||
@@ -124,7 +125,7 @@ def start_async_workers(n_workers, update_q, notification_q, app, datastore):
|
||||
|
||||
async def start_single_async_worker(worker_id, update_q, notification_q, app, datastore, executor=None):
|
||||
"""Start a single async worker with auto-restart capability"""
|
||||
from changedetectionio.async_update_worker import async_update_worker
|
||||
from changedetectionio.worker import async_update_worker
|
||||
|
||||
# Check if we're in pytest environment - if so, be more gentle with logging
|
||||
import os
|
||||
@@ -207,31 +208,80 @@ def get_worker_count():

 def get_running_uuids():
     """Get list of UUIDs currently being processed by async workers"""
-    return list(currently_processing_uuids.keys())
+    with _uuid_processing_lock:
+        return list(currently_processing_uuids.keys())
+
+
+def claim_uuid_for_processing(uuid, worker_id):
+    """
+    Atomically check if UUID is available and claim it for processing.
+
+    This is thread-safe and prevents race conditions where multiple workers
+    try to process the same UUID simultaneously.
+
+    Args:
+        uuid: The watch UUID to claim
+        worker_id: The ID of the worker claiming this UUID
+
+    Returns:
+        True if successfully claimed (UUID was not being processed)
+        False if already being processed by another worker
+    """
+    with _uuid_processing_lock:
+        if uuid in currently_processing_uuids:
+            # Already being processed by another worker
+            return False
+        # Claim it atomically
+        currently_processing_uuids[uuid] = worker_id
+        logger.debug(f"Worker {worker_id} claimed UUID: {uuid}")
+        return True
+
+
+def release_uuid_from_processing(uuid, worker_id):
+    """
+    Release a UUID from processing (thread-safe).
+
+    Args:
+        uuid: The watch UUID to release
+        worker_id: The ID of the worker releasing this UUID
+    """
+    with _uuid_processing_lock:
+        # Only remove if this worker owns it (defensive)
+        if currently_processing_uuids.get(uuid) == worker_id:
+            currently_processing_uuids.pop(uuid, None)
+            logger.debug(f"Worker {worker_id} released UUID: {uuid}")
+        else:
+            logger.warning(f"Worker {worker_id} tried to release UUID {uuid} but doesn't own it (owned by {currently_processing_uuids.get(uuid, 'nobody')})")


 def set_uuid_processing(uuid, worker_id=None, processing=True):
-    """Mark a UUID as being processed or completed by a specific worker"""
-    global currently_processing_uuids
+    """
+    Mark a UUID as being processed or completed by a specific worker.
+
+    DEPRECATED: Use claim_uuid_for_processing() and release_uuid_from_processing() instead.
+    This function is kept for backward compatibility but doesn't provide atomic check-and-set.
+    """
     if processing:
-        currently_processing_uuids[uuid] = worker_id
-        logger.debug(f"Worker {worker_id} started processing UUID: {uuid}")
+        with _uuid_processing_lock:
+            currently_processing_uuids[uuid] = worker_id
+        logger.debug(f"Worker {worker_id} started processing UUID: {uuid}")
     else:
-        currently_processing_uuids.pop(uuid, None)
-        logger.debug(f"Worker {worker_id} finished processing UUID: {uuid}")
+        release_uuid_from_processing(uuid, worker_id)


 def is_watch_running(watch_uuid):
     """Check if a specific watch is currently being processed by any worker"""
-    return watch_uuid in currently_processing_uuids
+    with _uuid_processing_lock:
+        return watch_uuid in currently_processing_uuids


 def is_watch_running_by_another_worker(watch_uuid, current_worker_id):
     """Check if a specific watch is currently being processed by a different worker"""
-    if watch_uuid not in currently_processing_uuids:
-        return False
-    processing_worker_id = currently_processing_uuids[watch_uuid]
-    return processing_worker_id != current_worker_id
+    with _uuid_processing_lock:
+        if watch_uuid not in currently_processing_uuids:
+            return False
+        processing_worker_id = currently_processing_uuids[watch_uuid]
+        return processing_worker_id != current_worker_id


 def queue_item_async_safe(update_q, item, silent=False):
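A rough usage sketch of the claim/release pattern the hunk above introduces; the module import path and the helper names around it are assumptions for illustration, not taken from this diff:

# Sketch only - the import path is an assumption, not confirmed by this diff
from changedetectionio import worker_handler

def run_check(uuid):
    print(f"checking {uuid}")                     # stand-in for the real fetch/diff work

def handle_queued_uuid(uuid, worker_id):
    if not worker_handler.claim_uuid_for_processing(uuid, worker_id):
        return False                              # another worker already owns this UUID
    try:
        run_check(uuid)
        return True
    finally:
        # Release even on failure so the UUID can be claimed again later
        worker_handler.release_uuid_from_processing(uuid, worker_id)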
@@ -287,7 +337,7 @@ def queue_item_async_safe(update_q, item, silent=False):


 def shutdown_workers():
-    """Shutdown all async workers fast and aggressively"""
+    """Shutdown all async workers brutally - no delays, no waiting"""
     global worker_threads

     # Check if we're in pytest environment - if so, be more gentle with logging
@@ -295,16 +345,17 @@ def shutdown_workers():
     in_pytest = "pytest" in os.sys.modules or "PYTEST_CURRENT_TEST" in os.environ

     if not in_pytest:
-        logger.info("Fast shutdown of async workers initiated...")
+        logger.info("Brutal shutdown of async workers initiated...")

-    # Stop all worker threads
+    # Stop all worker event loops
     for worker in worker_threads:
         worker.stop()

+    # Clear immediately - threads are daemon and will die
     worker_threads.clear()

     if not in_pytest:
-        logger.info("Async workers fast shutdown complete")
+        logger.info("Async workers brutal shutdown complete")

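For readers wondering what worker.stop() implies here: a minimal sketch, assuming each entry in worker_threads wraps a daemon thread running its own asyncio event loop (the actual class in the codebase may differ):

# Minimal sketch of a per-thread event-loop worker with a non-blocking stop() (assumed shape)
import asyncio
import threading

class LoopWorker:
    def __init__(self):
        self.loop = asyncio.new_event_loop()
        self.thread = threading.Thread(target=self._run, daemon=True)  # daemon: dies with the process
        self.thread.start()

    def _run(self):
        asyncio.set_event_loop(self.loop)
        self.loop.run_forever()

    def stop(self):
        # Signal the loop from another thread; no join(), matching the "no waiting" shutdown above
        self.loop.call_soon_threadsafe(self.loop.stop)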
@@ -381,6 +432,53 @@ def get_worker_status():
     }


+def wait_for_all_checks(update_q, timeout=150):
+    """
+    Wait for queue to be empty and all workers to be idle.
+
+    Args:
+        update_q: The update queue to monitor
+        timeout: Maximum wait time in seconds (default 150 = 150 iterations * 0.2-0.8s)
+
+    Returns:
+        bool: True if all checks completed, False if timeout
+    """
+    import time
+    empty_since = None
+    attempt = 0
+    max_attempts = timeout
+
+    while attempt < max_attempts:
+        # Adaptive sleep - start fast, slow down if needed
+        if attempt < 10:
+            sleep_time = 0.2  # Very fast initial checks
+        elif attempt < 30:
+            sleep_time = 0.4  # Medium speed
+        else:
+            sleep_time = 0.8  # Slower for persistent issues
+
+        time.sleep(sleep_time)
+
+        q_length = update_q.qsize()
+        running_uuids = get_running_uuids()
+        any_workers_busy = len(running_uuids) > 0
+
+        if q_length == 0 and not any_workers_busy:
+            if empty_since is None:
+                empty_since = time.time()
+            # Brief stabilization period for async workers
+            elif time.time() - empty_since >= 0.3:
+                # Add small buffer for filesystem operations to complete
+                time.sleep(0.2)
+                return True
+        else:
+            empty_since = None
+
+        attempt += 1
+
+    return False  # Timeout
+
+
 def check_worker_health(expected_count, update_q=None, notification_q=None, app=None, datastore=None):
     """
     Check if the expected number of async workers are running and restart any missing ones.
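A hedged example of how a caller might use the new wait_for_all_checks() added above; the enqueue step is a placeholder, not taken from this diff:

# Sketch: block until the queue drains and all workers go idle
def run_and_wait(update_q, enqueue_fn):
    enqueue_fn()                                   # placeholder - however the caller queues watches
    if not wait_for_all_checks(update_q, timeout=60):
        raise TimeoutError("workers did not go idle within 60s")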
@@ -8,7 +8,7 @@ flask-paginate
 flask_expects_json~=1.7
 flask_restful
 flask_cors # For the Chrome extension to operate
-janus # Thread-safe async/sync queue bridge
+# janus # No longer needed - using pure threading.Queue for multi-loop support
 flask_wtf~=1.2
 flask~=3.1
 flask-socketio~=5.6.0
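The commented-out janus line says a plain threading.Queue now bridges the sync and async sides across multiple event loops. One way that can work (a sketch under that assumption, not necessarily the project's exact approach) is to poll with get_nowait() so no event loop ever blocks:

# Sketch: sharing one threading.Queue between several asyncio event loops
import asyncio
import queue

update_q = queue.Queue()   # thread-safe, usable from any loop or thread

async def next_item(q, poll_interval=0.1):
    # get_nowait() never blocks the event loop; sleep briefly between empty polls
    while True:
        try:
            return q.get_nowait()
        except queue.Empty:
            await asyncio.sleep(poll_interval)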
@@ -42,7 +42,7 @@ orjson~=3.11
 # jq not available on Windows so must be installed manually

 # Notification library
-apprise==1.9.6
+apprise==1.9.7

 diff_match_patch

@@ -91,7 +91,7 @@ jq~=1.3; python_version >= "3.8" and sys_platform == "linux"

 # playwright is installed at Dockerfile build time because it's not available on all platforms

-pyppeteer-ng==2.0.0rc11
+pyppeteer-ng==2.0.0rc12
 pyppeteerstealth>=0.0.4

 # Include pytest, so if theres a support issue we can ask them to run these tests on their setup

28
setup.cfg
Normal file
@@ -0,0 +1,28 @@
+# Translation configuration for changedetection.io
+# See changedetectionio/translations/README.md for full documentation on updating translations
+
+[extract_messages]
+# Extract translatable strings from source code
+mapping_file = babel.cfg
+output_file = changedetectionio/translations/messages.pot
+input_paths = changedetectionio
+keywords = _ _l gettext
+# Options to reduce unnecessary changes in .pot files
+sort_by_file = true
+width = 120
+add_location = file
+
+[update_catalog]
+# Update existing .po files with new strings from .pot
+# Note: Omitting 'locale' makes Babel auto-discover all catalogs in output_dir
+input_file = changedetectionio/translations/messages.pot
+output_dir = changedetectionio/translations
+domain = messages
+# Options for consistent formatting
+width = 120
+no_fuzzy_matching = true
+
+[compile_catalog]
+# Compile .po files to .mo binary format
+directory = changedetectionio/translations
+domain = messages