mirror of
https://github.com/dgtlmoon/changedetection.io.git
synced 2026-01-27 01:16:22 +00:00
Compare commits
16 Commits
3792-langu
...
CLI-option
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
04e22f1b0a | ||
|
|
2a0e8cbf6c | ||
|
|
901d69af42 | ||
|
|
528c305928 | ||
|
|
a4357c7bb7 | ||
|
|
75db43fc09 | ||
|
|
f8c6c62107 | ||
|
|
4523918752 | ||
|
|
9d1743adbe | ||
|
|
f34d806b09 | ||
|
|
c22335ed01 | ||
|
|
0042f0c36a | ||
|
|
55e14cf394 | ||
|
|
308ccb5841 | ||
|
|
978e17acf6 | ||
|
|
73c29d1fa0 |
@@ -111,6 +111,32 @@ jobs:
|
||||
docker network inspect changedet-network >/dev/null 2>&1 || docker network create changedet-network
|
||||
docker run --name test-cdio-basic-tests --network changedet-network test-changedetectionio bash -c 'cd changedetectionio && ./run_basic_tests.sh'
|
||||
|
||||
- name: Test CLI options
|
||||
run: |
|
||||
docker network inspect changedet-network >/dev/null 2>&1 || docker network create changedet-network
|
||||
docker run --name test-cdio-cli-opts --network changedet-network test-changedetectionio bash -c 'changedetectionio/test_cli_opts.sh' &> cli-opts-output.txt
|
||||
echo "=== CLI Options Test Output ==="
|
||||
cat cli-opts-output.txt
|
||||
|
||||
- name: CLI Memory Test
|
||||
run: |
|
||||
echo "=== Checking CLI batch mode memory usage ==="
|
||||
# Extract RSS memory value from output
|
||||
RSS_MB=$(grep -oP "Memory consumption before worker shutdown: RSS=\K[\d.]+" cli-opts-output.txt | head -1 || echo "0")
|
||||
echo "RSS Memory: ${RSS_MB} MB"
|
||||
|
||||
# Check if RSS is less than 100MB
|
||||
if [ -n "$RSS_MB" ]; then
|
||||
if (( $(echo "$RSS_MB < 100" | bc -l) )); then
|
||||
echo "✓ Memory usage is acceptable: ${RSS_MB} MB < 100 MB"
|
||||
else
|
||||
echo "✗ Memory usage too high: ${RSS_MB} MB >= 100 MB"
|
||||
exit 1
|
||||
fi
|
||||
else
|
||||
echo "⚠ Could not extract memory usage, skipping check"
|
||||
fi
|
||||
|
||||
- name: Extract memory report and logs
|
||||
if: always()
|
||||
uses: ./.github/actions/extract-memory-report
|
||||
@@ -125,6 +151,13 @@ jobs:
|
||||
name: test-cdio-basic-tests-output-py${{ env.PYTHON_VERSION }}
|
||||
path: output-logs
|
||||
|
||||
- name: Store CLI test output
|
||||
if: always()
|
||||
uses: actions/upload-artifact@v6
|
||||
with:
|
||||
name: test-cdio-cli-opts-output-py${{ env.PYTHON_VERSION }}
|
||||
path: cli-opts-output.txt
|
||||
|
||||
# Playwright tests
|
||||
playwright-tests:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
@@ -2,23 +2,24 @@
|
||||
|
||||
# Read more https://github.com/dgtlmoon/changedetection.io/wiki
|
||||
# Semver means never use .01, or 00. Should be .1.
|
||||
__version__ = '0.52.8'
|
||||
__version__ = '0.52.9'
|
||||
|
||||
from changedetectionio.strtobool import strtobool
|
||||
from json.decoder import JSONDecodeError
|
||||
|
||||
from loguru import logger
|
||||
import getopt
|
||||
import logging
|
||||
import os
|
||||
import getopt
|
||||
import platform
|
||||
import signal
|
||||
|
||||
import sys
|
||||
import threading
|
||||
import time
|
||||
|
||||
# Eventlet completely removed - using threading mode for SocketIO
|
||||
# This provides better Python 3.12+ compatibility and eliminates eventlet/asyncio conflicts
|
||||
from changedetectionio import store
|
||||
from changedetectionio.flask_app import changedetection_app
|
||||
from loguru import logger
|
||||
# Note: store and changedetection_app are imported inside main() to avoid
|
||||
# initialization before argument parsing (allows --help to work without loading everything)
|
||||
|
||||
# ==============================================================================
|
||||
# Multiprocessing Configuration - CRITICAL for Thread Safety
|
||||
@@ -83,11 +84,22 @@ def get_version():
|
||||
def sigshutdown_handler(_signo, _stack_frame):
|
||||
name = signal.Signals(_signo).name
|
||||
logger.critical(f'Shutdown: Got Signal - {name} ({_signo}), Fast shutdown initiated')
|
||||
|
||||
|
||||
# Set exit flag immediately to stop all loops
|
||||
app.config.exit.set()
|
||||
datastore.stop_thread = True
|
||||
|
||||
|
||||
# Log memory consumption before shutting down workers (cross-platform)
|
||||
try:
|
||||
import psutil
|
||||
process = psutil.Process()
|
||||
mem_info = process.memory_info()
|
||||
rss_mb = mem_info.rss / 1024 / 1024
|
||||
vms_mb = mem_info.vms / 1024 / 1024
|
||||
logger.info(f"Memory consumption before worker shutdown: RSS={rss_mb:,.2f} MB, VMS={vms_mb:,.2f} MB")
|
||||
except Exception as e:
|
||||
logger.warning(f"Could not retrieve memory stats: {str(e)}")
|
||||
|
||||
# Shutdown workers and queues immediately
|
||||
try:
|
||||
from changedetectionio import worker_handler
|
||||
@@ -125,10 +137,51 @@ def main():
|
||||
global datastore
|
||||
global app
|
||||
|
||||
# Early help/version check before any initialization
|
||||
if '--help' in sys.argv or '-help' in sys.argv:
|
||||
print('Usage: changedetection.py [options]')
|
||||
print('')
|
||||
print('Standard options:')
|
||||
print(' -s SSL enable')
|
||||
print(' -h HOST Listen host (default: 0.0.0.0)')
|
||||
print(' -p PORT Listen port (default: 5000)')
|
||||
print(' -d PATH Datastore path')
|
||||
print(' -l LEVEL Log level (TRACE, DEBUG, INFO, SUCCESS, WARNING, ERROR, CRITICAL)')
|
||||
print(' -c Cleanup unused snapshots')
|
||||
print(' -C Create datastore directory if it doesn\'t exist')
|
||||
print('')
|
||||
print('Add URLs on startup:')
|
||||
print(' -u URL Add URL to watch (can be used multiple times)')
|
||||
print(' -u0 \'JSON\' Set options for first -u URL (e.g. \'{"processor":"text_json_diff"}\')')
|
||||
print(' -u1 \'JSON\' Set options for second -u URL (0-indexed)')
|
||||
print(' -u2 \'JSON\' Set options for third -u URL, etc.')
|
||||
print(' Available options: processor, fetch_backend, headers, method, etc.')
|
||||
print(' See model/Watch.py for all available options')
|
||||
print('')
|
||||
print('Recheck on startup:')
|
||||
print(' -r all Queue all watches for recheck on startup')
|
||||
print(' -r UUID,... Queue specific watches (comma-separated UUIDs)')
|
||||
print(' -r all N Queue all watches, wait for completion, repeat N times')
|
||||
print(' -r UUID,... N Queue specific watches, wait for completion, repeat N times')
|
||||
print('')
|
||||
print('Batch mode:')
|
||||
print(' -b Run in batch mode (process queue then exit)')
|
||||
print(' Useful for CI/CD, cron jobs, or one-time checks')
|
||||
print(' NOTE: Batch mode checks if Flask is running and aborts if port is in use')
|
||||
print(' Use -p PORT to specify a different port if needed')
|
||||
print('')
|
||||
sys.exit(0)
|
||||
|
||||
if '--version' in sys.argv or '-v' in sys.argv:
|
||||
print(f'changedetection.io {__version__}')
|
||||
sys.exit(0)
|
||||
|
||||
# Import heavy modules after help/version checks to keep startup fast for those flags
|
||||
from changedetectionio import store
|
||||
from changedetectionio.flask_app import changedetection_app
|
||||
|
||||
datastore_path = None
|
||||
do_cleanup = False
|
||||
# Optional URL to watch since start
|
||||
default_url = None
|
||||
# Set a default logger level
|
||||
logger_level = 'DEBUG'
|
||||
include_default_watches = True
|
||||
@@ -137,6 +190,13 @@ def main():
|
||||
port = int(os.environ.get('PORT', 5000))
|
||||
ssl_mode = False
|
||||
|
||||
# Lists for multiple URLs and their options
|
||||
urls_to_add = []
|
||||
url_options = {} # Key: index (0-based), Value: dict of options
|
||||
recheck_watches = None # None, 'all', or list of UUIDs
|
||||
recheck_repeat_count = 1 # Number of times to repeat recheck cycle
|
||||
batch_mode = False # Run once then exit when queue is empty
|
||||
|
||||
# On Windows, create and use a default path.
|
||||
if os.name == 'nt':
|
||||
datastore_path = os.path.expandvars(r'%APPDATA%\changedetection.io')
|
||||
@@ -145,10 +205,98 @@ def main():
|
||||
# Must be absolute so that send_from_directory doesnt try to make it relative to backend/
|
||||
datastore_path = os.path.join(os.getcwd(), "../datastore")
|
||||
|
||||
# Pre-process arguments to extract -u, -u<N>, and -r options before getopt
|
||||
# This allows unlimited -u0, -u1, -u2, ... options without predefining them
|
||||
cleaned_argv = ['changedetection.py'] # Start with program name
|
||||
i = 1
|
||||
while i < len(sys.argv):
|
||||
arg = sys.argv[i]
|
||||
|
||||
# Handle -u (add URL)
|
||||
if arg == '-u' and i + 1 < len(sys.argv):
|
||||
urls_to_add.append(sys.argv[i + 1])
|
||||
i += 2
|
||||
continue
|
||||
|
||||
# Handle -u<N> (set options for URL at index N)
|
||||
if arg.startswith('-u') and len(arg) > 2 and arg[2:].isdigit():
|
||||
idx = int(arg[2:])
|
||||
if i + 1 < len(sys.argv):
|
||||
try:
|
||||
import json
|
||||
url_options[idx] = json.loads(sys.argv[i + 1])
|
||||
except json.JSONDecodeError as e:
|
||||
print(f'Error: Invalid JSON for {arg}: {sys.argv[i + 1]}')
|
||||
print(f'JSON decode error: {e}')
|
||||
sys.exit(2)
|
||||
i += 2
|
||||
continue
|
||||
|
||||
# Handle -r (recheck watches)
|
||||
if arg == '-r' and i + 1 < len(sys.argv):
|
||||
recheck_arg = sys.argv[i + 1]
|
||||
if recheck_arg.lower() == 'all':
|
||||
recheck_watches = 'all'
|
||||
else:
|
||||
# Parse comma-separated list of UUIDs
|
||||
recheck_watches = [uuid.strip() for uuid in recheck_arg.split(',') if uuid.strip()]
|
||||
|
||||
# Check for optional repeat count as third argument
|
||||
if i + 2 < len(sys.argv) and sys.argv[i + 2].isdigit():
|
||||
recheck_repeat_count = int(sys.argv[i + 2])
|
||||
if recheck_repeat_count < 1:
|
||||
print(f'Error: Repeat count must be at least 1, got {recheck_repeat_count}')
|
||||
sys.exit(2)
|
||||
i += 3
|
||||
else:
|
||||
i += 2
|
||||
continue
|
||||
|
||||
# Handle -b (batch mode - run once and exit)
|
||||
if arg == '-b':
|
||||
batch_mode = True
|
||||
i += 1
|
||||
continue
|
||||
|
||||
# Keep other arguments for getopt
|
||||
cleaned_argv.append(arg)
|
||||
i += 1
|
||||
|
||||
try:
|
||||
opts, args = getopt.getopt(sys.argv[1:], "6Ccsd:h:p:l:u:", "port")
|
||||
except getopt.GetoptError:
|
||||
print('backend.py -s SSL enable -h [host] -p [port] -d [datastore path] -u [default URL to watch] -l [debug level - TRACE, DEBUG(default), INFO, SUCCESS, WARNING, ERROR, CRITICAL]')
|
||||
opts, args = getopt.getopt(cleaned_argv[1:], "6Ccsd:h:p:l:", "port")
|
||||
except getopt.GetoptError as e:
|
||||
print('Usage: changedetection.py [options]')
|
||||
print('')
|
||||
print('Standard options:')
|
||||
print(' -s SSL enable')
|
||||
print(' -h HOST Listen host (default: 0.0.0.0)')
|
||||
print(' -p PORT Listen port (default: 5000)')
|
||||
print(' -d PATH Datastore path')
|
||||
print(' -l LEVEL Log level (TRACE, DEBUG, INFO, SUCCESS, WARNING, ERROR, CRITICAL)')
|
||||
print(' -c Cleanup unused snapshots')
|
||||
print(' -C Create datastore directory if it doesn\'t exist')
|
||||
print('')
|
||||
print('Add URLs on startup:')
|
||||
print(' -u URL Add URL to watch (can be used multiple times)')
|
||||
print(' -u0 \'JSON\' Set options for first -u URL (e.g. \'{"processor":"text_json_diff"}\')')
|
||||
print(' -u1 \'JSON\' Set options for second -u URL (0-indexed)')
|
||||
print(' -u2 \'JSON\' Set options for third -u URL, etc.')
|
||||
print(' Available options: processor, fetch_backend, headers, method, etc.')
|
||||
print(' See model/Watch.py for all available options')
|
||||
print('')
|
||||
print('Recheck on startup:')
|
||||
print(' -r all Queue all watches for recheck on startup')
|
||||
print(' -r UUID,... Queue specific watches (comma-separated UUIDs)')
|
||||
print(' -r all N Queue all watches, wait for completion, repeat N times')
|
||||
print(' -r UUID,... N Queue specific watches, wait for completion, repeat N times')
|
||||
print('')
|
||||
print('Batch mode:')
|
||||
print(' -b Run in batch mode (process queue then exit)')
|
||||
print(' Useful for CI/CD, cron jobs, or one-time checks')
|
||||
print(' NOTE: Batch mode checks if Flask is running and aborts if port is in use')
|
||||
print(' Use -p PORT to specify a different port if needed')
|
||||
print('')
|
||||
print(f'Error: {e}')
|
||||
sys.exit(2)
|
||||
|
||||
create_datastore_dir = False
|
||||
@@ -173,10 +321,6 @@ def main():
|
||||
if opt == '-d':
|
||||
datastore_path = arg
|
||||
|
||||
if opt == '-u':
|
||||
default_url = arg
|
||||
include_default_watches = False
|
||||
|
||||
# Cleanup (remove text files that arent in the index)
|
||||
if opt == '-c':
|
||||
do_cleanup = True
|
||||
@@ -188,6 +332,10 @@ def main():
|
||||
if opt == '-l':
|
||||
logger_level = int(arg) if arg.isdigit() else arg.upper()
|
||||
|
||||
# If URLs are provided, don't include default watches
|
||||
if urls_to_add:
|
||||
include_default_watches = False
|
||||
|
||||
|
||||
logger.success(f"changedetection.io version {get_version()} starting.")
|
||||
# Launch using SocketIO run method for proper integration (if enabled)
|
||||
@@ -224,11 +372,16 @@ def main():
|
||||
logging.getLogger('pyppeteer.connection.Connection').setLevel(logging.WARNING)
|
||||
|
||||
# isnt there some @thingy to attach to each route to tell it, that this route needs a datastore
|
||||
app_config = {'datastore_path': datastore_path}
|
||||
app_config = {
|
||||
'datastore_path': datastore_path,
|
||||
'batch_mode': batch_mode,
|
||||
'recheck_watches': recheck_watches,
|
||||
'recheck_repeat_count': recheck_repeat_count
|
||||
}
|
||||
|
||||
if not os.path.isdir(app_config['datastore_path']):
|
||||
if create_datastore_dir:
|
||||
os.mkdir(app_config['datastore_path'])
|
||||
os.makedirs(app_config['datastore_path'], exist_ok=True)
|
||||
else:
|
||||
logger.critical(
|
||||
f"ERROR: Directory path for the datastore '{app_config['datastore_path']}'"
|
||||
@@ -249,11 +402,200 @@ def main():
|
||||
from changedetectionio.pluggy_interface import inject_datastore_into_plugins
|
||||
inject_datastore_into_plugins(datastore)
|
||||
|
||||
if default_url:
|
||||
datastore.add_watch(url = default_url)
|
||||
# Step 1: Add URLs with their options (if provided via -u flags)
|
||||
added_watch_uuids = []
|
||||
if urls_to_add:
|
||||
logger.info(f"Adding {len(urls_to_add)} URL(s) from command line")
|
||||
for idx, url in enumerate(urls_to_add):
|
||||
extras = url_options.get(idx, {})
|
||||
if extras:
|
||||
logger.debug(f"Adding watch {idx}: {url} with options: {extras}")
|
||||
else:
|
||||
logger.debug(f"Adding watch {idx}: {url}")
|
||||
|
||||
new_uuid = datastore.add_watch(url=url, extras=extras)
|
||||
if new_uuid:
|
||||
added_watch_uuids.append(new_uuid)
|
||||
logger.success(f"Added watch: {url} (UUID: {new_uuid})")
|
||||
else:
|
||||
logger.error(f"Failed to add watch: {url}")
|
||||
|
||||
app = changedetection_app(app_config, datastore)
|
||||
|
||||
# Step 2: Queue newly added watches (if -u was provided in batch mode)
|
||||
# This must happen AFTER app initialization so update_q is available
|
||||
if batch_mode and added_watch_uuids:
|
||||
from changedetectionio.flask_app import update_q
|
||||
from changedetectionio import queuedWatchMetaData, worker_handler
|
||||
|
||||
logger.info(f"Batch mode: Queuing {len(added_watch_uuids)} newly added watches")
|
||||
for watch_uuid in added_watch_uuids:
|
||||
try:
|
||||
worker_handler.queue_item_async_safe(
|
||||
update_q,
|
||||
queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid})
|
||||
)
|
||||
logger.debug(f"Queued newly added watch: {watch_uuid}")
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to queue watch {watch_uuid}: {e}")
|
||||
|
||||
# Step 3: Queue watches for recheck (if -r was provided)
|
||||
# This must happen AFTER app initialization so update_q is available
|
||||
if recheck_watches is not None:
|
||||
from changedetectionio.flask_app import update_q
|
||||
from changedetectionio import queuedWatchMetaData, worker_handler
|
||||
|
||||
watches_to_queue = []
|
||||
if recheck_watches == 'all':
|
||||
# Queue all watches, excluding those already queued in batch mode
|
||||
all_watches = list(datastore.data['watching'].keys())
|
||||
if batch_mode and added_watch_uuids:
|
||||
# Exclude newly added watches that were already queued in batch mode
|
||||
watches_to_queue = [uuid for uuid in all_watches if uuid not in added_watch_uuids]
|
||||
logger.info(f"Queuing {len(watches_to_queue)} existing watches for recheck ({len(added_watch_uuids)} newly added watches already queued)")
|
||||
else:
|
||||
watches_to_queue = all_watches
|
||||
logger.info(f"Queuing all {len(watches_to_queue)} watches for recheck")
|
||||
else:
|
||||
# Queue specific UUIDs
|
||||
watches_to_queue = recheck_watches
|
||||
logger.info(f"Queuing {len(watches_to_queue)} specific watches for recheck")
|
||||
|
||||
queued_count = 0
|
||||
for watch_uuid in watches_to_queue:
|
||||
if watch_uuid in datastore.data['watching']:
|
||||
try:
|
||||
worker_handler.queue_item_async_safe(
|
||||
update_q,
|
||||
queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid})
|
||||
)
|
||||
queued_count += 1
|
||||
logger.debug(f"Queued watch for recheck: {watch_uuid}")
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to queue watch {watch_uuid}: {e}")
|
||||
else:
|
||||
logger.warning(f"Watch UUID not found in datastore: {watch_uuid}")
|
||||
|
||||
logger.success(f"Successfully queued {queued_count} watches for recheck")
|
||||
|
||||
# Step 4: Setup batch mode monitor (if -b was provided)
|
||||
if batch_mode:
|
||||
from changedetectionio.flask_app import update_q
|
||||
|
||||
# Safety check: Ensure Flask app is not already running on this port
|
||||
# Batch mode should never run alongside the web server
|
||||
import socket
|
||||
test_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
|
||||
|
||||
try:
|
||||
# Try to bind to the configured host:port (no SO_REUSEADDR - strict check)
|
||||
test_socket.bind((host, port))
|
||||
test_socket.close()
|
||||
logger.debug(f"Batch mode: Port {port} is available (Flask app not running)")
|
||||
except OSError as e:
|
||||
test_socket.close()
|
||||
# errno 98 = EADDRINUSE (Linux)
|
||||
# errno 48 = EADDRINUSE (macOS)
|
||||
# errno 10048 = WSAEADDRINUSE (Windows)
|
||||
if e.errno in (48, 98, 10048) or "Address already in use" in str(e) or "already in use" in str(e).lower():
|
||||
logger.critical(f"ERROR: Batch mode cannot run - port {port} is already in use")
|
||||
logger.critical(f"The Flask web server appears to be running on {host}:{port}")
|
||||
logger.critical(f"Batch mode is designed for standalone operation (CI/CD, cron jobs, etc.)")
|
||||
logger.critical(f"Please either stop the Flask web server, or use a different port with -p PORT")
|
||||
sys.exit(1)
|
||||
else:
|
||||
# Some other socket error - log but continue (might be network configuration issue)
|
||||
logger.warning(f"Port availability check failed with unexpected error: {e}")
|
||||
logger.warning(f"Continuing with batch mode anyway - be aware of potential conflicts")
|
||||
|
||||
def queue_watches_for_recheck(datastore, iteration):
|
||||
"""Helper function to queue watches for recheck"""
|
||||
watches_to_queue = []
|
||||
if recheck_watches == 'all':
|
||||
all_watches = list(datastore.data['watching'].keys())
|
||||
if batch_mode and added_watch_uuids and iteration == 1:
|
||||
# Only exclude newly added watches on first iteration
|
||||
watches_to_queue = [uuid for uuid in all_watches if uuid not in added_watch_uuids]
|
||||
else:
|
||||
watches_to_queue = all_watches
|
||||
logger.info(f"Batch mode (iteration {iteration}): Queuing all {len(watches_to_queue)} watches")
|
||||
elif recheck_watches:
|
||||
watches_to_queue = recheck_watches
|
||||
logger.info(f"Batch mode (iteration {iteration}): Queuing {len(watches_to_queue)} specific watches")
|
||||
|
||||
queued_count = 0
|
||||
for watch_uuid in watches_to_queue:
|
||||
if watch_uuid in datastore.data['watching']:
|
||||
try:
|
||||
worker_handler.queue_item_async_safe(
|
||||
update_q,
|
||||
queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid})
|
||||
)
|
||||
queued_count += 1
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to queue watch {watch_uuid}: {e}")
|
||||
else:
|
||||
logger.warning(f"Watch UUID not found in datastore: {watch_uuid}")
|
||||
logger.success(f"Batch mode (iteration {iteration}): Successfully queued {queued_count} watches")
|
||||
return queued_count
|
||||
|
||||
def batch_mode_monitor():
|
||||
"""Monitor queue and workers, shutdown or repeat when work is complete"""
|
||||
import time
|
||||
|
||||
# Track iterations if repeat mode is enabled
|
||||
current_iteration = 1
|
||||
total_iterations = recheck_repeat_count if recheck_watches and recheck_repeat_count > 1 else 1
|
||||
|
||||
if total_iterations > 1:
|
||||
logger.info(f"Batch mode: Will repeat recheck {total_iterations} times")
|
||||
else:
|
||||
logger.info("Batch mode: Waiting for all queued items to complete...")
|
||||
|
||||
# Wait a bit for workers to start processing
|
||||
time.sleep(3)
|
||||
|
||||
try:
|
||||
while current_iteration <= total_iterations:
|
||||
logger.info(f"Batch mode: Waiting for iteration {current_iteration}/{total_iterations} to complete...")
|
||||
|
||||
# Use the shared wait_for_all_checks function
|
||||
completed = worker_handler.wait_for_all_checks(update_q, timeout=300)
|
||||
|
||||
if not completed:
|
||||
logger.warning(f"Batch mode: Iteration {current_iteration} timed out after 300 seconds")
|
||||
|
||||
logger.success(f"Batch mode: Iteration {current_iteration}/{total_iterations} completed")
|
||||
|
||||
# Check if we need to repeat
|
||||
if current_iteration < total_iterations:
|
||||
logger.info(f"Batch mode: Starting iteration {current_iteration + 1}...")
|
||||
current_iteration += 1
|
||||
|
||||
# Re-queue watches for next iteration
|
||||
queue_watches_for_recheck(datastore, current_iteration)
|
||||
|
||||
# Brief pause before continuing
|
||||
time.sleep(2)
|
||||
else:
|
||||
# All iterations complete
|
||||
logger.success(f"Batch mode: All {total_iterations} iterations completed, initiating shutdown")
|
||||
# Trigger shutdown
|
||||
import os, signal
|
||||
os.kill(os.getpid(), signal.SIGTERM)
|
||||
return
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Batch mode monitor error: {e}")
|
||||
logger.error(f"Initiating emergency shutdown")
|
||||
import os, signal
|
||||
os.kill(os.getpid(), signal.SIGTERM)
|
||||
|
||||
# Start monitor in background thread
|
||||
monitor_thread = threading.Thread(target=batch_mode_monitor, daemon=True, name="BatchModeMonitor")
|
||||
monitor_thread.start()
|
||||
logger.info("Batch mode enabled: Will exit after all queued items are processed")
|
||||
|
||||
# Get the SocketIO instance from the Flask app (created in flask_app.py)
|
||||
from changedetectionio.flask_app import socketio_server
|
||||
global socketio
|
||||
@@ -314,20 +656,33 @@ def main():
|
||||
app.wsgi_app = ProxyFix(app.wsgi_app, x_prefix=1, x_host=1)
|
||||
|
||||
|
||||
# SocketIO instance is already initialized in flask_app.py
|
||||
if socketio_server:
|
||||
if ssl_mode:
|
||||
logger.success(f"SSL mode enabled, attempting to start with '{ssl_cert_file}' and '{ssl_privkey_file}' in {os.getcwd()}")
|
||||
socketio.run(app, host=host, port=int(port), debug=False,
|
||||
ssl_context=(ssl_cert_file, ssl_privkey_file), allow_unsafe_werkzeug=True)
|
||||
else:
|
||||
socketio.run(app, host=host, port=int(port), debug=False, allow_unsafe_werkzeug=True)
|
||||
# In batch mode, skip starting the HTTP server - just keep workers running
|
||||
if batch_mode:
|
||||
logger.info("Batch mode: Skipping HTTP server startup, workers will process queue")
|
||||
logger.info("Batch mode: Main thread will wait for shutdown signal")
|
||||
# Keep main thread alive until batch monitor triggers shutdown
|
||||
try:
|
||||
while True:
|
||||
time.sleep(1)
|
||||
except KeyboardInterrupt:
|
||||
logger.info("Batch mode: Keyboard interrupt received")
|
||||
pass
|
||||
else:
|
||||
# Run Flask app without Socket.IO if disabled
|
||||
logger.info("Starting Flask app without Socket.IO server")
|
||||
if ssl_mode:
|
||||
logger.success(f"SSL mode enabled, attempting to start with '{ssl_cert_file}' and '{ssl_privkey_file}' in {os.getcwd()}")
|
||||
app.run(host=host, port=int(port), debug=False,
|
||||
ssl_context=(ssl_cert_file, ssl_privkey_file))
|
||||
# Normal mode: Start HTTP server
|
||||
# SocketIO instance is already initialized in flask_app.py
|
||||
if socketio_server:
|
||||
if ssl_mode:
|
||||
logger.success(f"SSL mode enabled, attempting to start with '{ssl_cert_file}' and '{ssl_privkey_file}' in {os.getcwd()}")
|
||||
socketio.run(app, host=host, port=int(port), debug=False,
|
||||
ssl_context=(ssl_cert_file, ssl_privkey_file), allow_unsafe_werkzeug=True)
|
||||
else:
|
||||
socketio.run(app, host=host, port=int(port), debug=False, allow_unsafe_werkzeug=True)
|
||||
else:
|
||||
app.run(host=host, port=int(port), debug=False)
|
||||
# Run Flask app without Socket.IO if disabled
|
||||
logger.info("Starting Flask app without Socket.IO server")
|
||||
if ssl_mode:
|
||||
logger.success(f"SSL mode enabled, attempting to start with '{ssl_cert_file}' and '{ssl_privkey_file}' in {os.getcwd()}")
|
||||
app.run(host=host, port=int(port), debug=False,
|
||||
ssl_context=(ssl_cert_file, ssl_privkey_file))
|
||||
else:
|
||||
app.run(host=host, port=int(port), debug=False)
|
||||
|
||||
@@ -89,11 +89,16 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
|
||||
continue
|
||||
|
||||
uuid = queued_item_data.item.get('uuid')
|
||||
# RACE CONDITION FIX: Check if this UUID is already being processed by another worker
|
||||
|
||||
# RACE CONDITION FIX: Atomically claim this UUID for processing
|
||||
from changedetectionio import worker_handler
|
||||
from changedetectionio.queuedWatchMetaData import PrioritizedItem
|
||||
if worker_handler.is_watch_running_by_another_worker(uuid, worker_id):
|
||||
logger.trace(f"Worker {worker_id} detected UUID {uuid} already being processed by another worker - deferring")
|
||||
|
||||
# Try to claim the UUID atomically - prevents duplicate processing
|
||||
if not worker_handler.claim_uuid_for_processing(uuid, worker_id):
|
||||
# Already being processed by another worker
|
||||
logger.trace(f"Worker {worker_id} detected UUID {uuid} already being processed - deferring")
|
||||
|
||||
# Sleep to avoid tight loop and give the other worker time to finish
|
||||
await asyncio.sleep(DEFER_SLEEP_TIME_ALREADY_QUEUED)
|
||||
|
||||
@@ -105,9 +110,6 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
|
||||
continue
|
||||
|
||||
fetch_start_time = round(time.time())
|
||||
|
||||
# Mark this UUID as being processed by this worker
|
||||
worker_handler.set_uuid_processing(uuid, worker_id=worker_id, processing=True)
|
||||
|
||||
try:
|
||||
if uuid in list(datastore.data['watching'].keys()) and datastore.data['watching'][uuid].get('url'):
|
||||
@@ -163,8 +165,10 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
|
||||
except ProcessorException as e:
|
||||
if e.screenshot:
|
||||
watch.save_screenshot(screenshot=e.screenshot)
|
||||
e.screenshot = None # Free memory immediately
|
||||
if e.xpath_data:
|
||||
watch.save_xpath_data(data=e.xpath_data)
|
||||
e.xpath_data = None # Free memory immediately
|
||||
datastore.update_watch(uuid=uuid, update_obj={'last_error': e.message})
|
||||
process_changedetection_results = False
|
||||
|
||||
@@ -184,9 +188,11 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
|
||||
|
||||
if e.screenshot:
|
||||
watch.save_screenshot(screenshot=e.screenshot, as_error=True)
|
||||
e.screenshot = None # Free memory immediately
|
||||
|
||||
if e.xpath_data:
|
||||
watch.save_xpath_data(data=e.xpath_data)
|
||||
e.xpath_data = None # Free memory immediately
|
||||
|
||||
process_changedetection_results = False
|
||||
|
||||
@@ -205,8 +211,10 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
|
||||
|
||||
if e.screenshot:
|
||||
watch.save_screenshot(screenshot=e.screenshot, as_error=True)
|
||||
e.screenshot = None # Free memory immediately
|
||||
if e.xpath_data:
|
||||
watch.save_xpath_data(data=e.xpath_data, as_error=True)
|
||||
e.xpath_data = None # Free memory immediately
|
||||
if e.page_text:
|
||||
watch.save_error_text(contents=e.page_text)
|
||||
|
||||
@@ -223,9 +231,11 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
|
||||
# Filter wasnt found, but we should still update the visual selector so that they can have a chance to set it up again
|
||||
if e.screenshot:
|
||||
watch.save_screenshot(screenshot=e.screenshot)
|
||||
e.screenshot = None # Free memory immediately
|
||||
|
||||
if e.xpath_data:
|
||||
watch.save_xpath_data(data=e.xpath_data)
|
||||
e.xpath_data = None # Free memory immediately
|
||||
|
||||
# Only when enabled, send the notification
|
||||
if watch.get('filter_failure_notification_send', False):
|
||||
@@ -317,6 +327,7 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
|
||||
err_text = "Error running JS Actions - Page request - "+e.message
|
||||
if e.screenshot:
|
||||
watch.save_screenshot(screenshot=e.screenshot, as_error=True)
|
||||
e.screenshot = None # Free memory immediately
|
||||
datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
|
||||
'last_check_status': e.status_code})
|
||||
process_changedetection_results = False
|
||||
@@ -328,6 +339,7 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
|
||||
|
||||
if e.screenshot:
|
||||
watch.save_screenshot(screenshot=e.screenshot, as_error=True)
|
||||
e.screenshot = None # Free memory immediately
|
||||
|
||||
datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
|
||||
'last_check_status': e.status_code,
|
||||
@@ -369,9 +381,17 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
|
||||
if changed_detected or not watch.history_n:
|
||||
if update_handler.screenshot:
|
||||
watch.save_screenshot(screenshot=update_handler.screenshot)
|
||||
# Free screenshot memory immediately after saving
|
||||
update_handler.screenshot = None
|
||||
if hasattr(update_handler, 'fetcher') and hasattr(update_handler.fetcher, 'screenshot'):
|
||||
update_handler.fetcher.screenshot = None
|
||||
|
||||
if update_handler.xpath_data:
|
||||
watch.save_xpath_data(data=update_handler.xpath_data)
|
||||
# Free xpath data memory
|
||||
update_handler.xpath_data = None
|
||||
if hasattr(update_handler, 'fetcher') and hasattr(update_handler.fetcher, 'xpath_data'):
|
||||
update_handler.fetcher.xpath_data = None
|
||||
|
||||
# Ensure unique timestamp for history
|
||||
if watch.newest_history_key and int(fetch_start_time) == int(watch.newest_history_key):
|
||||
@@ -438,6 +458,20 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
|
||||
update_handler.fetcher.clear_content()
|
||||
logger.debug(f"Cleared fetcher content for UUID {uuid}")
|
||||
|
||||
# Explicitly delete update_handler to free all references
|
||||
if update_handler:
|
||||
del update_handler
|
||||
update_handler = None
|
||||
|
||||
# Force aggressive memory cleanup after clearing
|
||||
import gc
|
||||
gc.collect()
|
||||
try:
|
||||
import ctypes
|
||||
ctypes.CDLL('libc.so.6').malloc_trim(0)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Worker {worker_id} unexpected error processing {uuid}: {e}")
|
||||
logger.error(f"Worker {worker_id} traceback:", exc_info=True)
|
||||
@@ -455,8 +489,8 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
|
||||
except Exception as e:
|
||||
logger.error(f"Exception while cleaning/quit after calling browser: {e}")
|
||||
try:
|
||||
# Mark UUID as no longer being processed by this worker
|
||||
worker_handler.set_uuid_processing(uuid, worker_id=worker_id, processing=False)
|
||||
# Release UUID from processing (thread-safe)
|
||||
worker_handler.release_uuid_from_processing(uuid, worker_id=worker_id)
|
||||
|
||||
# Send completion signal
|
||||
if watch:
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import time
|
||||
import threading
|
||||
from flask import Blueprint, request, redirect, url_for, flash, render_template, session
|
||||
from flask import Blueprint, request, redirect, url_for, flash, render_template, session, current_app
|
||||
from flask_babel import gettext
|
||||
from loguru import logger
|
||||
|
||||
@@ -404,4 +404,25 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_handle
|
||||
|
||||
return redirect(url_for('watchlist.index'))
|
||||
|
||||
@ui_blueprint.route("/language/auto-detect", methods=['GET'])
|
||||
def delete_locale_language_session_var_if_it_exists():
|
||||
"""Clear the session locale preference to auto-detect from browser Accept-Language header"""
|
||||
if 'locale' in session:
|
||||
session.pop('locale', None)
|
||||
# Refresh Flask-Babel to clear cached locale
|
||||
from flask_babel import refresh
|
||||
refresh()
|
||||
flash(gettext("Language set to auto-detect from browser"))
|
||||
|
||||
# Check if there's a redirect parameter to return to the same page
|
||||
redirect_url = request.args.get('redirect')
|
||||
|
||||
# If redirect is provided and safe, use it
|
||||
from changedetectionio.is_safe_url import is_safe_url
|
||||
if redirect_url and is_safe_url(redirect_url, current_app):
|
||||
return redirect(redirect_url)
|
||||
|
||||
# Otherwise redirect to watchlist
|
||||
return redirect(url_for('watchlist.index'))
|
||||
|
||||
return ui_blueprint
|
||||
@@ -71,10 +71,19 @@ class Fetcher():
|
||||
supports_screenshots = False # Can capture page screenshots
|
||||
supports_xpath_element_data = False # Can extract xpath element positions/data for visual selector
|
||||
|
||||
# Screenshot element locking - prevents layout shifts during screenshot capture
|
||||
# Only needed for visual comparison (image_ssim_diff processor)
|
||||
# Locks element dimensions in the first viewport to prevent headers/ads from resizing
|
||||
lock_viewport_elements = False # Default: disabled for performance
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
if kwargs and 'screenshot_format' in kwargs:
|
||||
self.screenshot_format = kwargs.get('screenshot_format')
|
||||
|
||||
# Allow lock_viewport_elements to be set via kwargs
|
||||
if kwargs and 'lock_viewport_elements' in kwargs:
|
||||
self.lock_viewport_elements = kwargs.get('lock_viewport_elements')
|
||||
|
||||
|
||||
@classmethod
|
||||
def get_status_icon_data(cls):
|
||||
|
||||
@@ -10,18 +10,20 @@ from changedetectionio.content_fetchers import SCREENSHOT_MAX_HEIGHT_DEFAULT, vi
|
||||
from changedetectionio.content_fetchers.base import Fetcher, manage_user_agent
|
||||
from changedetectionio.content_fetchers.exceptions import PageUnloadable, Non200ErrorCodeReceived, EmptyReply, ScreenshotUnavailable
|
||||
|
||||
async def capture_full_page_async(page, screenshot_format='JPEG'):
|
||||
async def capture_full_page_async(page, screenshot_format='JPEG', watch_uuid=None, lock_viewport_elements=False):
|
||||
import os
|
||||
import time
|
||||
import multiprocessing
|
||||
|
||||
start = time.time()
|
||||
watch_info = f"[{watch_uuid}] " if watch_uuid else ""
|
||||
|
||||
setup_start = time.time()
|
||||
page_height = await page.evaluate("document.documentElement.scrollHeight")
|
||||
page_width = await page.evaluate("document.documentElement.scrollWidth")
|
||||
original_viewport = page.viewport_size
|
||||
dimensions_time = time.time() - setup_start
|
||||
|
||||
logger.debug(f"Playwright viewport size {page.viewport_size} page height {page_height} page width {page_width}")
|
||||
logger.debug(f"{watch_info}Playwright viewport size {page.viewport_size} page height {page_height} page width {page_width} (got dimensions in {dimensions_time:.2f}s)")
|
||||
|
||||
# Use an approach similar to puppeteer: set a larger viewport and take screenshots in chunks
|
||||
step_size = SCREENSHOT_SIZE_STITCH_THRESHOLD # Size that won't cause GPU to overflow
|
||||
@@ -29,25 +31,31 @@ async def capture_full_page_async(page, screenshot_format='JPEG'):
|
||||
y = 0
|
||||
elements_locked = False
|
||||
|
||||
if page_height > page.viewport_size['height']:
|
||||
|
||||
# Lock all element dimensions BEFORE screenshot to prevent CSS media queries from resizing
|
||||
# capture_full_page_async() changes viewport height which triggers @media (min-height) rules
|
||||
# Only lock viewport elements if explicitly enabled (for image_ssim_diff processor)
|
||||
# This prevents headers/ads from resizing when viewport changes
|
||||
if lock_viewport_elements and page_height > page.viewport_size['height']:
|
||||
lock_start = time.time()
|
||||
lock_elements_js_path = os.path.join(os.path.dirname(__file__), 'res', 'lock-elements-sizing.js')
|
||||
with open(lock_elements_js_path, 'r') as f:
|
||||
lock_elements_js = f.read()
|
||||
await page.evaluate(lock_elements_js)
|
||||
elements_locked = True
|
||||
lock_time = time.time() - lock_start
|
||||
logger.debug(f"{watch_info}Viewport element locking enabled (took {lock_time:.2f}s)")
|
||||
|
||||
logger.debug("Element dimensions locked before screenshot capture")
|
||||
|
||||
if page_height > page.viewport_size['height']:
|
||||
if page_height < step_size:
|
||||
step_size = page_height # Incase page is bigger than default viewport but smaller than proposed step size
|
||||
logger.debug(f"Setting bigger viewport to step through large page width W{page.viewport_size['width']}xH{step_size} because page_height > viewport_size")
|
||||
viewport_start = time.time()
|
||||
logger.debug(f"{watch_info}Setting bigger viewport to step through large page width W{page.viewport_size['width']}xH{step_size} because page_height > viewport_size")
|
||||
# Set viewport to a larger size to capture more content at once
|
||||
await page.set_viewport_size({'width': page.viewport_size['width'], 'height': step_size})
|
||||
viewport_time = time.time() - viewport_start
|
||||
logger.debug(f"{watch_info}Viewport changed to {page.viewport_size['width']}x{step_size} (took {viewport_time:.2f}s)")
|
||||
|
||||
# Capture screenshots in chunks up to the max total height
|
||||
capture_start = time.time()
|
||||
chunk_times = []
|
||||
# Use PNG for better quality (no compression artifacts), JPEG for smaller size
|
||||
screenshot_type = screenshot_format.lower() if screenshot_format else 'jpeg'
|
||||
# PNG should use quality 100, JPEG uses configurable quality
|
||||
@@ -69,7 +77,11 @@ async def capture_full_page_async(page, screenshot_format='JPEG'):
|
||||
if screenshot_type == 'jpeg':
|
||||
screenshot_kwargs['quality'] = screenshot_quality
|
||||
|
||||
chunk_start = time.time()
|
||||
screenshot_chunks.append(await page.screenshot(**screenshot_kwargs))
|
||||
chunk_time = time.time() - chunk_start
|
||||
chunk_times.append(chunk_time)
|
||||
logger.debug(f"{watch_info}Chunk {len(screenshot_chunks)} captured in {chunk_time:.2f}s")
|
||||
y += step_size
|
||||
|
||||
# Restore original viewport size
|
||||
@@ -81,40 +93,54 @@ async def capture_full_page_async(page, screenshot_format='JPEG'):
|
||||
with open(unlock_elements_js_path, 'r') as f:
|
||||
unlock_elements_js = f.read()
|
||||
await page.evaluate(unlock_elements_js)
|
||||
logger.debug("Element dimensions unlocked after screenshot capture")
|
||||
logger.debug(f"{watch_info}Element dimensions unlocked after screenshot capture")
|
||||
|
||||
capture_time = time.time() - capture_start
|
||||
total_capture_time = sum(chunk_times)
|
||||
logger.debug(f"{watch_info}All {len(screenshot_chunks)} chunks captured in {capture_time:.2f}s (total chunk time: {total_capture_time:.2f}s)")
|
||||
|
||||
# If we have multiple chunks, stitch them together
|
||||
if len(screenshot_chunks) > 1:
|
||||
logger.debug(f"Screenshot stitching {len(screenshot_chunks)} chunks together")
|
||||
stitch_start = time.time()
|
||||
logger.debug(f"{watch_info}Starting stitching of {len(screenshot_chunks)} chunks")
|
||||
|
||||
# For small number of chunks (2-3), stitch inline to avoid multiprocessing overhead
|
||||
# Only use separate process for many chunks (4+) to avoid blocking the event loop
|
||||
if len(screenshot_chunks) <= 3:
|
||||
from changedetectionio.content_fetchers.screenshot_handler import stitch_images_inline
|
||||
screenshot = stitch_images_inline(screenshot_chunks, page_height, SCREENSHOT_MAX_TOTAL_HEIGHT)
|
||||
else:
|
||||
# Use separate process for many chunks to avoid blocking
|
||||
# Always use spawn for thread safety - consistent behavior in tests and production
|
||||
from changedetectionio.content_fetchers.screenshot_handler import stitch_images_worker
|
||||
ctx = multiprocessing.get_context('spawn')
|
||||
parent_conn, child_conn = ctx.Pipe()
|
||||
p = ctx.Process(target=stitch_images_worker, args=(child_conn, screenshot_chunks, page_height, SCREENSHOT_MAX_TOTAL_HEIGHT))
|
||||
p.start()
|
||||
screenshot = parent_conn.recv_bytes()
|
||||
p.join()
|
||||
# Explicit cleanup
|
||||
del p
|
||||
del parent_conn, child_conn
|
||||
# Always use spawn subprocess for ANY stitching (2+ chunks)
|
||||
# PIL allocates at C level and Python GC never releases it - subprocess exit forces OS to reclaim
|
||||
# Trade-off: 35MB resource_tracker vs 500MB+ PIL leak in main process
|
||||
from changedetectionio.content_fetchers.screenshot_handler import stitch_images_worker_raw_bytes
|
||||
import multiprocessing
|
||||
import struct
|
||||
|
||||
ctx = multiprocessing.get_context('spawn')
|
||||
parent_conn, child_conn = ctx.Pipe()
|
||||
p = ctx.Process(target=stitch_images_worker_raw_bytes, args=(child_conn, page_height, SCREENSHOT_MAX_TOTAL_HEIGHT))
|
||||
p.start()
|
||||
|
||||
# Send via raw bytes (no pickle)
|
||||
parent_conn.send_bytes(struct.pack('I', len(screenshot_chunks)))
|
||||
for chunk in screenshot_chunks:
|
||||
parent_conn.send_bytes(chunk)
|
||||
|
||||
screenshot = parent_conn.recv_bytes()
|
||||
p.join()
|
||||
|
||||
parent_conn.close()
|
||||
child_conn.close()
|
||||
del p, parent_conn, child_conn
|
||||
|
||||
stitch_time = time.time() - stitch_start
|
||||
total_time = time.time() - start
|
||||
setup_time = total_time - capture_time - stitch_time
|
||||
logger.debug(
|
||||
f"Screenshot (chunked/stitched) - Page height: {page_height} Capture height: {SCREENSHOT_MAX_TOTAL_HEIGHT} - Stitched together in {time.time() - start:.2f}s")
|
||||
# Explicit cleanup
|
||||
del screenshot_chunks
|
||||
screenshot_chunks = None
|
||||
f"{watch_info}Screenshot complete - Page height: {page_height}px, Capture height: {SCREENSHOT_MAX_TOTAL_HEIGHT}px | "
|
||||
f"Setup: {setup_time:.2f}s, Capture: {capture_time:.2f}s, Stitching: {stitch_time:.2f}s, Total: {total_time:.2f}s")
|
||||
return screenshot
|
||||
|
||||
total_time = time.time() - start
|
||||
setup_time = total_time - capture_time
|
||||
logger.debug(
|
||||
f"Screenshot Page height: {page_height} Capture height: {SCREENSHOT_MAX_TOTAL_HEIGHT} - Stitched together in {time.time() - start:.2f}s")
|
||||
f"{watch_info}Screenshot complete - Page height: {page_height}px, Capture height: {SCREENSHOT_MAX_TOTAL_HEIGHT}px | "
|
||||
f"Setup: {setup_time:.2f}s, Single chunk: {capture_time:.2f}s, Total: {total_time:.2f}s")
|
||||
|
||||
return screenshot_chunks[0]
|
||||
|
||||
@@ -184,7 +210,8 @@ class fetcher(Fetcher):
|
||||
|
||||
async def screenshot_step(self, step_n=''):
|
||||
super().screenshot_step(step_n=step_n)
|
||||
screenshot = await capture_full_page_async(page=self.page, screenshot_format=self.screenshot_format)
|
||||
watch_uuid = getattr(self, 'watch_uuid', None)
|
||||
screenshot = await capture_full_page_async(page=self.page, screenshot_format=self.screenshot_format, watch_uuid=watch_uuid, lock_viewport_elements=self.lock_viewport_elements)
|
||||
|
||||
# Request GC immediately after screenshot to free memory
|
||||
# Screenshots can be large and browser steps take many of them
|
||||
@@ -233,6 +260,7 @@ class fetcher(Fetcher):
|
||||
import playwright._impl._errors
|
||||
import time
|
||||
self.delete_browser_steps_screenshots()
|
||||
self.watch_uuid = watch_uuid # Store for use in screenshot_step
|
||||
response = None
|
||||
|
||||
async with async_playwright() as p:
|
||||
@@ -318,7 +346,7 @@ class fetcher(Fetcher):
|
||||
logger.error(f"Error fetching FavIcon info {str(e)}, continuing.")
|
||||
|
||||
if self.status_code != 200 and not ignore_status_codes:
|
||||
screenshot = await capture_full_page_async(self.page, screenshot_format=self.screenshot_format)
|
||||
screenshot = await capture_full_page_async(self.page, screenshot_format=self.screenshot_format, watch_uuid=watch_uuid, lock_viewport_elements=self.lock_viewport_elements)
|
||||
# Cleanup before raising to prevent memory leak
|
||||
await self.page.close()
|
||||
await context.close()
|
||||
@@ -374,7 +402,17 @@ class fetcher(Fetcher):
|
||||
# which will significantly increase the IO size between the server and client, it's recommended to use the lowest
|
||||
# acceptable screenshot quality here
|
||||
# The actual screenshot - this always base64 and needs decoding! horrible! huge CPU usage
|
||||
self.screenshot = await capture_full_page_async(page=self.page, screenshot_format=self.screenshot_format)
|
||||
self.screenshot = await capture_full_page_async(page=self.page, screenshot_format=self.screenshot_format, watch_uuid=watch_uuid, lock_viewport_elements=self.lock_viewport_elements)
|
||||
|
||||
# Force aggressive memory cleanup - screenshots are large and base64 decode creates temporary buffers
|
||||
await self.page.request_gc()
|
||||
gc.collect()
|
||||
# Release C-level memory from base64 decode back to OS
|
||||
try:
|
||||
import ctypes
|
||||
ctypes.CDLL('libc.so.6').malloc_trim(0)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
except ScreenshotUnavailable:
|
||||
# Re-raise screenshot unavailable exceptions
|
||||
|
||||
@@ -20,18 +20,20 @@ from changedetectionio.content_fetchers.exceptions import PageUnloadable, Non200
|
||||
# Screenshots also travel via the ws:// (websocket) meaning that the binary data is base64 encoded
|
||||
# which will significantly increase the IO size between the server and client, it's recommended to use the lowest
|
||||
# acceptable screenshot quality here
|
||||
async def capture_full_page(page, screenshot_format='JPEG'):
|
||||
async def capture_full_page(page, screenshot_format='JPEG', watch_uuid=None, lock_viewport_elements=False):
|
||||
import os
|
||||
import time
|
||||
import multiprocessing
|
||||
|
||||
start = time.time()
|
||||
watch_info = f"[{watch_uuid}] " if watch_uuid else ""
|
||||
|
||||
setup_start = time.time()
|
||||
page_height = await page.evaluate("document.documentElement.scrollHeight")
|
||||
page_width = await page.evaluate("document.documentElement.scrollWidth")
|
||||
original_viewport = page.viewport
|
||||
dimensions_time = time.time() - setup_start
|
||||
|
||||
logger.debug(f"Puppeteer viewport size {page.viewport} page height {page_height} page width {page_width}")
|
||||
logger.debug(f"{watch_info}Puppeteer viewport size {page.viewport} page height {page_height} page width {page_width} (got dimensions in {dimensions_time:.2f}s)")
|
||||
|
||||
# Bug 3 in Playwright screenshot handling
|
||||
# Some bug where it gives the wrong screenshot size, but making a request with the clip set first seems to solve it
|
||||
@@ -50,20 +52,35 @@ async def capture_full_page(page, screenshot_format='JPEG'):
|
||||
screenshot_chunks = []
|
||||
y = 0
|
||||
elements_locked = False
|
||||
if page_height > page.viewport['height']:
|
||||
# Lock all element dimensions BEFORE screenshot to prevent CSS media queries from resizing
|
||||
# capture_full_page() changes viewport height which triggers @media (min-height) rules
|
||||
|
||||
# Only lock viewport elements if explicitly enabled (for image_ssim_diff processor)
|
||||
# This prevents headers/ads from resizing when viewport changes
|
||||
if lock_viewport_elements and page_height > page.viewport['height']:
|
||||
lock_start = time.time()
|
||||
lock_elements_js_path = os.path.join(os.path.dirname(__file__), 'res', 'lock-elements-sizing.js')
|
||||
file_read_start = time.time()
|
||||
with open(lock_elements_js_path, 'r') as f:
|
||||
lock_elements_js = f.read()
|
||||
await page.evaluate(lock_elements_js)
|
||||
elements_locked = True
|
||||
logger.debug("Element dimensions locked before screenshot capture")
|
||||
file_read_time = time.time() - file_read_start
|
||||
|
||||
evaluate_start = time.time()
|
||||
await page.evaluate(lock_elements_js)
|
||||
evaluate_time = time.time() - evaluate_start
|
||||
|
||||
elements_locked = True
|
||||
lock_time = time.time() - lock_start
|
||||
logger.debug(f"{watch_info}Viewport element locking enabled - File read: {file_read_time:.3f}s, Browser evaluate: {evaluate_time:.2f}s, Total: {lock_time:.2f}s")
|
||||
|
||||
if page_height > page.viewport['height']:
|
||||
if page_height < step_size:
|
||||
step_size = page_height # Incase page is bigger than default viewport but smaller than proposed step size
|
||||
viewport_start = time.time()
|
||||
await page.setViewport({'width': page.viewport['width'], 'height': step_size})
|
||||
viewport_time = time.time() - viewport_start
|
||||
logger.debug(f"{watch_info}Viewport changed to {page.viewport['width']}x{step_size} (took {viewport_time:.2f}s)")
|
||||
|
||||
capture_start = time.time()
|
||||
chunk_times = []
|
||||
while y < min(page_height, SCREENSHOT_MAX_TOTAL_HEIGHT):
|
||||
# better than scrollTo incase they override it in the page
|
||||
await page.evaluate(
|
||||
@@ -82,7 +99,11 @@ async def capture_full_page(page, screenshot_format='JPEG'):
|
||||
if screenshot_type == 'jpeg':
|
||||
screenshot_kwargs['quality'] = screenshot_quality
|
||||
|
||||
chunk_start = time.time()
|
||||
screenshot_chunks.append(await page.screenshot(**screenshot_kwargs))
|
||||
chunk_time = time.time() - chunk_start
|
||||
chunk_times.append(chunk_time)
|
||||
logger.debug(f"{watch_info}Chunk {len(screenshot_chunks)} captured in {chunk_time:.2f}s")
|
||||
y += step_size
|
||||
|
||||
await page.setViewport({'width': original_viewport['width'], 'height': original_viewport['height']})
|
||||
@@ -93,26 +114,53 @@ async def capture_full_page(page, screenshot_format='JPEG'):
|
||||
with open(unlock_elements_js_path, 'r') as f:
|
||||
unlock_elements_js = f.read()
|
||||
await page.evaluate(unlock_elements_js)
|
||||
logger.debug("Element dimensions unlocked after screenshot capture")
|
||||
logger.debug(f"{watch_info}Element dimensions unlocked after screenshot capture")
|
||||
|
||||
capture_time = time.time() - capture_start
|
||||
total_capture_time = sum(chunk_times)
|
||||
logger.debug(f"{watch_info}All {len(screenshot_chunks)} chunks captured in {capture_time:.2f}s (total chunk time: {total_capture_time:.2f}s)")
|
||||
|
||||
if len(screenshot_chunks) > 1:
|
||||
# Always use spawn for thread safety - consistent behavior in tests and production
|
||||
from changedetectionio.content_fetchers.screenshot_handler import stitch_images_worker
|
||||
logger.debug(f"Screenshot stitching {len(screenshot_chunks)} chunks together")
|
||||
stitch_start = time.time()
|
||||
logger.debug(f"{watch_info}Starting stitching of {len(screenshot_chunks)} chunks")
|
||||
|
||||
# Always use spawn subprocess for ANY stitching (2+ chunks)
|
||||
# PIL allocates at C level and Python GC never releases it - subprocess exit forces OS to reclaim
|
||||
# Trade-off: 35MB resource_tracker vs 500MB+ PIL leak in main process
|
||||
from changedetectionio.content_fetchers.screenshot_handler import stitch_images_worker_raw_bytes
|
||||
import multiprocessing
|
||||
import struct
|
||||
|
||||
ctx = multiprocessing.get_context('spawn')
|
||||
parent_conn, child_conn = ctx.Pipe()
|
||||
p = ctx.Process(target=stitch_images_worker, args=(child_conn, screenshot_chunks, page_height, SCREENSHOT_MAX_TOTAL_HEIGHT))
|
||||
p = ctx.Process(target=stitch_images_worker_raw_bytes, args=(child_conn, page_height, SCREENSHOT_MAX_TOTAL_HEIGHT))
|
||||
p.start()
|
||||
|
||||
# Send via raw bytes (no pickle)
|
||||
parent_conn.send_bytes(struct.pack('I', len(screenshot_chunks)))
|
||||
for chunk in screenshot_chunks:
|
||||
parent_conn.send_bytes(chunk)
|
||||
|
||||
screenshot = parent_conn.recv_bytes()
|
||||
p.join()
|
||||
logger.debug(
|
||||
f"Screenshot (chunked/stitched) - Page height: {page_height} Capture height: {SCREENSHOT_MAX_TOTAL_HEIGHT} - Stitched together in {time.time() - start:.2f}s")
|
||||
|
||||
screenshot_chunks = None
|
||||
parent_conn.close()
|
||||
child_conn.close()
|
||||
del p, parent_conn, child_conn
|
||||
|
||||
stitch_time = time.time() - stitch_start
|
||||
total_time = time.time() - start
|
||||
setup_time = total_time - capture_time - stitch_time
|
||||
logger.debug(
|
||||
f"{watch_info}Screenshot complete - Page height: {page_height}px, Capture height: {SCREENSHOT_MAX_TOTAL_HEIGHT}px | "
|
||||
f"Setup: {setup_time:.2f}s, Capture: {capture_time:.2f}s, Stitching: {stitch_time:.2f}s, Total: {total_time:.2f}s")
|
||||
return screenshot
|
||||
|
||||
total_time = time.time() - start
|
||||
setup_time = total_time - capture_time
|
||||
logger.debug(
|
||||
f"Screenshot Page height: {page_height} Capture height: {SCREENSHOT_MAX_TOTAL_HEIGHT} - Stitched together in {time.time() - start:.2f}s")
|
||||
f"{watch_info}Screenshot complete - Page height: {page_height}px, Capture height: {SCREENSHOT_MAX_TOTAL_HEIGHT}px | "
|
||||
f"Setup: {setup_time:.2f}s, Single chunk: {capture_time:.2f}s, Total: {total_time:.2f}s")
|
||||
return screenshot_chunks[0]
|
||||
|
||||
|
||||
@@ -357,7 +405,7 @@ class fetcher(Fetcher):
|
||||
logger.error(f"Error fetching FavIcon info {str(e)}, continuing.")
|
||||
|
||||
if self.status_code != 200 and not ignore_status_codes:
|
||||
screenshot = await capture_full_page(page=self.page, screenshot_format=self.screenshot_format)
|
||||
screenshot = await capture_full_page(page=self.page, screenshot_format=self.screenshot_format, watch_uuid=watch_uuid, lock_viewport_elements=self.lock_viewport_elements)
|
||||
|
||||
raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, screenshot=screenshot)
|
||||
|
||||
@@ -387,7 +435,17 @@ class fetcher(Fetcher):
|
||||
|
||||
# Now take screenshot (scrolling may trigger layout changes, but measurements are already captured)
|
||||
logger.debug(f"Screenshot format {self.screenshot_format}")
|
||||
self.screenshot = await capture_full_page(page=self.page, screenshot_format=self.screenshot_format)
|
||||
self.screenshot = await capture_full_page(page=self.page, screenshot_format=self.screenshot_format, watch_uuid=watch_uuid, lock_viewport_elements=self.lock_viewport_elements)
|
||||
|
||||
# Force aggressive memory cleanup - pyppeteer base64 decode creates temporary buffers
|
||||
import gc
|
||||
gc.collect()
|
||||
# Release C-level memory from base64 decode back to OS
|
||||
try:
|
||||
import ctypes
|
||||
ctypes.CDLL('libc.so.6').malloc_trim(0)
|
||||
except Exception:
|
||||
pass
|
||||
self.xpath_data = await self.page.evaluate(XPATH_ELEMENT_JS, {
|
||||
"visualselector_xpath_selectors": visualselector_xpath_selectors,
|
||||
"max_height": MAX_TOTAL_HEIGHT
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/**
|
||||
* Lock Element Dimensions for Screenshot Capture
|
||||
* Lock Element Dimensions for Screenshot Capture (First Viewport Only)
|
||||
*
|
||||
* THE PROBLEM:
|
||||
* When taking full-page screenshots of tall pages, Chrome/Puppeteer/Playwright need to:
|
||||
@@ -10,40 +10,31 @@
|
||||
* However, changing the viewport height triggers CSS media queries like:
|
||||
* @media (min-height: 860px) { .ad { height: 250px; } }
|
||||
*
|
||||
* This causes elements (especially ads) to resize during screenshot capture, creating a mismatch:
|
||||
* - Screenshot shows element at NEW size (after media query triggered)
|
||||
* - xpath element coordinates measured at OLD size (before viewport change)
|
||||
* - Visual selector overlays don't align with screenshot
|
||||
*
|
||||
* EXAMPLE BUG:
|
||||
* - Initial viewport: 1280x800, ad height: 138px, article position: 279px ✓
|
||||
* - Viewport changes to 1280x3809 for screenshot
|
||||
* - Media query triggers: ad expands to 250px
|
||||
* - All content below shifts down by 112px (250-138)
|
||||
* - Article now at position: 391px (279+112)
|
||||
* - But xpath data says 279px → 112px mismatch! ✗
|
||||
* This causes elements (especially ads/headers) to resize during screenshot capture.
|
||||
*
|
||||
* THE SOLUTION:
|
||||
* Before changing viewport, lock ALL element dimensions with !important inline styles.
|
||||
* Inline styles with !important override media query CSS, preventing layout changes.
|
||||
* Lock element dimensions in the FIRST VIEWPORT ONLY with !important inline styles.
|
||||
* This prevents headers, navigation, and top ads from resizing when viewport changes.
|
||||
* We only lock the visible portion because:
|
||||
* - Most layout shifts happen in headers/navbars/top ads
|
||||
* - Locking only visible elements is 100x+ faster (100-200 elements vs 10,000+)
|
||||
* - Below-fold content shifts don't affect visual comparison accuracy
|
||||
*
|
||||
* WHAT THIS SCRIPT DOES:
|
||||
* 1. Iterates through every element on the page
|
||||
* 2. Captures current computed dimensions (width, height)
|
||||
* 3. Sets inline styles with !important to freeze those dimensions
|
||||
* 1. Gets current viewport height
|
||||
* 2. Finds elements within first viewport (top of page to bottom of screen)
|
||||
* 3. Locks their dimensions with !important inline styles
|
||||
* 4. Disables ResizeObserver API (for JS-based resizing)
|
||||
* 5. When viewport changes for screenshot, media queries can't resize anything
|
||||
* 6. Layout remains consistent → xpath coordinates match screenshot ✓
|
||||
*
|
||||
* USAGE:
|
||||
* Execute this script BEFORE calling capture_full_page() / screenshot functions.
|
||||
* The page must be fully loaded and settled at its initial viewport size.
|
||||
* No need to restore state afterward - page is closed after screenshot.
|
||||
* Only enabled for image_ssim_diff processor (visual comparison).
|
||||
* Default: OFF for performance.
|
||||
*
|
||||
* PERFORMANCE:
|
||||
* - Iterates all DOM elements (can be 1000s on complex pages)
|
||||
* - Typically completes in 50-200ms
|
||||
* - One-time cost before screenshot, well worth it for coordinate accuracy
|
||||
* - Only processes 100-300 elements (first viewport) vs 10,000+ (entire page)
|
||||
* - Typically completes in 10-50ms
|
||||
* - 100x+ faster than locking entire page
|
||||
*
|
||||
* @see https://github.com/dgtlmoon/changedetection.io/issues/XXXX
|
||||
*/
|
||||
@@ -52,11 +43,34 @@
|
||||
// Store original styles in a global WeakMap for later restoration
|
||||
window.__elementSizingRestore = new WeakMap();
|
||||
|
||||
// Lock ALL element dimensions to prevent media query layout changes
|
||||
document.querySelectorAll('*').forEach(el => {
|
||||
const computed = window.getComputedStyle(el);
|
||||
const rect = el.getBoundingClientRect();
|
||||
const start = performance.now();
|
||||
|
||||
// Get current viewport height (visible portion of page)
|
||||
const viewportHeight = window.innerHeight;
|
||||
|
||||
// Get all elements and filter to FIRST VIEWPORT ONLY
|
||||
// This dramatically reduces elements to process (100-300 vs 10,000+)
|
||||
const allElements = Array.from(document.querySelectorAll('*'));
|
||||
|
||||
// BATCH READ PHASE: Get bounding rects and filter to viewport
|
||||
const measurements = allElements.map(el => {
|
||||
const rect = el.getBoundingClientRect();
|
||||
const computed = window.getComputedStyle(el);
|
||||
|
||||
// Only lock elements in the first viewport (visible on initial page load)
|
||||
// rect.top < viewportHeight means element starts within visible area
|
||||
const inViewport = rect.top < viewportHeight && rect.top >= 0;
|
||||
const hasSize = rect.height > 0 && rect.width > 0;
|
||||
|
||||
return inViewport && hasSize ? { el, computed, rect } : null;
|
||||
}).filter(Boolean); // Remove null entries
|
||||
|
||||
const elapsed = performance.now() - start;
|
||||
console.log(`Locked first viewport elements: ${measurements.length} of ${allElements.length} total elements (viewport height: ${viewportHeight}px, took ${elapsed.toFixed(0)}ms)`);
|
||||
|
||||
// BATCH WRITE PHASE: Apply all inline styles without triggering layout
|
||||
// No interleaved reads means browser can optimize style application
|
||||
measurements.forEach(({el, computed, rect}) => {
|
||||
// Save original inline style values BEFORE locking
|
||||
const properties = ['height', 'min-height', 'max-height', 'width', 'min-width', 'max-width'];
|
||||
const originalStyles = {};
|
||||
@@ -89,5 +103,5 @@
|
||||
disconnect() {}
|
||||
};
|
||||
|
||||
console.log('✓ Element dimensions locked to prevent media query changes during screenshot');
|
||||
console.log(`✓ Element dimensions locked (${measurements.length} elements) to prevent media query changes during screenshot`);
|
||||
})();
|
||||
|
||||
@@ -8,92 +8,42 @@ from loguru import logger
|
||||
|
||||
from changedetectionio.content_fetchers import SCREENSHOT_MAX_HEIGHT_DEFAULT, SCREENSHOT_DEFAULT_QUALITY
|
||||
|
||||
# Cache font to avoid loading on every stitch
|
||||
_cached_font = None
|
||||
|
||||
def _get_caption_font():
|
||||
"""Get or create cached font for caption text."""
|
||||
global _cached_font
|
||||
if _cached_font is None:
|
||||
from PIL import ImageFont
|
||||
try:
|
||||
_cached_font = ImageFont.truetype("arial.ttf", 35)
|
||||
except IOError:
|
||||
_cached_font = ImageFont.load_default()
|
||||
return _cached_font
|
||||
|
||||
|
||||
def stitch_images_inline(chunks_bytes, original_page_height, capture_height):
|
||||
"""
|
||||
Stitch image chunks together inline (no multiprocessing).
|
||||
Optimized for small number of chunks (2-3) to avoid process creation overhead.
|
||||
|
||||
Args:
|
||||
chunks_bytes: List of JPEG image bytes
|
||||
original_page_height: Original page height in pixels
|
||||
capture_height: Maximum capture height
|
||||
|
||||
Returns:
|
||||
bytes: Stitched JPEG image
|
||||
"""
|
||||
import os
|
||||
import io
|
||||
from PIL import Image, ImageDraw
|
||||
|
||||
# Load images from byte chunks
|
||||
images = [Image.open(io.BytesIO(b)) for b in chunks_bytes]
|
||||
total_height = sum(im.height for im in images)
|
||||
max_width = max(im.width for im in images)
|
||||
|
||||
# Create stitched image
|
||||
stitched = Image.new('RGB', (max_width, total_height))
|
||||
y_offset = 0
|
||||
for im in images:
|
||||
stitched.paste(im, (0, y_offset))
|
||||
y_offset += im.height
|
||||
im.close() # Close immediately after pasting
|
||||
|
||||
# Draw caption only if page was trimmed
|
||||
if original_page_height > capture_height:
|
||||
draw = ImageDraw.Draw(stitched)
|
||||
caption_text = f"WARNING: Screenshot was {original_page_height}px but trimmed to {capture_height}px because it was too long"
|
||||
padding = 10
|
||||
font = _get_caption_font()
|
||||
|
||||
bbox = draw.textbbox((0, 0), caption_text, font=font)
|
||||
text_width = bbox[2] - bbox[0]
|
||||
text_height = bbox[3] - bbox[1]
|
||||
|
||||
# Draw white background rectangle
|
||||
draw.rectangle([(0, 0), (max_width, text_height + 2 * padding)], fill=(255, 255, 255))
|
||||
|
||||
# Draw text centered
|
||||
text_x = (max_width - text_width) // 2
|
||||
draw.text((text_x, padding), caption_text, font=font, fill=(255, 0, 0))
|
||||
|
||||
# Encode to JPEG
|
||||
output = io.BytesIO()
|
||||
stitched.save(output, format="JPEG", quality=int(os.getenv("SCREENSHOT_QUALITY", SCREENSHOT_DEFAULT_QUALITY)), optimize=True)
|
||||
result = output.getvalue()
|
||||
|
||||
# Cleanup
|
||||
stitched.close()
|
||||
|
||||
return result
|
||||
|
||||
|
||||
def stitch_images_worker(pipe_conn, chunks_bytes, original_page_height, capture_height):
|
||||
def stitch_images_worker_raw_bytes(pipe_conn, original_page_height, capture_height):
|
||||
"""
|
||||
Stitch image chunks together in a separate process.
|
||||
Used for large number of chunks (4+) to avoid blocking the main event loop.
|
||||
|
||||
Uses spawn multiprocessing to isolate PIL's C-level memory allocation.
|
||||
When the subprocess exits, the OS reclaims ALL memory including C-level allocations
|
||||
that Python's GC cannot release. This prevents the ~50MB per stitch from accumulating
|
||||
in the main process.
|
||||
|
||||
Trade-off: Adds 35MB resource_tracker subprocess, but prevents 500MB+ memory leak
|
||||
in main process (much better at scale: 35GB vs 500GB for 1000 instances).
|
||||
|
||||
Args:
|
||||
pipe_conn: Pipe connection to receive data and send result
|
||||
original_page_height: Original page height in pixels
|
||||
capture_height: Maximum capture height
|
||||
"""
|
||||
import os
|
||||
import io
|
||||
import struct
|
||||
from PIL import Image, ImageDraw, ImageFont
|
||||
|
||||
try:
|
||||
# Receive chunk count as 4-byte integer (no pickle!)
|
||||
count_bytes = pipe_conn.recv_bytes()
|
||||
chunk_count = struct.unpack('I', count_bytes)[0]
|
||||
|
||||
# Receive each chunk as raw bytes (no pickle!)
|
||||
chunks_bytes = []
|
||||
for _ in range(chunk_count):
|
||||
chunks_bytes.append(pipe_conn.recv_bytes())
|
||||
|
||||
# Load images from byte chunks
|
||||
images = [Image.open(io.BytesIO(b)) for b in chunks_bytes]
|
||||
del chunks_bytes
|
||||
|
||||
total_height = sum(im.height for im in images)
|
||||
max_width = max(im.width for im in images)
|
||||
|
||||
@@ -103,15 +53,14 @@ def stitch_images_worker(pipe_conn, chunks_bytes, original_page_height, capture_
|
||||
for im in images:
|
||||
stitched.paste(im, (0, y_offset))
|
||||
y_offset += im.height
|
||||
im.close() # Close immediately after pasting
|
||||
im.close()
|
||||
del images
|
||||
|
||||
# Draw caption only if page was trimmed
|
||||
if original_page_height > capture_height:
|
||||
draw = ImageDraw.Draw(stitched)
|
||||
caption_text = f"WARNING: Screenshot was {original_page_height}px but trimmed to {capture_height}px because it was too long"
|
||||
padding = 10
|
||||
|
||||
# Try to load font
|
||||
try:
|
||||
font = ImageFont.truetype("arial.ttf", 35)
|
||||
except IOError:
|
||||
@@ -120,23 +69,26 @@ def stitch_images_worker(pipe_conn, chunks_bytes, original_page_height, capture_
|
||||
bbox = draw.textbbox((0, 0), caption_text, font=font)
|
||||
text_width = bbox[2] - bbox[0]
|
||||
text_height = bbox[3] - bbox[1]
|
||||
|
||||
# Draw white background rectangle
|
||||
draw.rectangle([(0, 0), (max_width, text_height + 2 * padding)], fill=(255, 255, 255))
|
||||
|
||||
# Draw text centered
|
||||
text_x = (max_width - text_width) // 2
|
||||
draw.text((text_x, padding), caption_text, font=font, fill=(255, 0, 0))
|
||||
|
||||
# Encode and send image with optimization
|
||||
# Encode and send
|
||||
output = io.BytesIO()
|
||||
stitched.save(output, format="JPEG", quality=int(os.getenv("SCREENSHOT_QUALITY", SCREENSHOT_DEFAULT_QUALITY)), optimize=True)
|
||||
pipe_conn.send_bytes(output.getvalue())
|
||||
result_bytes = output.getvalue()
|
||||
|
||||
stitched.close()
|
||||
del stitched
|
||||
output.close()
|
||||
del output
|
||||
|
||||
pipe_conn.send_bytes(result_bytes)
|
||||
del result_bytes
|
||||
|
||||
except Exception as e:
|
||||
pipe_conn.send(f"error:{e}")
|
||||
logger.error(f"Error in stitch_images_worker_raw_bytes: {e}")
|
||||
error_msg = f"error:{e}".encode('utf-8')
|
||||
pipe_conn.send_bytes(error_msg)
|
||||
finally:
|
||||
pipe_conn.close()
|
||||
|
||||
|
||||
|
||||
@@ -94,6 +94,14 @@ if os.getenv('FLASK_SERVER_NAME'):
|
||||
app.config['BABEL_TRANSLATION_DIRECTORIES'] = str(Path(__file__).parent / 'translations')
|
||||
app.config['BABEL_DEFAULT_LOCALE'] = 'en_GB'
|
||||
|
||||
# Session configuration
|
||||
# NOTE: Flask session (for locale, etc.) is separate from Flask-Login's remember-me cookie
|
||||
# - Flask session stores data like session['locale'] in a signed cookie
|
||||
# - Flask-Login's remember=True creates a separate authentication cookie
|
||||
# - Setting PERMANENT_SESSION_LIFETIME controls how long the Flask session cookie lasts
|
||||
from datetime import timedelta
|
||||
app.config['PERMANENT_SESSION_LIFETIME'] = timedelta(days=3650) # ~10 years (effectively unlimited)
|
||||
|
||||
#app.config["EXPLAIN_TEMPLATE_LOADING"] = True
|
||||
|
||||
|
||||
@@ -393,7 +401,10 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
# so far just for read-only via tests, but this will be moved eventually to be the main source
|
||||
# (instead of the global var)
|
||||
app.config['DATASTORE'] = datastore_o
|
||||
|
||||
|
||||
# Store batch mode flag to skip background threads when running in batch mode
|
||||
app.config['batch_mode'] = config.get('batch_mode', False) if config else False
|
||||
|
||||
# Store the signal in the app config to ensure it's accessible everywhere
|
||||
app.config['watch_check_update_SIGNAL'] = watch_check_update
|
||||
|
||||
@@ -550,6 +561,9 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
|
||||
# Validate the locale against available languages
|
||||
if locale in language_codes:
|
||||
# Make session permanent so language preference persists across browser sessions
|
||||
# NOTE: This is the Flask session cookie (separate from Flask-Login's remember-me auth cookie)
|
||||
session.permanent = True
|
||||
session['locale'] = locale
|
||||
|
||||
# CRITICAL: Flask-Babel caches the locale in the request context (ctx.babel_locale)
|
||||
@@ -794,6 +808,8 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
|
||||
# Initialize Socket.IO server conditionally based on settings
|
||||
socket_io_enabled = datastore.data['settings']['application']['ui'].get('socket_io_enabled', True)
|
||||
if socket_io_enabled and app.config.get('batch_mode'):
|
||||
socket_io_enabled = False
|
||||
if socket_io_enabled:
|
||||
from changedetectionio.realtime.socket_server import init_socketio
|
||||
global socketio_server
|
||||
@@ -891,14 +907,19 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
logger.info(f"Starting {n_workers} workers during app initialization")
|
||||
worker_handler.start_workers(n_workers, update_q, notification_q, app, datastore)
|
||||
|
||||
# @todo handle ctrl break
|
||||
ticker_thread = threading.Thread(target=ticker_thread_check_time_launch_checks, daemon=True, name="TickerThread-ScheduleChecker").start()
|
||||
threading.Thread(target=notification_runner, daemon=True, name="NotificationRunner").start()
|
||||
# Skip background threads in batch mode (just process queue and exit)
|
||||
batch_mode = app.config.get('batch_mode', False)
|
||||
if not batch_mode:
|
||||
# @todo handle ctrl break
|
||||
ticker_thread = threading.Thread(target=ticker_thread_check_time_launch_checks, daemon=True, name="TickerThread-ScheduleChecker").start()
|
||||
threading.Thread(target=notification_runner, daemon=True, name="NotificationRunner").start()
|
||||
|
||||
in_pytest = "pytest" in sys.modules or "PYTEST_CURRENT_TEST" in os.environ
|
||||
# Check for new release version, but not when running in test/build or pytest
|
||||
if not os.getenv("GITHUB_REF", False) and not strtobool(os.getenv('DISABLE_VERSION_CHECK', 'no')) and not in_pytest:
|
||||
threading.Thread(target=check_for_new_version, daemon=True, name="VersionChecker").start()
|
||||
in_pytest = "pytest" in sys.modules or "PYTEST_CURRENT_TEST" in os.environ
|
||||
# Check for new release version, but not when running in test/build or pytest
|
||||
if not os.getenv("GITHUB_REF", False) and not strtobool(os.getenv('DISABLE_VERSION_CHECK', 'no')) and not in_pytest:
|
||||
threading.Thread(target=check_for_new_version, daemon=True, name="VersionChecker").start()
|
||||
else:
|
||||
logger.info("Batch mode: Skipping ticker thread, notification runner, and version checker")
|
||||
|
||||
# Return the Flask app - the Socket.IO will be attached to it but initialized separately
|
||||
# This avoids circular dependencies
|
||||
|
||||
@@ -20,8 +20,9 @@ mtable = {'seconds': 1, 'minutes': 60, 'hours': 3600, 'days': 86400, 'weeks': 86
|
||||
|
||||
def _brotli_save(contents, filepath, mode=None, fallback_uncompressed=False):
|
||||
"""
|
||||
Save compressed data using native brotli.
|
||||
Testing shows no memory leak when using gc.collect() after compression.
|
||||
Save compressed data using native brotli with streaming compression.
|
||||
Uses chunked compression to minimize peak memory usage and malloc_trim()
|
||||
to force release of C-level memory back to the OS.
|
||||
|
||||
Args:
|
||||
contents: data to compress (str or bytes)
|
||||
@@ -37,27 +38,52 @@ def _brotli_save(contents, filepath, mode=None, fallback_uncompressed=False):
|
||||
"""
|
||||
import brotli
|
||||
import gc
|
||||
import ctypes
|
||||
|
||||
# Ensure contents are bytes
|
||||
if isinstance(contents, str):
|
||||
contents = contents.encode('utf-8')
|
||||
|
||||
try:
|
||||
logger.debug(f"Starting brotli compression of {len(contents)} bytes.")
|
||||
original_size = len(contents)
|
||||
logger.debug(f"Starting brotli streaming compression of {original_size} bytes.")
|
||||
|
||||
if mode is not None:
|
||||
compressed_data = brotli.compress(contents, mode=mode)
|
||||
else:
|
||||
compressed_data = brotli.compress(contents)
|
||||
# Create streaming compressor
|
||||
compressor = brotli.Compressor(quality=6, mode=mode if mode is not None else brotli.MODE_GENERIC)
|
||||
|
||||
# Stream compress in chunks to minimize memory usage
|
||||
chunk_size = 65536 # 64KB chunks
|
||||
total_compressed_size = 0
|
||||
|
||||
with open(filepath, 'wb') as f:
|
||||
f.write(compressed_data)
|
||||
# Process data in chunks
|
||||
offset = 0
|
||||
while offset < len(contents):
|
||||
chunk = contents[offset:offset + chunk_size]
|
||||
compressed_chunk = compressor.process(chunk)
|
||||
if compressed_chunk:
|
||||
f.write(compressed_chunk)
|
||||
total_compressed_size += len(compressed_chunk)
|
||||
offset += chunk_size
|
||||
|
||||
logger.debug(f"Finished brotli compression - From {len(contents)} to {len(compressed_data)} bytes.")
|
||||
# Finalize compression - critical for proper cleanup
|
||||
final_chunk = compressor.finish()
|
||||
if final_chunk:
|
||||
f.write(final_chunk)
|
||||
total_compressed_size += len(final_chunk)
|
||||
|
||||
# Force garbage collection to prevent memory buildup
|
||||
logger.debug(f"Finished brotli compression - From {original_size} to {total_compressed_size} bytes.")
|
||||
|
||||
# Cleanup: Delete compressor, force Python GC, then force C-level memory release
|
||||
del compressor
|
||||
gc.collect()
|
||||
|
||||
# Force release of C-level memory back to OS (since brotli is a C library)
|
||||
try:
|
||||
ctypes.CDLL('libc.so.6').malloc_trim(0)
|
||||
except Exception:
|
||||
pass # malloc_trim not available on all systems (e.g., macOS)
|
||||
|
||||
return filepath
|
||||
|
||||
except Exception as e:
|
||||
|
||||
@@ -1,19 +1,25 @@
|
||||
{
|
||||
"name": "",
|
||||
"short_name": "",
|
||||
"name": "ChangeDetection.io",
|
||||
"short_name": "ChangeDetect",
|
||||
"description": "Self-hosted website change detection and monitoring",
|
||||
"icons": [
|
||||
{
|
||||
"src": "android-chrome-192x192.png",
|
||||
"sizes": "192x192",
|
||||
"type": "image/png"
|
||||
"type": "image/png",
|
||||
"purpose": "any maskable"
|
||||
},
|
||||
{
|
||||
"src": "android-chrome-256x256.png",
|
||||
"sizes": "256x256",
|
||||
"type": "image/png"
|
||||
"type": "image/png",
|
||||
"purpose": "any maskable"
|
||||
}
|
||||
],
|
||||
"theme_color": "#ffffff",
|
||||
"start_url": "/",
|
||||
"theme_color": "#5bbad5",
|
||||
"background_color": "#ffffff",
|
||||
"display": "standalone"
|
||||
"display": "standalone",
|
||||
"categories": ["utilities", "productivity"],
|
||||
"orientation": "any"
|
||||
}
|
||||
|
||||
@@ -338,7 +338,6 @@ class ChangeDetectionStore:
|
||||
self.needs_write_urgent = True
|
||||
|
||||
def add_watch(self, url, tag='', extras=None, tag_uuids=None, write_to_disk_now=True):
|
||||
import requests
|
||||
|
||||
if extras is None:
|
||||
extras = {}
|
||||
@@ -349,6 +348,8 @@ class ChangeDetectionStore:
|
||||
|
||||
# Was it a share link? try to fetch the data
|
||||
if (url.startswith("https://changedetection.io/share/")):
|
||||
import requests
|
||||
|
||||
try:
|
||||
r = requests.request(method="GET",
|
||||
url=url,
|
||||
|
||||
@@ -27,7 +27,7 @@
|
||||
<link rel="apple-touch-icon" sizes="180x180" href="{{url_for('static_content', group='favicons', filename='apple-touch-icon.png')}}">
|
||||
<link rel="icon" type="image/png" sizes="32x32" href="{{url_for('static_content', group='favicons', filename='favicon-32x32.png')}}">
|
||||
<link rel="icon" type="image/png" sizes="16x16" href="{{url_for('static_content', group='favicons', filename='favicon-16x16.png')}}">
|
||||
<link rel="manifest" href="{{url_for('static_content', group='favicons', filename='site.webmanifest')}}">
|
||||
<link rel="manifest" href="{{url_for('static_content', group='favicons', filename='site.webmanifest')}}" crossorigin="use-credentials">
|
||||
<link rel="mask-icon" href="{{url_for('static_content', group='favicons', filename='safari-pinned-tab.svg')}}" color="#5bbad5">
|
||||
<link rel="shortcut icon" href="{{url_for('static_content', group='favicons', filename='favicon.ico')}}">
|
||||
<meta name="msapplication-TileColor" content="#da532c">
|
||||
@@ -265,6 +265,9 @@
|
||||
</a>
|
||||
{% endfor %}
|
||||
</div>
|
||||
<div>
|
||||
<a href="{{ url_for('ui.delete_locale_language_session_var_if_it_exists', redirect=request.path) }}" >{{ _('Auto-detect from browser') }}</a>
|
||||
</div>
|
||||
<div>
|
||||
{{ _('Language support is in beta, please help us improve by opening a PR on GitHub with any updates.') }}
|
||||
</div>
|
||||
|
||||
243
changedetectionio/test_cli_opts.sh
Executable file
243
changedetectionio/test_cli_opts.sh
Executable file
@@ -0,0 +1,243 @@
|
||||
#!/bin/bash
|
||||
# Test script for CLI options - Parallel execution
|
||||
# Tests -u, -uN, -r, -b flags
|
||||
|
||||
set -u # Exit on undefined variables
|
||||
|
||||
# Color output
|
||||
RED='\033[0;31m'
|
||||
GREEN='\033[0;32m'
|
||||
YELLOW='\033[1;33m'
|
||||
NC='\033[0m' # No Color
|
||||
|
||||
# Test results directory (for parallel safety)
|
||||
TEST_RESULTS_DIR="/tmp/cli-test-results-$$"
|
||||
mkdir -p "$TEST_RESULTS_DIR"
|
||||
|
||||
# Cleanup function
|
||||
cleanup() {
|
||||
echo ""
|
||||
echo "=== Cleaning up test directories ==="
|
||||
rm -rf /tmp/cli-test-* 2>/dev/null || true
|
||||
rm -rf "$TEST_RESULTS_DIR" 2>/dev/null || true
|
||||
# Kill any hanging processes
|
||||
pkill -f "changedetection.py.*cli-test" 2>/dev/null || true
|
||||
}
|
||||
trap cleanup EXIT
|
||||
|
||||
# Helper to record test result
|
||||
record_result() {
|
||||
local test_num=$1
|
||||
local status=$2 # pass or fail
|
||||
local message=$3
|
||||
|
||||
echo "$status|$message" > "$TEST_RESULTS_DIR/test_${test_num}.result"
|
||||
}
|
||||
|
||||
# Run a test in background
|
||||
run_test() {
|
||||
local test_num=$1
|
||||
local test_name=$2
|
||||
local test_func=$3
|
||||
|
||||
(
|
||||
echo -e "${YELLOW}[Test $test_num]${NC} $test_name"
|
||||
if $test_func "$test_num"; then
|
||||
record_result "$test_num" "pass" "$test_name"
|
||||
echo -e "${GREEN}✓ PASS${NC}: $test_name"
|
||||
else
|
||||
record_result "$test_num" "fail" "$test_name"
|
||||
echo -e "${RED}✗ FAIL${NC}: $test_name"
|
||||
fi
|
||||
) &
|
||||
}
|
||||
|
||||
# =============================================================================
|
||||
# Test Functions (each runs independently)
|
||||
# =============================================================================
|
||||
|
||||
test_help_flag() {
|
||||
local test_id=$1
|
||||
timeout 3 python3 changedetection.py --help 2>&1 | grep -q "Add URLs on startup"
|
||||
}
|
||||
|
||||
test_version_flag() {
|
||||
local test_id=$1
|
||||
timeout 3 python3 changedetection.py --version 2>&1 | grep -qE "changedetection.io [0-9]+\.[0-9]+"
|
||||
}
|
||||
|
||||
test_single_url() {
|
||||
local test_id=$1
|
||||
local dir="/tmp/cli-test-single-${test_id}-$$"
|
||||
timeout 10 python3 changedetection.py -d "$dir" -C -u https://example.com -b &>/dev/null
|
||||
[ -f "$dir/url-watches.json" ] && \
|
||||
[ "$(python3 -c "import json; print(len(json.load(open('$dir/url-watches.json')).get('watching', {})))")" -eq 1 ]
|
||||
}
|
||||
|
||||
test_multiple_urls() {
|
||||
local test_id=$1
|
||||
local dir="/tmp/cli-test-multi-${test_id}-$$"
|
||||
timeout 12 python3 changedetection.py -d "$dir" -C \
|
||||
-u https://example.com \
|
||||
-u https://github.com \
|
||||
-u https://httpbin.org \
|
||||
-b &>/dev/null
|
||||
[ -f "$dir/url-watches.json" ] && \
|
||||
[ "$(python3 -c "import json; print(len(json.load(open('$dir/url-watches.json')).get('watching', {})))")" -eq 3 ]
|
||||
}
|
||||
|
||||
test_url_with_options() {
|
||||
local test_id=$1
|
||||
local dir="/tmp/cli-test-opts-${test_id}-$$"
|
||||
timeout 10 python3 changedetection.py -d "$dir" -C \
|
||||
-u https://example.com \
|
||||
-u0 '{"title":"Test Site","processor":"text_json_diff"}' \
|
||||
-b &>/dev/null
|
||||
[ -f "$dir/url-watches.json" ] && \
|
||||
python3 -c "import json; data=json.load(open('$dir/url-watches.json')); watches=data.get('watching', {}); exit(0 if any(w.get('title')=='Test Site' for w in watches.values()) else 1)"
|
||||
}
|
||||
|
||||
test_multiple_urls_with_options() {
|
||||
local test_id=$1
|
||||
local dir="/tmp/cli-test-multi-opts-${test_id}-$$"
|
||||
timeout 12 python3 changedetection.py -d "$dir" -C \
|
||||
-u https://example.com \
|
||||
-u0 '{"title":"Site One"}' \
|
||||
-u https://github.com \
|
||||
-u1 '{"title":"Site Two"}' \
|
||||
-b &>/dev/null
|
||||
[ -f "$dir/url-watches.json" ] && \
|
||||
[ "$(python3 -c "import json; print(len(json.load(open('$dir/url-watches.json')).get('watching', {})))")" -eq 2 ] && \
|
||||
python3 -c "import json; data=json.load(open('$dir/url-watches.json')); watches=data.get('watching', {}); titles=[w.get('title') for w in watches.values()]; exit(0 if 'Site One' in titles and 'Site Two' in titles else 1)"
|
||||
}
|
||||
|
||||
test_batch_mode_exit() {
|
||||
local test_id=$1
|
||||
local dir="/tmp/cli-test-batch-${test_id}-$$"
|
||||
local start=$(date +%s)
|
||||
timeout 15 python3 changedetection.py -d "$dir" -C \
|
||||
-u https://example.com \
|
||||
-b &>/dev/null
|
||||
local end=$(date +%s)
|
||||
local elapsed=$((end - start))
|
||||
[ $elapsed -lt 14 ]
|
||||
}
|
||||
|
||||
test_recheck_all() {
|
||||
local test_id=$1
|
||||
local dir="/tmp/cli-test-recheck-all-${test_id}-$$"
|
||||
mkdir -p "$dir"
|
||||
cat > "$dir/url-watches.json" << 'EOF'
|
||||
{"watching":{"test-uuid":{"url":"https://example.com","last_checked":0,"processor":"text_json_diff","uuid":"test-uuid"}},"settings":{"application":{"password":false}}}
|
||||
EOF
|
||||
timeout 10 python3 changedetection.py -d "$dir" -r all -b 2>&1 | grep -q "Queuing all"
|
||||
}
|
||||
|
||||
test_recheck_specific() {
|
||||
local test_id=$1
|
||||
local dir="/tmp/cli-test-recheck-uuid-${test_id}-$$"
|
||||
mkdir -p "$dir"
|
||||
cat > "$dir/url-watches.json" << 'EOF'
|
||||
{"watching":{"uuid-1":{"url":"https://example.com","last_checked":0,"processor":"text_json_diff","uuid":"uuid-1"},"uuid-2":{"url":"https://github.com","last_checked":0,"processor":"text_json_diff","uuid":"uuid-2"}},"settings":{"application":{"password":false}}}
|
||||
EOF
|
||||
timeout 10 python3 changedetection.py -d "$dir" -r uuid-1,uuid-2 -b 2>&1 | grep -q "Queuing 2 specific watches"
|
||||
}
|
||||
|
||||
test_combined_operations() {
|
||||
local test_id=$1
|
||||
local dir="/tmp/cli-test-combined-${test_id}-$$"
|
||||
timeout 12 python3 changedetection.py -d "$dir" -C \
|
||||
-u https://example.com \
|
||||
-u https://github.com \
|
||||
-r all \
|
||||
-b &>/dev/null
|
||||
[ -f "$dir/url-watches.json" ] && \
|
||||
[ "$(python3 -c "import json; print(len(json.load(open('$dir/url-watches.json')).get('watching', {})))")" -eq 2 ]
|
||||
}
|
||||
|
||||
test_invalid_json() {
|
||||
local test_id=$1
|
||||
local dir="/tmp/cli-test-invalid-${test_id}-$$"
|
||||
timeout 5 python3 changedetection.py -d "$dir" -C \
|
||||
-u https://example.com \
|
||||
-u0 'invalid json here' \
|
||||
2>&1 | grep -qi "invalid json\|json decode error"
|
||||
}
|
||||
|
||||
test_create_directory() {
|
||||
local test_id=$1
|
||||
local dir="/tmp/cli-test-create-${test_id}-$$/nested/path"
|
||||
timeout 10 python3 changedetection.py -d "$dir" -C \
|
||||
-u https://example.com \
|
||||
-b &>/dev/null
|
||||
[ -d "$dir" ]
|
||||
}
|
||||
|
||||
# =============================================================================
|
||||
# Main Test Execution
|
||||
# =============================================================================
|
||||
|
||||
echo "=========================================="
|
||||
echo " CLI Options Test Suite (Parallel)"
|
||||
echo "=========================================="
|
||||
echo ""
|
||||
|
||||
# Launch all tests in parallel
|
||||
run_test 1 "Help flag (--help) shows usage without initialization" test_help_flag
|
||||
run_test 2 "Version flag (--version) displays version" test_version_flag
|
||||
run_test 3 "Add single URL with -u flag" test_single_url
|
||||
run_test 4 "Add multiple URLs with multiple -u flags" test_multiple_urls
|
||||
run_test 5 "Add URL with JSON options using -u0" test_url_with_options
|
||||
run_test 6 "Add multiple URLs with different options (-u0, -u1)" test_multiple_urls_with_options
|
||||
run_test 7 "Batch mode (-b) exits automatically after processing" test_batch_mode_exit
|
||||
run_test 8 "Recheck all watches with -r all" test_recheck_all
|
||||
run_test 9 "Recheck specific watches with -r UUID" test_recheck_specific
|
||||
run_test 10 "Combined: Add URLs and recheck all with -u and -r all" test_combined_operations
|
||||
run_test 11 "Invalid JSON in -u0 option should show error" test_invalid_json
|
||||
run_test 12 "Create datastore directory with -C flag" test_create_directory
|
||||
|
||||
# Wait for all tests to complete
|
||||
echo ""
|
||||
echo "Waiting for all tests to complete..."
|
||||
wait
|
||||
|
||||
# Collect results
|
||||
echo ""
|
||||
echo "=========================================="
|
||||
echo " Test Summary"
|
||||
echo "=========================================="
|
||||
|
||||
TESTS_RUN=0
|
||||
TESTS_PASSED=0
|
||||
TESTS_FAILED=0
|
||||
|
||||
for result_file in "$TEST_RESULTS_DIR"/test_*.result; do
|
||||
if [ -f "$result_file" ]; then
|
||||
TESTS_RUN=$((TESTS_RUN + 1))
|
||||
status=$(cut -d'|' -f1 < "$result_file")
|
||||
if [ "$status" = "pass" ]; then
|
||||
TESTS_PASSED=$((TESTS_PASSED + 1))
|
||||
else
|
||||
TESTS_FAILED=$((TESTS_FAILED + 1))
|
||||
fi
|
||||
fi
|
||||
done
|
||||
|
||||
echo "Tests run: $TESTS_RUN"
|
||||
echo -e "${GREEN}Tests passed: $TESTS_PASSED${NC}"
|
||||
if [ $TESTS_FAILED -gt 0 ]; then
|
||||
echo -e "${RED}Tests failed: $TESTS_FAILED${NC}"
|
||||
else
|
||||
echo -e "${GREEN}Tests failed: $TESTS_FAILED${NC}"
|
||||
fi
|
||||
echo "=========================================="
|
||||
echo ""
|
||||
|
||||
# Exit with appropriate code
|
||||
if [ $TESTS_FAILED -gt 0 ]; then
|
||||
echo -e "${RED}Some tests failed!${NC}"
|
||||
exit 1
|
||||
else
|
||||
echo -e "${GREEN}All tests passed!${NC}"
|
||||
exit 0
|
||||
fi
|
||||
@@ -5,13 +5,11 @@ from threading import Thread
|
||||
|
||||
import pytest
|
||||
import arrow
|
||||
from changedetectionio import changedetection_app
|
||||
from changedetectionio import store
|
||||
import os
|
||||
import sys
|
||||
from loguru import logger
|
||||
|
||||
from changedetectionio.flask_app import init_app_secret
|
||||
from changedetectionio.flask_app import init_app_secret, changedetection_app
|
||||
from changedetectionio.tests.util import live_server_setup, new_live_server_setup
|
||||
|
||||
# https://github.com/pallets/flask/blob/1.1.2/examples/tutorial/tests/test_auth.py
|
||||
|
||||
@@ -160,7 +160,7 @@ def test_invalid_locale(client, live_server, measure_memory_usage, datastore_pat
|
||||
def test_language_persistence_in_session(client, live_server, measure_memory_usage, datastore_path):
|
||||
"""
|
||||
Test that the language preference persists across multiple requests
|
||||
within the same session.
|
||||
within the same session, and that auto-detect properly clears the preference.
|
||||
"""
|
||||
|
||||
# Establish session cookie
|
||||
@@ -184,6 +184,34 @@ def test_language_persistence_in_session(client, live_server, measure_memory_usa
|
||||
assert res.status_code == 200
|
||||
assert b"Annulla" in res.data, "Italian text should persist across requests"
|
||||
|
||||
# Verify locale is in session
|
||||
with client.session_transaction() as sess:
|
||||
assert sess.get('locale') == 'it', "Locale should be set in session"
|
||||
|
||||
# Call auto-detect to clear the locale
|
||||
res = client.get(
|
||||
url_for("ui.delete_locale_language_session_var_if_it_exists"),
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
assert res.status_code == 200
|
||||
# Verify the flash message appears (in English since we cleared the locale)
|
||||
assert b"Language set to auto-detect from browser" in res.data, "Should show flash message"
|
||||
|
||||
# Verify locale was removed from session
|
||||
with client.session_transaction() as sess:
|
||||
assert 'locale' not in sess, "Locale should be removed from session after auto-detect"
|
||||
|
||||
# Now requests should use browser default (English in test environment)
|
||||
res = client.get(
|
||||
url_for("watchlist.index"),
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
assert res.status_code == 200
|
||||
assert b"Cancel" in res.data, "Should show English after auto-detect clears Italian"
|
||||
assert b"Annulla" not in res.data, "Should not show Italian after auto-detect"
|
||||
|
||||
|
||||
def test_set_language_with_redirect(client, live_server, measure_memory_usage, datastore_path):
|
||||
"""
|
||||
|
||||
@@ -140,38 +140,13 @@ def delete_all_watches(client=None):
|
||||
def wait_for_all_checks(client=None):
|
||||
"""
|
||||
Waits until the queue is empty and workers are idle.
|
||||
Much faster than the original with adaptive timing.
|
||||
Delegates to worker_handler.wait_for_all_checks for shared logic.
|
||||
"""
|
||||
from changedetectionio.flask_app import update_q as global_update_q
|
||||
from changedetectionio import worker_handler
|
||||
empty_since = None
|
||||
attempt = 0
|
||||
max_attempts = 150 # Still reasonable upper bound
|
||||
|
||||
while attempt < max_attempts:
|
||||
# Start with fast checks, slow down if needed
|
||||
if attempt < 10:
|
||||
time.sleep(0.2) # Very fast initial checks
|
||||
elif attempt < 30:
|
||||
time.sleep(0.4) # Medium speed
|
||||
else:
|
||||
time.sleep(0.8) # Slower for persistent issues
|
||||
|
||||
q_length = global_update_q.qsize()
|
||||
running_uuids = worker_handler.get_running_uuids()
|
||||
any_workers_busy = len(running_uuids) > 0
|
||||
|
||||
if q_length == 0 and not any_workers_busy:
|
||||
if empty_since is None:
|
||||
empty_since = time.time()
|
||||
# Brief stabilization period for async workers
|
||||
elif time.time() - empty_since >= 0.3:
|
||||
break
|
||||
else:
|
||||
empty_since = None
|
||||
|
||||
attempt += 1
|
||||
time.sleep(0.3)
|
||||
# Use the shared wait logic from worker_handler
|
||||
return worker_handler.wait_for_all_checks(global_update_q, timeout=150)
|
||||
|
||||
def wait_for_watch_history(client, min_history_count=2, timeout=10):
|
||||
"""
|
||||
|
||||
101
changedetectionio/translations/README.md
Normal file
101
changedetectionio/translations/README.md
Normal file
@@ -0,0 +1,101 @@
|
||||
# Translation Guide
|
||||
|
||||
## Updating Translations
|
||||
|
||||
To maintain consistency and minimize unnecessary changes in translation files, run these commands:
|
||||
|
||||
```bash
|
||||
python setup.py extract_messages # Extract translatable strings
|
||||
python setup.py update_catalog # Update all language files
|
||||
python setup.py compile_catalog # Compile to binary .mo files
|
||||
```
|
||||
|
||||
## Configuration
|
||||
|
||||
All translation settings are configured in **`../../setup.cfg`** (single source of truth).
|
||||
|
||||
The configuration below is shown for reference - **edit `setup.cfg` to change settings**:
|
||||
|
||||
```ini
|
||||
[extract_messages]
|
||||
# Extract translatable strings from source code
|
||||
mapping_file = babel.cfg
|
||||
output_file = changedetectionio/translations/messages.pot
|
||||
input_paths = changedetectionio
|
||||
keywords = _ _l gettext
|
||||
# Options to reduce unnecessary changes in .pot files
|
||||
sort_by_file = true # Keeps entries ordered by file path
|
||||
width = 120 # Consistent line width (prevents rewrapping)
|
||||
add_location = file # Show file path only (not line numbers)
|
||||
|
||||
[update_catalog]
|
||||
# Update existing .po files with new strings from .pot
|
||||
# Note: 'locale' is omitted - Babel auto-discovers all catalogs in output_dir
|
||||
input_file = changedetectionio/translations/messages.pot
|
||||
output_dir = changedetectionio/translations
|
||||
domain = messages
|
||||
# Options for consistent formatting
|
||||
width = 120 # Consistent line width
|
||||
no_fuzzy_matching = true # Avoids incorrect automatic matches
|
||||
|
||||
[compile_catalog]
|
||||
# Compile .po files to .mo binary format
|
||||
directory = changedetectionio/translations
|
||||
domain = messages
|
||||
```
|
||||
|
||||
**Key formatting options:**
|
||||
- `sort_by_file = true` - Orders entries by file path (consistent ordering)
|
||||
- `width = 120` - Fixed line width prevents text rewrapping
|
||||
- `add_location = file` - Shows file path only, not line numbers (reduces git churn)
|
||||
- `no_fuzzy_matching = true` - Prevents incorrect automatic fuzzy matches
|
||||
|
||||
## Why Use These Commands?
|
||||
|
||||
Running pybabel commands directly without consistent options causes:
|
||||
- ❌ Entries get reordered differently each time
|
||||
- ❌ Text gets rewrapped at different widths
|
||||
- ❌ Line numbers change every edit (if not configured)
|
||||
- ❌ Large diffs that make code review difficult
|
||||
|
||||
Using `python setup.py` commands ensures:
|
||||
- ✅ Consistent ordering (by file path, not alphabetically)
|
||||
- ✅ Consistent line width (120 characters, no rewrapping)
|
||||
- ✅ File-only locations (no line number churn)
|
||||
- ✅ No fuzzy matching (prevents incorrect auto-translations)
|
||||
- ✅ Minimal diffs (only actual changes show up)
|
||||
- ✅ Easier code review and git history
|
||||
|
||||
These commands read settings from `../../setup.cfg` automatically.
|
||||
|
||||
## Supported Languages
|
||||
|
||||
- `cs` - Czech (Čeština)
|
||||
- `de` - German (Deutsch)
|
||||
- `en_GB` - English (UK)
|
||||
- `en_US` - English (US)
|
||||
- `fr` - French (Français)
|
||||
- `it` - Italian (Italiano)
|
||||
- `ko` - Korean (한국어)
|
||||
- `zh` - Chinese Simplified (中文简体)
|
||||
- `zh_Hant_TW` - Chinese Traditional (繁體中文)
|
||||
|
||||
## Adding a New Language
|
||||
|
||||
1. Initialize the new language catalog:
|
||||
```bash
|
||||
pybabel init -i changedetectionio/translations/messages.pot -d changedetectionio/translations -l NEW_LANG_CODE
|
||||
```
|
||||
2. Compile it:
|
||||
```bash
|
||||
python setup.py compile_catalog
|
||||
```
|
||||
|
||||
Babel will auto-discover the new language on subsequent translation updates.
|
||||
|
||||
## Translation Notes
|
||||
|
||||
From CLAUDE.md:
|
||||
- Always use "monitor" or "watcher" terminology (not "clock")
|
||||
- Use the most brief wording suitable
|
||||
- When finding issues in one language, check ALL languages for the same issue
|
||||
Binary file not shown.
File diff suppressed because it is too large
Load Diff
Binary file not shown.
File diff suppressed because it is too large
Load Diff
Binary file not shown.
File diff suppressed because it is too large
Load Diff
Binary file not shown.
File diff suppressed because it is too large
Load Diff
Binary file not shown.
File diff suppressed because it is too large
Load Diff
Binary file not shown.
File diff suppressed because it is too large
Load Diff
Binary file not shown.
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
Binary file not shown.
File diff suppressed because it is too large
Load Diff
Binary file not shown.
File diff suppressed because it is too large
Load Diff
@@ -17,6 +17,7 @@ worker_threads = [] # List of WorkerThread objects
|
||||
|
||||
# Track currently processing UUIDs for async workers - maps {uuid: worker_id}
|
||||
currently_processing_uuids = {}
|
||||
_uuid_processing_lock = threading.Lock() # Protects currently_processing_uuids
|
||||
|
||||
# Configuration - async workers only
|
||||
USE_ASYNC_WORKERS = True
|
||||
@@ -207,31 +208,80 @@ def get_worker_count():
|
||||
|
||||
def get_running_uuids():
|
||||
"""Get list of UUIDs currently being processed by async workers"""
|
||||
return list(currently_processing_uuids.keys())
|
||||
with _uuid_processing_lock:
|
||||
return list(currently_processing_uuids.keys())
|
||||
|
||||
|
||||
def claim_uuid_for_processing(uuid, worker_id):
|
||||
"""
|
||||
Atomically check if UUID is available and claim it for processing.
|
||||
|
||||
This is thread-safe and prevents race conditions where multiple workers
|
||||
try to process the same UUID simultaneously.
|
||||
|
||||
Args:
|
||||
uuid: The watch UUID to claim
|
||||
worker_id: The ID of the worker claiming this UUID
|
||||
|
||||
Returns:
|
||||
True if successfully claimed (UUID was not being processed)
|
||||
False if already being processed by another worker
|
||||
"""
|
||||
with _uuid_processing_lock:
|
||||
if uuid in currently_processing_uuids:
|
||||
# Already being processed by another worker
|
||||
return False
|
||||
# Claim it atomically
|
||||
currently_processing_uuids[uuid] = worker_id
|
||||
logger.debug(f"Worker {worker_id} claimed UUID: {uuid}")
|
||||
return True
|
||||
|
||||
|
||||
def release_uuid_from_processing(uuid, worker_id):
|
||||
"""
|
||||
Release a UUID from processing (thread-safe).
|
||||
|
||||
Args:
|
||||
uuid: The watch UUID to release
|
||||
worker_id: The ID of the worker releasing this UUID
|
||||
"""
|
||||
with _uuid_processing_lock:
|
||||
# Only remove if this worker owns it (defensive)
|
||||
if currently_processing_uuids.get(uuid) == worker_id:
|
||||
currently_processing_uuids.pop(uuid, None)
|
||||
logger.debug(f"Worker {worker_id} released UUID: {uuid}")
|
||||
else:
|
||||
logger.warning(f"Worker {worker_id} tried to release UUID {uuid} but doesn't own it (owned by {currently_processing_uuids.get(uuid, 'nobody')})")
|
||||
|
||||
|
||||
def set_uuid_processing(uuid, worker_id=None, processing=True):
|
||||
"""Mark a UUID as being processed or completed by a specific worker"""
|
||||
global currently_processing_uuids
|
||||
"""
|
||||
Mark a UUID as being processed or completed by a specific worker.
|
||||
|
||||
DEPRECATED: Use claim_uuid_for_processing() and release_uuid_from_processing() instead.
|
||||
This function is kept for backward compatibility but doesn't provide atomic check-and-set.
|
||||
"""
|
||||
if processing:
|
||||
currently_processing_uuids[uuid] = worker_id
|
||||
logger.debug(f"Worker {worker_id} started processing UUID: {uuid}")
|
||||
with _uuid_processing_lock:
|
||||
currently_processing_uuids[uuid] = worker_id
|
||||
logger.debug(f"Worker {worker_id} started processing UUID: {uuid}")
|
||||
else:
|
||||
currently_processing_uuids.pop(uuid, None)
|
||||
logger.debug(f"Worker {worker_id} finished processing UUID: {uuid}")
|
||||
release_uuid_from_processing(uuid, worker_id)
|
||||
|
||||
|
||||
def is_watch_running(watch_uuid):
|
||||
"""Check if a specific watch is currently being processed by any worker"""
|
||||
return watch_uuid in currently_processing_uuids
|
||||
with _uuid_processing_lock:
|
||||
return watch_uuid in currently_processing_uuids
|
||||
|
||||
|
||||
def is_watch_running_by_another_worker(watch_uuid, current_worker_id):
|
||||
"""Check if a specific watch is currently being processed by a different worker"""
|
||||
if watch_uuid not in currently_processing_uuids:
|
||||
return False
|
||||
processing_worker_id = currently_processing_uuids[watch_uuid]
|
||||
return processing_worker_id != current_worker_id
|
||||
with _uuid_processing_lock:
|
||||
if watch_uuid not in currently_processing_uuids:
|
||||
return False
|
||||
processing_worker_id = currently_processing_uuids[watch_uuid]
|
||||
return processing_worker_id != current_worker_id
|
||||
|
||||
|
||||
def queue_item_async_safe(update_q, item, silent=False):
|
||||
@@ -381,6 +431,53 @@ def get_worker_status():
|
||||
}
|
||||
|
||||
|
||||
def wait_for_all_checks(update_q, timeout=150):
|
||||
"""
|
||||
Wait for queue to be empty and all workers to be idle.
|
||||
|
||||
Args:
|
||||
update_q: The update queue to monitor
|
||||
timeout: Maximum wait time in seconds (default 150 = 150 iterations * 0.2-0.8s)
|
||||
|
||||
Returns:
|
||||
bool: True if all checks completed, False if timeout
|
||||
"""
|
||||
import time
|
||||
empty_since = None
|
||||
attempt = 0
|
||||
max_attempts = timeout
|
||||
|
||||
while attempt < max_attempts:
|
||||
# Adaptive sleep - start fast, slow down if needed
|
||||
if attempt < 10:
|
||||
sleep_time = 0.2 # Very fast initial checks
|
||||
elif attempt < 30:
|
||||
sleep_time = 0.4 # Medium speed
|
||||
else:
|
||||
sleep_time = 0.8 # Slower for persistent issues
|
||||
|
||||
time.sleep(sleep_time)
|
||||
|
||||
q_length = update_q.qsize()
|
||||
running_uuids = get_running_uuids()
|
||||
any_workers_busy = len(running_uuids) > 0
|
||||
|
||||
if q_length == 0 and not any_workers_busy:
|
||||
if empty_since is None:
|
||||
empty_since = time.time()
|
||||
# Brief stabilization period for async workers
|
||||
elif time.time() - empty_since >= 0.3:
|
||||
# Add small buffer for filesystem operations to complete
|
||||
time.sleep(0.2)
|
||||
return True
|
||||
else:
|
||||
empty_since = None
|
||||
|
||||
attempt += 1
|
||||
|
||||
return False # Timeout
|
||||
|
||||
|
||||
def check_worker_health(expected_count, update_q=None, notification_q=None, app=None, datastore=None):
|
||||
"""
|
||||
Check if the expected number of async workers are running and restart any missing ones.
|
||||
|
||||
@@ -42,7 +42,7 @@ orjson~=3.11
|
||||
# jq not available on Windows so must be installed manually
|
||||
|
||||
# Notification library
|
||||
apprise==1.9.6
|
||||
apprise==1.9.7
|
||||
|
||||
diff_match_patch
|
||||
|
||||
@@ -91,7 +91,7 @@ jq~=1.3; python_version >= "3.8" and sys_platform == "linux"
|
||||
|
||||
# playwright is installed at Dockerfile build time because it's not available on all platforms
|
||||
|
||||
pyppeteer-ng==2.0.0rc11
|
||||
pyppeteer-ng==2.0.0rc12
|
||||
pyppeteerstealth>=0.0.4
|
||||
|
||||
# Include pytest, so if theres a support issue we can ask them to run these tests on their setup
|
||||
|
||||
28
setup.cfg
Normal file
28
setup.cfg
Normal file
@@ -0,0 +1,28 @@
|
||||
# Translation configuration for changedetection.io
|
||||
# See changedetectionio/translations/README.md for full documentation on updating translations
|
||||
|
||||
[extract_messages]
|
||||
# Extract translatable strings from source code
|
||||
mapping_file = babel.cfg
|
||||
output_file = changedetectionio/translations/messages.pot
|
||||
input_paths = changedetectionio
|
||||
keywords = _ _l gettext
|
||||
# Options to reduce unnecessary changes in .pot files
|
||||
sort_by_file = true
|
||||
width = 120
|
||||
add_location = file
|
||||
|
||||
[update_catalog]
|
||||
# Update existing .po files with new strings from .pot
|
||||
# Note: Omitting 'locale' makes Babel auto-discover all catalogs in output_dir
|
||||
input_file = changedetectionio/translations/messages.pot
|
||||
output_dir = changedetectionio/translations
|
||||
domain = messages
|
||||
# Options for consistent formatting
|
||||
width = 120
|
||||
no_fuzzy_matching = true
|
||||
|
||||
[compile_catalog]
|
||||
# Compile .po files to .mo binary format
|
||||
directory = changedetectionio/translations
|
||||
domain = messages
|
||||
Reference in New Issue
Block a user