Compare commits

..

7 Commits

Author SHA1 Message Date
dgtlmoon
426694b002 Add translations 2026-01-22 06:41:25 +01:00
dgtlmoon
1ab4ca63ae New language configs 2026-01-22 06:31:31 +01:00
dgtlmoon
3ee24a4b9c Small fix, improve test 2026-01-22 06:26:27 +01:00
dgtlmoon
220499fd0f Rebuild lang 2026-01-22 06:21:46 +01:00
dgtlmoon
50c798f498 Adding translations 2026-01-22 06:15:29 +01:00
dgtlmoon
fce47cde95 Merge branch 'master' into 3792-language-selection-session 2026-01-22 06:11:41 +01:00
dgtlmoon
2b9618bbb5 Make language selection sticky and provide a way to return back to default auto-detect #3792 2026-01-22 05:57:37 +01:00
15 changed files with 236 additions and 922 deletions

View File

@@ -111,32 +111,6 @@ jobs:
docker network inspect changedet-network >/dev/null 2>&1 || docker network create changedet-network
docker run --name test-cdio-basic-tests --network changedet-network test-changedetectionio bash -c 'cd changedetectionio && ./run_basic_tests.sh'
- name: Test CLI options
run: |
docker network inspect changedet-network >/dev/null 2>&1 || docker network create changedet-network
docker run --name test-cdio-cli-opts --network changedet-network test-changedetectionio bash -c 'changedetectionio/test_cli_opts.sh' &> cli-opts-output.txt
echo "=== CLI Options Test Output ==="
cat cli-opts-output.txt
- name: CLI Memory Test
run: |
echo "=== Checking CLI batch mode memory usage ==="
# Extract RSS memory value from output
RSS_MB=$(grep -oP "Memory consumption before worker shutdown: RSS=\K[\d.]+" cli-opts-output.txt | head -1 || echo "0")
echo "RSS Memory: ${RSS_MB} MB"
# Check if RSS is less than 100MB
if [ -n "$RSS_MB" ]; then
if (( $(echo "$RSS_MB < 100" | bc -l) )); then
echo "✓ Memory usage is acceptable: ${RSS_MB} MB < 100 MB"
else
echo "✗ Memory usage too high: ${RSS_MB} MB >= 100 MB"
exit 1
fi
else
echo "⚠ Could not extract memory usage, skipping check"
fi
- name: Extract memory report and logs
if: always()
uses: ./.github/actions/extract-memory-report
@@ -151,13 +125,6 @@ jobs:
name: test-cdio-basic-tests-output-py${{ env.PYTHON_VERSION }}
path: output-logs
- name: Store CLI test output
if: always()
uses: actions/upload-artifact@v6
with:
name: test-cdio-cli-opts-output-py${{ env.PYTHON_VERSION }}
path: cli-opts-output.txt
# Playwright tests
playwright-tests:
runs-on: ubuntu-latest

View File

@@ -2,24 +2,23 @@
# Read more https://github.com/dgtlmoon/changedetection.io/wiki
# Semver means never use .01, or 00. Should be .1.
__version__ = '0.52.9'
__version__ = '0.52.8'
from changedetectionio.strtobool import strtobool
from json.decoder import JSONDecodeError
from loguru import logger
import getopt
import logging
import os
import getopt
import platform
import signal
import threading
import time
import sys
# Eventlet completely removed - using threading mode for SocketIO
# This provides better Python 3.12+ compatibility and eliminates eventlet/asyncio conflicts
# Note: store and changedetection_app are imported inside main() to avoid
# initialization before argument parsing (allows --help to work without loading everything)
from changedetectionio import store
from changedetectionio.flask_app import changedetection_app
from loguru import logger
# ==============================================================================
# Multiprocessing Configuration - CRITICAL for Thread Safety
@@ -84,22 +83,11 @@ def get_version():
def sigshutdown_handler(_signo, _stack_frame):
name = signal.Signals(_signo).name
logger.critical(f'Shutdown: Got Signal - {name} ({_signo}), Fast shutdown initiated')
# Set exit flag immediately to stop all loops
app.config.exit.set()
datastore.stop_thread = True
# Log memory consumption before shutting down workers (cross-platform)
try:
import psutil
process = psutil.Process()
mem_info = process.memory_info()
rss_mb = mem_info.rss / 1024 / 1024
vms_mb = mem_info.vms / 1024 / 1024
logger.info(f"Memory consumption before worker shutdown: RSS={rss_mb:,.2f} MB, VMS={vms_mb:,.2f} MB")
except Exception as e:
logger.warning(f"Could not retrieve memory stats: {str(e)}")
# Shutdown workers and queues immediately
try:
from changedetectionio import worker_handler
@@ -137,47 +125,10 @@ def main():
global datastore
global app
# Early help/version check before any initialization
if '--help' in sys.argv or '-help' in sys.argv:
print('Usage: changedetection.py [options]')
print('')
print('Standard options:')
print(' -s SSL enable')
print(' -h HOST Listen host (default: 0.0.0.0)')
print(' -p PORT Listen port (default: 5000)')
print(' -d PATH Datastore path')
print(' -l LEVEL Log level (TRACE, DEBUG, INFO, SUCCESS, WARNING, ERROR, CRITICAL)')
print(' -c Cleanup unused snapshots')
print(' -C Create datastore directory if it doesn\'t exist')
print('')
print('Add URLs on startup:')
print(' -u URL Add URL to watch (can be used multiple times)')
print(' -u0 \'JSON\' Set options for first -u URL (e.g. \'{"processor":"text_json_diff"}\')')
print(' -u1 \'JSON\' Set options for second -u URL (0-indexed)')
print(' -u2 \'JSON\' Set options for third -u URL, etc.')
print(' Available options: processor, fetch_backend, headers, method, etc.')
print(' See model/Watch.py for all available options')
print('')
print('Recheck on startup:')
print(' -r all Queue all watches for recheck on startup')
print(' -r UUID,... Queue specific watches (comma-separated UUIDs)')
print('')
print('Batch mode:')
print(' -b Run in batch mode (process queue then exit)')
print(' Useful for CI/CD, cron jobs, or one-time checks')
print('')
sys.exit(0)
if '--version' in sys.argv or '-v' in sys.argv:
print(f'changedetection.io {__version__}')
sys.exit(0)
# Import heavy modules after help/version checks to keep startup fast for those flags
from changedetectionio import store
from changedetectionio.flask_app import changedetection_app
datastore_path = None
do_cleanup = False
# Optional URL to watch since start
default_url = None
# Set a default logger level
logger_level = 'DEBUG'
include_default_watches = True
@@ -186,12 +137,6 @@ def main():
port = int(os.environ.get('PORT', 5000))
ssl_mode = False
# Lists for multiple URLs and their options
urls_to_add = []
url_options = {} # Key: index (0-based), Value: dict of options
recheck_watches = None # None, 'all', or list of UUIDs
batch_mode = False # Run once then exit when queue is empty
# On Windows, create and use a default path.
if os.name == 'nt':
datastore_path = os.path.expandvars(r'%APPDATA%\changedetection.io')
@@ -200,85 +145,10 @@ def main():
# Must be absolute so that send_from_directory doesnt try to make it relative to backend/
datastore_path = os.path.join(os.getcwd(), "../datastore")
# Pre-process arguments to extract -u, -u<N>, and -r options before getopt
# This allows unlimited -u0, -u1, -u2, ... options without predefining them
cleaned_argv = ['changedetection.py'] # Start with program name
i = 1
while i < len(sys.argv):
arg = sys.argv[i]
# Handle -u (add URL)
if arg == '-u' and i + 1 < len(sys.argv):
urls_to_add.append(sys.argv[i + 1])
i += 2
continue
# Handle -u<N> (set options for URL at index N)
if arg.startswith('-u') and len(arg) > 2 and arg[2:].isdigit():
idx = int(arg[2:])
if i + 1 < len(sys.argv):
try:
import json
url_options[idx] = json.loads(sys.argv[i + 1])
except json.JSONDecodeError as e:
print(f'Error: Invalid JSON for {arg}: {sys.argv[i + 1]}')
print(f'JSON decode error: {e}')
sys.exit(2)
i += 2
continue
# Handle -r (recheck watches)
if arg == '-r' and i + 1 < len(sys.argv):
recheck_arg = sys.argv[i + 1]
if recheck_arg.lower() == 'all':
recheck_watches = 'all'
else:
# Parse comma-separated list of UUIDs
recheck_watches = [uuid.strip() for uuid in recheck_arg.split(',') if uuid.strip()]
i += 2
continue
# Handle -b (batch mode - run once and exit)
if arg == '-b':
batch_mode = True
i += 1
continue
# Keep other arguments for getopt
cleaned_argv.append(arg)
i += 1
try:
opts, args = getopt.getopt(cleaned_argv[1:], "6Ccsd:h:p:l:", "port")
except getopt.GetoptError as e:
print('Usage: changedetection.py [options]')
print('')
print('Standard options:')
print(' -s SSL enable')
print(' -h HOST Listen host (default: 0.0.0.0)')
print(' -p PORT Listen port (default: 5000)')
print(' -d PATH Datastore path')
print(' -l LEVEL Log level (TRACE, DEBUG, INFO, SUCCESS, WARNING, ERROR, CRITICAL)')
print(' -c Cleanup unused snapshots')
print(' -C Create datastore directory if it doesn\'t exist')
print('')
print('Add URLs on startup:')
print(' -u URL Add URL to watch (can be used multiple times)')
print(' -u0 \'JSON\' Set options for first -u URL (e.g. \'{"processor":"text_json_diff"}\')')
print(' -u1 \'JSON\' Set options for second -u URL (0-indexed)')
print(' -u2 \'JSON\' Set options for third -u URL, etc.')
print(' Available options: processor, fetch_backend, headers, method, etc.')
print(' See model/Watch.py for all available options')
print('')
print('Recheck on startup:')
print(' -r all Queue all watches for recheck on startup')
print(' -r UUID,... Queue specific watches (comma-separated UUIDs)')
print('')
print('Batch mode:')
print(' -b Run in batch mode (process queue then exit)')
print(' Useful for CI/CD, cron jobs, or one-time checks')
print('')
print(f'Error: {e}')
opts, args = getopt.getopt(sys.argv[1:], "6Ccsd:h:p:l:u:", "port")
except getopt.GetoptError:
print('backend.py -s SSL enable -h [host] -p [port] -d [datastore path] -u [default URL to watch] -l [debug level - TRACE, DEBUG(default), INFO, SUCCESS, WARNING, ERROR, CRITICAL]')
sys.exit(2)
create_datastore_dir = False
@@ -303,6 +173,10 @@ def main():
if opt == '-d':
datastore_path = arg
if opt == '-u':
default_url = arg
include_default_watches = False
# Cleanup (remove text files that arent in the index)
if opt == '-c':
do_cleanup = True
@@ -314,10 +188,6 @@ def main():
if opt == '-l':
logger_level = int(arg) if arg.isdigit() else arg.upper()
# If URLs are provided, don't include default watches
if urls_to_add:
include_default_watches = False
logger.success(f"changedetection.io version {get_version()} starting.")
# Launch using SocketIO run method for proper integration (if enabled)
@@ -354,14 +224,11 @@ def main():
logging.getLogger('pyppeteer.connection.Connection').setLevel(logging.WARNING)
# isnt there some @thingy to attach to each route to tell it, that this route needs a datastore
app_config = {
'datastore_path': datastore_path,
'batch_mode': batch_mode
}
app_config = {'datastore_path': datastore_path}
if not os.path.isdir(app_config['datastore_path']):
if create_datastore_dir:
os.makedirs(app_config['datastore_path'], exist_ok=True)
os.mkdir(app_config['datastore_path'])
else:
logger.critical(
f"ERROR: Directory path for the datastore '{app_config['datastore_path']}'"
@@ -382,132 +249,11 @@ def main():
from changedetectionio.pluggy_interface import inject_datastore_into_plugins
inject_datastore_into_plugins(datastore)
# Step 1: Add URLs with their options (if provided via -u flags)
added_watch_uuids = []
if urls_to_add:
logger.info(f"Adding {len(urls_to_add)} URL(s) from command line")
for idx, url in enumerate(urls_to_add):
extras = url_options.get(idx, {})
if extras:
logger.debug(f"Adding watch {idx}: {url} with options: {extras}")
else:
logger.debug(f"Adding watch {idx}: {url}")
new_uuid = datastore.add_watch(url=url, extras=extras)
if new_uuid:
added_watch_uuids.append(new_uuid)
logger.success(f"Added watch: {url} (UUID: {new_uuid})")
else:
logger.error(f"Failed to add watch: {url}")
if default_url:
datastore.add_watch(url = default_url)
app = changedetection_app(app_config, datastore)
# Step 2: Queue newly added watches (if -u was provided in batch mode)
# This must happen AFTER app initialization so update_q is available
if batch_mode and added_watch_uuids:
from changedetectionio.flask_app import update_q
from changedetectionio import queuedWatchMetaData, worker_handler
logger.info(f"Batch mode: Queuing {len(added_watch_uuids)} newly added watches")
for watch_uuid in added_watch_uuids:
try:
worker_handler.queue_item_async_safe(
update_q,
queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid})
)
logger.debug(f"Queued newly added watch: {watch_uuid}")
except Exception as e:
logger.error(f"Failed to queue watch {watch_uuid}: {e}")
# Step 3: Queue watches for recheck (if -r was provided)
# This must happen AFTER app initialization so update_q is available
if recheck_watches is not None:
from changedetectionio.flask_app import update_q
from changedetectionio import queuedWatchMetaData, worker_handler
watches_to_queue = []
if recheck_watches == 'all':
# Queue all watches, excluding those already queued in batch mode
all_watches = list(datastore.data['watching'].keys())
if batch_mode and added_watch_uuids:
# Exclude newly added watches that were already queued in batch mode
watches_to_queue = [uuid for uuid in all_watches if uuid not in added_watch_uuids]
logger.info(f"Queuing {len(watches_to_queue)} existing watches for recheck ({len(added_watch_uuids)} newly added watches already queued)")
else:
watches_to_queue = all_watches
logger.info(f"Queuing all {len(watches_to_queue)} watches for recheck")
else:
# Queue specific UUIDs
watches_to_queue = recheck_watches
logger.info(f"Queuing {len(watches_to_queue)} specific watches for recheck")
queued_count = 0
for watch_uuid in watches_to_queue:
if watch_uuid in datastore.data['watching']:
try:
worker_handler.queue_item_async_safe(
update_q,
queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid})
)
queued_count += 1
logger.debug(f"Queued watch for recheck: {watch_uuid}")
except Exception as e:
logger.error(f"Failed to queue watch {watch_uuid}: {e}")
else:
logger.warning(f"Watch UUID not found in datastore: {watch_uuid}")
logger.success(f"Successfully queued {queued_count} watches for recheck")
# Step 4: Setup batch mode monitor (if -b was provided)
if batch_mode:
from changedetectionio.flask_app import update_q
def batch_mode_monitor():
"""Monitor queue and workers, shutdown when all work is complete"""
import time
logger.info("Batch mode: Waiting for all queued items to complete...")
# Wait a bit for workers to start processing
time.sleep(3)
idle_start = None
idle_threshold = 3 # Seconds to wait after queue is empty
while True:
try:
queue_size = update_q.qsize()
running_uuids = worker_handler.get_running_uuids()
running_count = len(running_uuids)
logger.info(f"Batch mode: Queue size: {queue_size}, Running workers: {running_count}, Running UUIDs: {running_uuids}")
if queue_size == 0 and running_count == 0:
if idle_start is None:
idle_start = time.time()
logger.info(f"Batch mode: Queue empty and workers idle, waiting {idle_threshold}s before shutdown...")
elif time.time() - idle_start >= idle_threshold:
logger.success("Batch mode: All work completed, initiating shutdown")
# Trigger shutdown
import os, signal
os.kill(os.getpid(), signal.SIGTERM)
return
else:
if idle_start is not None:
logger.info(f"Batch mode: Activity detected (queue: {queue_size}, running: {running_count})")
idle_start = None
time.sleep(2)
except Exception as e:
logger.error(f"Batch mode monitor error: {e}")
time.sleep(2)
# Start monitor in background thread
monitor_thread = threading.Thread(target=batch_mode_monitor, daemon=True, name="BatchModeMonitor")
monitor_thread.start()
logger.info("Batch mode enabled: Will exit after all queued items are processed")
# Get the SocketIO instance from the Flask app (created in flask_app.py)
from changedetectionio.flask_app import socketio_server
global socketio
@@ -568,33 +314,20 @@ def main():
app.wsgi_app = ProxyFix(app.wsgi_app, x_prefix=1, x_host=1)
# In batch mode, skip starting the HTTP server - just keep workers running
if batch_mode:
logger.info("Batch mode: Skipping HTTP server startup, workers will process queue")
logger.info("Batch mode: Main thread will wait for shutdown signal")
# Keep main thread alive until batch monitor triggers shutdown
try:
while True:
time.sleep(1)
except KeyboardInterrupt:
logger.info("Batch mode: Keyboard interrupt received")
pass
else:
# Normal mode: Start HTTP server
# SocketIO instance is already initialized in flask_app.py
if socketio_server:
if ssl_mode:
logger.success(f"SSL mode enabled, attempting to start with '{ssl_cert_file}' and '{ssl_privkey_file}' in {os.getcwd()}")
socketio.run(app, host=host, port=int(port), debug=False,
ssl_context=(ssl_cert_file, ssl_privkey_file), allow_unsafe_werkzeug=True)
else:
socketio.run(app, host=host, port=int(port), debug=False, allow_unsafe_werkzeug=True)
# SocketIO instance is already initialized in flask_app.py
if socketio_server:
if ssl_mode:
logger.success(f"SSL mode enabled, attempting to start with '{ssl_cert_file}' and '{ssl_privkey_file}' in {os.getcwd()}")
socketio.run(app, host=host, port=int(port), debug=False,
ssl_context=(ssl_cert_file, ssl_privkey_file), allow_unsafe_werkzeug=True)
else:
# Run Flask app without Socket.IO if disabled
logger.info("Starting Flask app without Socket.IO server")
if ssl_mode:
logger.success(f"SSL mode enabled, attempting to start with '{ssl_cert_file}' and '{ssl_privkey_file}' in {os.getcwd()}")
app.run(host=host, port=int(port), debug=False,
ssl_context=(ssl_cert_file, ssl_privkey_file))
else:
app.run(host=host, port=int(port), debug=False)
socketio.run(app, host=host, port=int(port), debug=False, allow_unsafe_werkzeug=True)
else:
# Run Flask app without Socket.IO if disabled
logger.info("Starting Flask app without Socket.IO server")
if ssl_mode:
logger.success(f"SSL mode enabled, attempting to start with '{ssl_cert_file}' and '{ssl_privkey_file}' in {os.getcwd()}")
app.run(host=host, port=int(port), debug=False,
ssl_context=(ssl_cert_file, ssl_privkey_file))
else:
app.run(host=host, port=int(port), debug=False)

View File

@@ -163,10 +163,8 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
except ProcessorException as e:
if e.screenshot:
watch.save_screenshot(screenshot=e.screenshot)
e.screenshot = None # Free memory immediately
if e.xpath_data:
watch.save_xpath_data(data=e.xpath_data)
e.xpath_data = None # Free memory immediately
datastore.update_watch(uuid=uuid, update_obj={'last_error': e.message})
process_changedetection_results = False
@@ -186,11 +184,9 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
if e.screenshot:
watch.save_screenshot(screenshot=e.screenshot, as_error=True)
e.screenshot = None # Free memory immediately
if e.xpath_data:
watch.save_xpath_data(data=e.xpath_data)
e.xpath_data = None # Free memory immediately
process_changedetection_results = False
@@ -209,10 +205,8 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
if e.screenshot:
watch.save_screenshot(screenshot=e.screenshot, as_error=True)
e.screenshot = None # Free memory immediately
if e.xpath_data:
watch.save_xpath_data(data=e.xpath_data, as_error=True)
e.xpath_data = None # Free memory immediately
if e.page_text:
watch.save_error_text(contents=e.page_text)
@@ -229,11 +223,9 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
# Filter wasnt found, but we should still update the visual selector so that they can have a chance to set it up again
if e.screenshot:
watch.save_screenshot(screenshot=e.screenshot)
e.screenshot = None # Free memory immediately
if e.xpath_data:
watch.save_xpath_data(data=e.xpath_data)
e.xpath_data = None # Free memory immediately
# Only when enabled, send the notification
if watch.get('filter_failure_notification_send', False):
@@ -325,7 +317,6 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
err_text = "Error running JS Actions - Page request - "+e.message
if e.screenshot:
watch.save_screenshot(screenshot=e.screenshot, as_error=True)
e.screenshot = None # Free memory immediately
datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
'last_check_status': e.status_code})
process_changedetection_results = False
@@ -337,7 +328,6 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
if e.screenshot:
watch.save_screenshot(screenshot=e.screenshot, as_error=True)
e.screenshot = None # Free memory immediately
datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
'last_check_status': e.status_code,
@@ -379,17 +369,9 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
if changed_detected or not watch.history_n:
if update_handler.screenshot:
watch.save_screenshot(screenshot=update_handler.screenshot)
# Free screenshot memory immediately after saving
update_handler.screenshot = None
if hasattr(update_handler, 'fetcher') and hasattr(update_handler.fetcher, 'screenshot'):
update_handler.fetcher.screenshot = None
if update_handler.xpath_data:
watch.save_xpath_data(data=update_handler.xpath_data)
# Free xpath data memory
update_handler.xpath_data = None
if hasattr(update_handler, 'fetcher') and hasattr(update_handler.fetcher, 'xpath_data'):
update_handler.fetcher.xpath_data = None
# Ensure unique timestamp for history
if watch.newest_history_key and int(fetch_start_time) == int(watch.newest_history_key):
@@ -456,20 +438,6 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
update_handler.fetcher.clear_content()
logger.debug(f"Cleared fetcher content for UUID {uuid}")
# Explicitly delete update_handler to free all references
if update_handler:
del update_handler
update_handler = None
# Force aggressive memory cleanup after clearing
import gc
gc.collect()
try:
import ctypes
ctypes.CDLL('libc.so.6').malloc_trim(0)
except Exception:
pass
except Exception as e:
logger.error(f"Worker {worker_id} unexpected error processing {uuid}: {e}")
logger.error(f"Worker {worker_id} traceback:", exc_info=True)

View File

@@ -71,19 +71,10 @@ class Fetcher():
supports_screenshots = False # Can capture page screenshots
supports_xpath_element_data = False # Can extract xpath element positions/data for visual selector
# Screenshot element locking - prevents layout shifts during screenshot capture
# Only needed for visual comparison (image_ssim_diff processor)
# Locks element dimensions in the first viewport to prevent headers/ads from resizing
lock_viewport_elements = False # Default: disabled for performance
def __init__(self, **kwargs):
if kwargs and 'screenshot_format' in kwargs:
self.screenshot_format = kwargs.get('screenshot_format')
# Allow lock_viewport_elements to be set via kwargs
if kwargs and 'lock_viewport_elements' in kwargs:
self.lock_viewport_elements = kwargs.get('lock_viewport_elements')
@classmethod
def get_status_icon_data(cls):

View File

@@ -10,20 +10,18 @@ from changedetectionio.content_fetchers import SCREENSHOT_MAX_HEIGHT_DEFAULT, vi
from changedetectionio.content_fetchers.base import Fetcher, manage_user_agent
from changedetectionio.content_fetchers.exceptions import PageUnloadable, Non200ErrorCodeReceived, EmptyReply, ScreenshotUnavailable
async def capture_full_page_async(page, screenshot_format='JPEG', watch_uuid=None, lock_viewport_elements=False):
async def capture_full_page_async(page, screenshot_format='JPEG'):
import os
import time
import multiprocessing
start = time.time()
watch_info = f"[{watch_uuid}] " if watch_uuid else ""
setup_start = time.time()
page_height = await page.evaluate("document.documentElement.scrollHeight")
page_width = await page.evaluate("document.documentElement.scrollWidth")
original_viewport = page.viewport_size
dimensions_time = time.time() - setup_start
logger.debug(f"{watch_info}Playwright viewport size {page.viewport_size} page height {page_height} page width {page_width} (got dimensions in {dimensions_time:.2f}s)")
logger.debug(f"Playwright viewport size {page.viewport_size} page height {page_height} page width {page_width}")
# Use an approach similar to puppeteer: set a larger viewport and take screenshots in chunks
step_size = SCREENSHOT_SIZE_STITCH_THRESHOLD # Size that won't cause GPU to overflow
@@ -31,31 +29,25 @@ async def capture_full_page_async(page, screenshot_format='JPEG', watch_uuid=Non
y = 0
elements_locked = False
# Only lock viewport elements if explicitly enabled (for image_ssim_diff processor)
# This prevents headers/ads from resizing when viewport changes
if lock_viewport_elements and page_height > page.viewport_size['height']:
lock_start = time.time()
if page_height > page.viewport_size['height']:
# Lock all element dimensions BEFORE screenshot to prevent CSS media queries from resizing
# capture_full_page_async() changes viewport height which triggers @media (min-height) rules
lock_elements_js_path = os.path.join(os.path.dirname(__file__), 'res', 'lock-elements-sizing.js')
with open(lock_elements_js_path, 'r') as f:
lock_elements_js = f.read()
await page.evaluate(lock_elements_js)
elements_locked = True
lock_time = time.time() - lock_start
logger.debug(f"{watch_info}Viewport element locking enabled (took {lock_time:.2f}s)")
if page_height > page.viewport_size['height']:
logger.debug("Element dimensions locked before screenshot capture")
if page_height < step_size:
step_size = page_height # Incase page is bigger than default viewport but smaller than proposed step size
viewport_start = time.time()
logger.debug(f"{watch_info}Setting bigger viewport to step through large page width W{page.viewport_size['width']}xH{step_size} because page_height > viewport_size")
logger.debug(f"Setting bigger viewport to step through large page width W{page.viewport_size['width']}xH{step_size} because page_height > viewport_size")
# Set viewport to a larger size to capture more content at once
await page.set_viewport_size({'width': page.viewport_size['width'], 'height': step_size})
viewport_time = time.time() - viewport_start
logger.debug(f"{watch_info}Viewport changed to {page.viewport_size['width']}x{step_size} (took {viewport_time:.2f}s)")
# Capture screenshots in chunks up to the max total height
capture_start = time.time()
chunk_times = []
# Use PNG for better quality (no compression artifacts), JPEG for smaller size
screenshot_type = screenshot_format.lower() if screenshot_format else 'jpeg'
# PNG should use quality 100, JPEG uses configurable quality
@@ -77,11 +69,7 @@ async def capture_full_page_async(page, screenshot_format='JPEG', watch_uuid=Non
if screenshot_type == 'jpeg':
screenshot_kwargs['quality'] = screenshot_quality
chunk_start = time.time()
screenshot_chunks.append(await page.screenshot(**screenshot_kwargs))
chunk_time = time.time() - chunk_start
chunk_times.append(chunk_time)
logger.debug(f"{watch_info}Chunk {len(screenshot_chunks)} captured in {chunk_time:.2f}s")
y += step_size
# Restore original viewport size
@@ -93,54 +81,40 @@ async def capture_full_page_async(page, screenshot_format='JPEG', watch_uuid=Non
with open(unlock_elements_js_path, 'r') as f:
unlock_elements_js = f.read()
await page.evaluate(unlock_elements_js)
logger.debug(f"{watch_info}Element dimensions unlocked after screenshot capture")
capture_time = time.time() - capture_start
total_capture_time = sum(chunk_times)
logger.debug(f"{watch_info}All {len(screenshot_chunks)} chunks captured in {capture_time:.2f}s (total chunk time: {total_capture_time:.2f}s)")
logger.debug("Element dimensions unlocked after screenshot capture")
# If we have multiple chunks, stitch them together
if len(screenshot_chunks) > 1:
stitch_start = time.time()
logger.debug(f"{watch_info}Starting stitching of {len(screenshot_chunks)} chunks")
logger.debug(f"Screenshot stitching {len(screenshot_chunks)} chunks together")
# Always use spawn subprocess for ANY stitching (2+ chunks)
# PIL allocates at C level and Python GC never releases it - subprocess exit forces OS to reclaim
# Trade-off: 35MB resource_tracker vs 500MB+ PIL leak in main process
from changedetectionio.content_fetchers.screenshot_handler import stitch_images_worker_raw_bytes
import multiprocessing
import struct
# For small number of chunks (2-3), stitch inline to avoid multiprocessing overhead
# Only use separate process for many chunks (4+) to avoid blocking the event loop
if len(screenshot_chunks) <= 3:
from changedetectionio.content_fetchers.screenshot_handler import stitch_images_inline
screenshot = stitch_images_inline(screenshot_chunks, page_height, SCREENSHOT_MAX_TOTAL_HEIGHT)
else:
# Use separate process for many chunks to avoid blocking
# Always use spawn for thread safety - consistent behavior in tests and production
from changedetectionio.content_fetchers.screenshot_handler import stitch_images_worker
ctx = multiprocessing.get_context('spawn')
parent_conn, child_conn = ctx.Pipe()
p = ctx.Process(target=stitch_images_worker, args=(child_conn, screenshot_chunks, page_height, SCREENSHOT_MAX_TOTAL_HEIGHT))
p.start()
screenshot = parent_conn.recv_bytes()
p.join()
# Explicit cleanup
del p
del parent_conn, child_conn
ctx = multiprocessing.get_context('spawn')
parent_conn, child_conn = ctx.Pipe()
p = ctx.Process(target=stitch_images_worker_raw_bytes, args=(child_conn, page_height, SCREENSHOT_MAX_TOTAL_HEIGHT))
p.start()
# Send via raw bytes (no pickle)
parent_conn.send_bytes(struct.pack('I', len(screenshot_chunks)))
for chunk in screenshot_chunks:
parent_conn.send_bytes(chunk)
screenshot = parent_conn.recv_bytes()
p.join()
parent_conn.close()
child_conn.close()
del p, parent_conn, child_conn
stitch_time = time.time() - stitch_start
total_time = time.time() - start
setup_time = total_time - capture_time - stitch_time
logger.debug(
f"{watch_info}Screenshot complete - Page height: {page_height}px, Capture height: {SCREENSHOT_MAX_TOTAL_HEIGHT}px | "
f"Setup: {setup_time:.2f}s, Capture: {capture_time:.2f}s, Stitching: {stitch_time:.2f}s, Total: {total_time:.2f}s")
f"Screenshot (chunked/stitched) - Page height: {page_height} Capture height: {SCREENSHOT_MAX_TOTAL_HEIGHT} - Stitched together in {time.time() - start:.2f}s")
# Explicit cleanup
del screenshot_chunks
screenshot_chunks = None
return screenshot
total_time = time.time() - start
setup_time = total_time - capture_time
logger.debug(
f"{watch_info}Screenshot complete - Page height: {page_height}px, Capture height: {SCREENSHOT_MAX_TOTAL_HEIGHT}px | "
f"Setup: {setup_time:.2f}s, Single chunk: {capture_time:.2f}s, Total: {total_time:.2f}s")
f"Screenshot Page height: {page_height} Capture height: {SCREENSHOT_MAX_TOTAL_HEIGHT} - Stitched together in {time.time() - start:.2f}s")
return screenshot_chunks[0]
@@ -210,8 +184,7 @@ class fetcher(Fetcher):
async def screenshot_step(self, step_n=''):
super().screenshot_step(step_n=step_n)
watch_uuid = getattr(self, 'watch_uuid', None)
screenshot = await capture_full_page_async(page=self.page, screenshot_format=self.screenshot_format, watch_uuid=watch_uuid, lock_viewport_elements=self.lock_viewport_elements)
screenshot = await capture_full_page_async(page=self.page, screenshot_format=self.screenshot_format)
# Request GC immediately after screenshot to free memory
# Screenshots can be large and browser steps take many of them
@@ -260,7 +233,6 @@ class fetcher(Fetcher):
import playwright._impl._errors
import time
self.delete_browser_steps_screenshots()
self.watch_uuid = watch_uuid # Store for use in screenshot_step
response = None
async with async_playwright() as p:
@@ -346,7 +318,7 @@ class fetcher(Fetcher):
logger.error(f"Error fetching FavIcon info {str(e)}, continuing.")
if self.status_code != 200 and not ignore_status_codes:
screenshot = await capture_full_page_async(self.page, screenshot_format=self.screenshot_format, watch_uuid=watch_uuid, lock_viewport_elements=self.lock_viewport_elements)
screenshot = await capture_full_page_async(self.page, screenshot_format=self.screenshot_format)
# Cleanup before raising to prevent memory leak
await self.page.close()
await context.close()
@@ -402,17 +374,7 @@ class fetcher(Fetcher):
# which will significantly increase the IO size between the server and client, it's recommended to use the lowest
# acceptable screenshot quality here
# The actual screenshot - this always base64 and needs decoding! horrible! huge CPU usage
self.screenshot = await capture_full_page_async(page=self.page, screenshot_format=self.screenshot_format, watch_uuid=watch_uuid, lock_viewport_elements=self.lock_viewport_elements)
# Force aggressive memory cleanup - screenshots are large and base64 decode creates temporary buffers
await self.page.request_gc()
gc.collect()
# Release C-level memory from base64 decode back to OS
try:
import ctypes
ctypes.CDLL('libc.so.6').malloc_trim(0)
except Exception:
pass
self.screenshot = await capture_full_page_async(page=self.page, screenshot_format=self.screenshot_format)
except ScreenshotUnavailable:
# Re-raise screenshot unavailable exceptions

View File

@@ -20,20 +20,18 @@ from changedetectionio.content_fetchers.exceptions import PageUnloadable, Non200
# Screenshots also travel via the ws:// (websocket) meaning that the binary data is base64 encoded
# which will significantly increase the IO size between the server and client, it's recommended to use the lowest
# acceptable screenshot quality here
async def capture_full_page(page, screenshot_format='JPEG', watch_uuid=None, lock_viewport_elements=False):
async def capture_full_page(page, screenshot_format='JPEG'):
import os
import time
import multiprocessing
start = time.time()
watch_info = f"[{watch_uuid}] " if watch_uuid else ""
setup_start = time.time()
page_height = await page.evaluate("document.documentElement.scrollHeight")
page_width = await page.evaluate("document.documentElement.scrollWidth")
original_viewport = page.viewport
dimensions_time = time.time() - setup_start
logger.debug(f"{watch_info}Puppeteer viewport size {page.viewport} page height {page_height} page width {page_width} (got dimensions in {dimensions_time:.2f}s)")
logger.debug(f"Puppeteer viewport size {page.viewport} page height {page_height} page width {page_width}")
# Bug 3 in Playwright screenshot handling
# Some bug where it gives the wrong screenshot size, but making a request with the clip set first seems to solve it
@@ -52,35 +50,20 @@ async def capture_full_page(page, screenshot_format='JPEG', watch_uuid=None, loc
screenshot_chunks = []
y = 0
elements_locked = False
# Only lock viewport elements if explicitly enabled (for image_ssim_diff processor)
# This prevents headers/ads from resizing when viewport changes
if lock_viewport_elements and page_height > page.viewport['height']:
lock_start = time.time()
if page_height > page.viewport['height']:
# Lock all element dimensions BEFORE screenshot to prevent CSS media queries from resizing
# capture_full_page() changes viewport height which triggers @media (min-height) rules
lock_elements_js_path = os.path.join(os.path.dirname(__file__), 'res', 'lock-elements-sizing.js')
file_read_start = time.time()
with open(lock_elements_js_path, 'r') as f:
lock_elements_js = f.read()
file_read_time = time.time() - file_read_start
evaluate_start = time.time()
await page.evaluate(lock_elements_js)
evaluate_time = time.time() - evaluate_start
elements_locked = True
lock_time = time.time() - lock_start
logger.debug(f"{watch_info}Viewport element locking enabled - File read: {file_read_time:.3f}s, Browser evaluate: {evaluate_time:.2f}s, Total: {lock_time:.2f}s")
logger.debug("Element dimensions locked before screenshot capture")
if page_height > page.viewport['height']:
if page_height < step_size:
step_size = page_height # Incase page is bigger than default viewport but smaller than proposed step size
viewport_start = time.time()
await page.setViewport({'width': page.viewport['width'], 'height': step_size})
viewport_time = time.time() - viewport_start
logger.debug(f"{watch_info}Viewport changed to {page.viewport['width']}x{step_size} (took {viewport_time:.2f}s)")
capture_start = time.time()
chunk_times = []
while y < min(page_height, SCREENSHOT_MAX_TOTAL_HEIGHT):
# better than scrollTo incase they override it in the page
await page.evaluate(
@@ -99,11 +82,7 @@ async def capture_full_page(page, screenshot_format='JPEG', watch_uuid=None, loc
if screenshot_type == 'jpeg':
screenshot_kwargs['quality'] = screenshot_quality
chunk_start = time.time()
screenshot_chunks.append(await page.screenshot(**screenshot_kwargs))
chunk_time = time.time() - chunk_start
chunk_times.append(chunk_time)
logger.debug(f"{watch_info}Chunk {len(screenshot_chunks)} captured in {chunk_time:.2f}s")
y += step_size
await page.setViewport({'width': original_viewport['width'], 'height': original_viewport['height']})
@@ -114,53 +93,26 @@ async def capture_full_page(page, screenshot_format='JPEG', watch_uuid=None, loc
with open(unlock_elements_js_path, 'r') as f:
unlock_elements_js = f.read()
await page.evaluate(unlock_elements_js)
logger.debug(f"{watch_info}Element dimensions unlocked after screenshot capture")
capture_time = time.time() - capture_start
total_capture_time = sum(chunk_times)
logger.debug(f"{watch_info}All {len(screenshot_chunks)} chunks captured in {capture_time:.2f}s (total chunk time: {total_capture_time:.2f}s)")
logger.debug("Element dimensions unlocked after screenshot capture")
if len(screenshot_chunks) > 1:
stitch_start = time.time()
logger.debug(f"{watch_info}Starting stitching of {len(screenshot_chunks)} chunks")
# Always use spawn subprocess for ANY stitching (2+ chunks)
# PIL allocates at C level and Python GC never releases it - subprocess exit forces OS to reclaim
# Trade-off: 35MB resource_tracker vs 500MB+ PIL leak in main process
from changedetectionio.content_fetchers.screenshot_handler import stitch_images_worker_raw_bytes
import multiprocessing
import struct
# Always use spawn for thread safety - consistent behavior in tests and production
from changedetectionio.content_fetchers.screenshot_handler import stitch_images_worker
logger.debug(f"Screenshot stitching {len(screenshot_chunks)} chunks together")
ctx = multiprocessing.get_context('spawn')
parent_conn, child_conn = ctx.Pipe()
p = ctx.Process(target=stitch_images_worker_raw_bytes, args=(child_conn, page_height, SCREENSHOT_MAX_TOTAL_HEIGHT))
p = ctx.Process(target=stitch_images_worker, args=(child_conn, screenshot_chunks, page_height, SCREENSHOT_MAX_TOTAL_HEIGHT))
p.start()
# Send via raw bytes (no pickle)
parent_conn.send_bytes(struct.pack('I', len(screenshot_chunks)))
for chunk in screenshot_chunks:
parent_conn.send_bytes(chunk)
screenshot = parent_conn.recv_bytes()
p.join()
parent_conn.close()
child_conn.close()
del p, parent_conn, child_conn
stitch_time = time.time() - stitch_start
total_time = time.time() - start
setup_time = total_time - capture_time - stitch_time
logger.debug(
f"{watch_info}Screenshot complete - Page height: {page_height}px, Capture height: {SCREENSHOT_MAX_TOTAL_HEIGHT}px | "
f"Setup: {setup_time:.2f}s, Capture: {capture_time:.2f}s, Stitching: {stitch_time:.2f}s, Total: {total_time:.2f}s")
f"Screenshot (chunked/stitched) - Page height: {page_height} Capture height: {SCREENSHOT_MAX_TOTAL_HEIGHT} - Stitched together in {time.time() - start:.2f}s")
screenshot_chunks = None
return screenshot
total_time = time.time() - start
setup_time = total_time - capture_time
logger.debug(
f"{watch_info}Screenshot complete - Page height: {page_height}px, Capture height: {SCREENSHOT_MAX_TOTAL_HEIGHT}px | "
f"Setup: {setup_time:.2f}s, Single chunk: {capture_time:.2f}s, Total: {total_time:.2f}s")
f"Screenshot Page height: {page_height} Capture height: {SCREENSHOT_MAX_TOTAL_HEIGHT} - Stitched together in {time.time() - start:.2f}s")
return screenshot_chunks[0]
@@ -405,7 +357,7 @@ class fetcher(Fetcher):
logger.error(f"Error fetching FavIcon info {str(e)}, continuing.")
if self.status_code != 200 and not ignore_status_codes:
screenshot = await capture_full_page(page=self.page, screenshot_format=self.screenshot_format, watch_uuid=watch_uuid, lock_viewport_elements=self.lock_viewport_elements)
screenshot = await capture_full_page(page=self.page, screenshot_format=self.screenshot_format)
raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, screenshot=screenshot)
@@ -435,17 +387,7 @@ class fetcher(Fetcher):
# Now take screenshot (scrolling may trigger layout changes, but measurements are already captured)
logger.debug(f"Screenshot format {self.screenshot_format}")
self.screenshot = await capture_full_page(page=self.page, screenshot_format=self.screenshot_format, watch_uuid=watch_uuid, lock_viewport_elements=self.lock_viewport_elements)
# Force aggressive memory cleanup - pyppeteer base64 decode creates temporary buffers
import gc
gc.collect()
# Release C-level memory from base64 decode back to OS
try:
import ctypes
ctypes.CDLL('libc.so.6').malloc_trim(0)
except Exception:
pass
self.screenshot = await capture_full_page(page=self.page, screenshot_format=self.screenshot_format)
self.xpath_data = await self.page.evaluate(XPATH_ELEMENT_JS, {
"visualselector_xpath_selectors": visualselector_xpath_selectors,
"max_height": MAX_TOTAL_HEIGHT

View File

@@ -1,5 +1,5 @@
/**
* Lock Element Dimensions for Screenshot Capture (First Viewport Only)
* Lock Element Dimensions for Screenshot Capture
*
* THE PROBLEM:
* When taking full-page screenshots of tall pages, Chrome/Puppeteer/Playwright need to:
@@ -10,31 +10,40 @@
* However, changing the viewport height triggers CSS media queries like:
* @media (min-height: 860px) { .ad { height: 250px; } }
*
* This causes elements (especially ads/headers) to resize during screenshot capture.
* This causes elements (especially ads) to resize during screenshot capture, creating a mismatch:
* - Screenshot shows element at NEW size (after media query triggered)
* - xpath element coordinates measured at OLD size (before viewport change)
* - Visual selector overlays don't align with screenshot
*
* EXAMPLE BUG:
* - Initial viewport: 1280x800, ad height: 138px, article position: 279px ✓
* - Viewport changes to 1280x3809 for screenshot
* - Media query triggers: ad expands to 250px
* - All content below shifts down by 112px (250-138)
* - Article now at position: 391px (279+112)
* - But xpath data says 279px → 112px mismatch! ✗
*
* THE SOLUTION:
* Lock element dimensions in the FIRST VIEWPORT ONLY with !important inline styles.
* This prevents headers, navigation, and top ads from resizing when viewport changes.
* We only lock the visible portion because:
* - Most layout shifts happen in headers/navbars/top ads
* - Locking only visible elements is 100x+ faster (100-200 elements vs 10,000+)
* - Below-fold content shifts don't affect visual comparison accuracy
* Before changing viewport, lock ALL element dimensions with !important inline styles.
* Inline styles with !important override media query CSS, preventing layout changes.
*
* WHAT THIS SCRIPT DOES:
* 1. Gets current viewport height
* 2. Finds elements within first viewport (top of page to bottom of screen)
* 3. Locks their dimensions with !important inline styles
* 1. Iterates through every element on the page
* 2. Captures current computed dimensions (width, height)
* 3. Sets inline styles with !important to freeze those dimensions
* 4. Disables ResizeObserver API (for JS-based resizing)
* 5. When viewport changes for screenshot, media queries can't resize anything
* 6. Layout remains consistent → xpath coordinates match screenshot ✓
*
* USAGE:
* Execute this script BEFORE calling capture_full_page() / screenshot functions.
* Only enabled for image_ssim_diff processor (visual comparison).
* Default: OFF for performance.
* The page must be fully loaded and settled at its initial viewport size.
* No need to restore state afterward - page is closed after screenshot.
*
* PERFORMANCE:
* - Only processes 100-300 elements (first viewport) vs 10,000+ (entire page)
* - Typically completes in 10-50ms
* - 100x+ faster than locking entire page
* - Iterates all DOM elements (can be 1000s on complex pages)
* - Typically completes in 50-200ms
* - One-time cost before screenshot, well worth it for coordinate accuracy
*
* @see https://github.com/dgtlmoon/changedetection.io/issues/XXXX
*/
@@ -43,34 +52,11 @@
// Store original styles in a global WeakMap for later restoration
window.__elementSizingRestore = new WeakMap();
const start = performance.now();
// Get current viewport height (visible portion of page)
const viewportHeight = window.innerHeight;
// Get all elements and filter to FIRST VIEWPORT ONLY
// This dramatically reduces elements to process (100-300 vs 10,000+)
const allElements = Array.from(document.querySelectorAll('*'));
// BATCH READ PHASE: Get bounding rects and filter to viewport
const measurements = allElements.map(el => {
const rect = el.getBoundingClientRect();
// Lock ALL element dimensions to prevent media query layout changes
document.querySelectorAll('*').forEach(el => {
const computed = window.getComputedStyle(el);
const rect = el.getBoundingClientRect();
// Only lock elements in the first viewport (visible on initial page load)
// rect.top < viewportHeight means element starts within visible area
const inViewport = rect.top < viewportHeight && rect.top >= 0;
const hasSize = rect.height > 0 && rect.width > 0;
return inViewport && hasSize ? { el, computed, rect } : null;
}).filter(Boolean); // Remove null entries
const elapsed = performance.now() - start;
console.log(`Locked first viewport elements: ${measurements.length} of ${allElements.length} total elements (viewport height: ${viewportHeight}px, took ${elapsed.toFixed(0)}ms)`);
// BATCH WRITE PHASE: Apply all inline styles without triggering layout
// No interleaved reads means browser can optimize style application
measurements.forEach(({el, computed, rect}) => {
// Save original inline style values BEFORE locking
const properties = ['height', 'min-height', 'max-height', 'width', 'min-width', 'max-width'];
const originalStyles = {};
@@ -103,5 +89,5 @@
disconnect() {}
};
console.log(`✓ Element dimensions locked (${measurements.length} elements) to prevent media query changes during screenshot`);
console.log('✓ Element dimensions locked to prevent media query changes during screenshot');
})();

View File

@@ -8,42 +8,92 @@ from loguru import logger
from changedetectionio.content_fetchers import SCREENSHOT_MAX_HEIGHT_DEFAULT, SCREENSHOT_DEFAULT_QUALITY
def stitch_images_worker_raw_bytes(pipe_conn, original_page_height, capture_height):
# Cache font to avoid loading on every stitch
_cached_font = None
def _get_caption_font():
"""Get or create cached font for caption text."""
global _cached_font
if _cached_font is None:
from PIL import ImageFont
try:
_cached_font = ImageFont.truetype("arial.ttf", 35)
except IOError:
_cached_font = ImageFont.load_default()
return _cached_font
def stitch_images_inline(chunks_bytes, original_page_height, capture_height):
"""
Stitch image chunks together in a separate process.
Uses spawn multiprocessing to isolate PIL's C-level memory allocation.
When the subprocess exits, the OS reclaims ALL memory including C-level allocations
that Python's GC cannot release. This prevents the ~50MB per stitch from accumulating
in the main process.
Trade-off: Adds 35MB resource_tracker subprocess, but prevents 500MB+ memory leak
in main process (much better at scale: 35GB vs 500GB for 1000 instances).
Stitch image chunks together inline (no multiprocessing).
Optimized for small number of chunks (2-3) to avoid process creation overhead.
Args:
pipe_conn: Pipe connection to receive data and send result
chunks_bytes: List of JPEG image bytes
original_page_height: Original page height in pixels
capture_height: Maximum capture height
Returns:
bytes: Stitched JPEG image
"""
import os
import io
from PIL import Image, ImageDraw
# Load images from byte chunks
images = [Image.open(io.BytesIO(b)) for b in chunks_bytes]
total_height = sum(im.height for im in images)
max_width = max(im.width for im in images)
# Create stitched image
stitched = Image.new('RGB', (max_width, total_height))
y_offset = 0
for im in images:
stitched.paste(im, (0, y_offset))
y_offset += im.height
im.close() # Close immediately after pasting
# Draw caption only if page was trimmed
if original_page_height > capture_height:
draw = ImageDraw.Draw(stitched)
caption_text = f"WARNING: Screenshot was {original_page_height}px but trimmed to {capture_height}px because it was too long"
padding = 10
font = _get_caption_font()
bbox = draw.textbbox((0, 0), caption_text, font=font)
text_width = bbox[2] - bbox[0]
text_height = bbox[3] - bbox[1]
# Draw white background rectangle
draw.rectangle([(0, 0), (max_width, text_height + 2 * padding)], fill=(255, 255, 255))
# Draw text centered
text_x = (max_width - text_width) // 2
draw.text((text_x, padding), caption_text, font=font, fill=(255, 0, 0))
# Encode to JPEG
output = io.BytesIO()
stitched.save(output, format="JPEG", quality=int(os.getenv("SCREENSHOT_QUALITY", SCREENSHOT_DEFAULT_QUALITY)), optimize=True)
result = output.getvalue()
# Cleanup
stitched.close()
return result
def stitch_images_worker(pipe_conn, chunks_bytes, original_page_height, capture_height):
"""
Stitch image chunks together in a separate process.
Used for large number of chunks (4+) to avoid blocking the main event loop.
"""
import os
import io
import struct
from PIL import Image, ImageDraw, ImageFont
try:
# Receive chunk count as 4-byte integer (no pickle!)
count_bytes = pipe_conn.recv_bytes()
chunk_count = struct.unpack('I', count_bytes)[0]
# Receive each chunk as raw bytes (no pickle!)
chunks_bytes = []
for _ in range(chunk_count):
chunks_bytes.append(pipe_conn.recv_bytes())
# Load images from byte chunks
images = [Image.open(io.BytesIO(b)) for b in chunks_bytes]
del chunks_bytes
total_height = sum(im.height for im in images)
max_width = max(im.width for im in images)
@@ -53,14 +103,15 @@ def stitch_images_worker_raw_bytes(pipe_conn, original_page_height, capture_heig
for im in images:
stitched.paste(im, (0, y_offset))
y_offset += im.height
im.close()
del images
im.close() # Close immediately after pasting
# Draw caption only if page was trimmed
if original_page_height > capture_height:
draw = ImageDraw.Draw(stitched)
caption_text = f"WARNING: Screenshot was {original_page_height}px but trimmed to {capture_height}px because it was too long"
padding = 10
# Try to load font
try:
font = ImageFont.truetype("arial.ttf", 35)
except IOError:
@@ -69,26 +120,23 @@ def stitch_images_worker_raw_bytes(pipe_conn, original_page_height, capture_heig
bbox = draw.textbbox((0, 0), caption_text, font=font)
text_width = bbox[2] - bbox[0]
text_height = bbox[3] - bbox[1]
# Draw white background rectangle
draw.rectangle([(0, 0), (max_width, text_height + 2 * padding)], fill=(255, 255, 255))
# Draw text centered
text_x = (max_width - text_width) // 2
draw.text((text_x, padding), caption_text, font=font, fill=(255, 0, 0))
# Encode and send
# Encode and send image with optimization
output = io.BytesIO()
stitched.save(output, format="JPEG", quality=int(os.getenv("SCREENSHOT_QUALITY", SCREENSHOT_DEFAULT_QUALITY)), optimize=True)
result_bytes = output.getvalue()
pipe_conn.send_bytes(output.getvalue())
stitched.close()
del stitched
output.close()
del output
pipe_conn.send_bytes(result_bytes)
del result_bytes
except Exception as e:
logger.error(f"Error in stitch_images_worker_raw_bytes: {e}")
error_msg = f"error:{e}".encode('utf-8')
pipe_conn.send_bytes(error_msg)
pipe_conn.send(f"error:{e}")
finally:
pipe_conn.close()

View File

@@ -401,10 +401,7 @@ def changedetection_app(config=None, datastore_o=None):
# so far just for read-only via tests, but this will be moved eventually to be the main source
# (instead of the global var)
app.config['DATASTORE'] = datastore_o
# Store batch mode flag to skip background threads when running in batch mode
app.config['batch_mode'] = config.get('batch_mode', False) if config else False
# Store the signal in the app config to ensure it's accessible everywhere
app.config['watch_check_update_SIGNAL'] = watch_check_update
@@ -905,19 +902,14 @@ def changedetection_app(config=None, datastore_o=None):
logger.info(f"Starting {n_workers} workers during app initialization")
worker_handler.start_workers(n_workers, update_q, notification_q, app, datastore)
# Skip background threads in batch mode (just process queue and exit)
batch_mode = app.config.get('batch_mode', False)
if not batch_mode:
# @todo handle ctrl break
ticker_thread = threading.Thread(target=ticker_thread_check_time_launch_checks, daemon=True, name="TickerThread-ScheduleChecker").start()
threading.Thread(target=notification_runner, daemon=True, name="NotificationRunner").start()
# @todo handle ctrl break
ticker_thread = threading.Thread(target=ticker_thread_check_time_launch_checks, daemon=True, name="TickerThread-ScheduleChecker").start()
threading.Thread(target=notification_runner, daemon=True, name="NotificationRunner").start()
in_pytest = "pytest" in sys.modules or "PYTEST_CURRENT_TEST" in os.environ
# Check for new release version, but not when running in test/build or pytest
if not os.getenv("GITHUB_REF", False) and not strtobool(os.getenv('DISABLE_VERSION_CHECK', 'no')) and not in_pytest:
threading.Thread(target=check_for_new_version, daemon=True, name="VersionChecker").start()
else:
logger.info("Batch mode: Skipping ticker thread, notification runner, and version checker")
in_pytest = "pytest" in sys.modules or "PYTEST_CURRENT_TEST" in os.environ
# Check for new release version, but not when running in test/build or pytest
if not os.getenv("GITHUB_REF", False) and not strtobool(os.getenv('DISABLE_VERSION_CHECK', 'no')) and not in_pytest:
threading.Thread(target=check_for_new_version, daemon=True, name="VersionChecker").start()
# Return the Flask app - the Socket.IO will be attached to it but initialized separately
# This avoids circular dependencies

View File

@@ -20,9 +20,8 @@ mtable = {'seconds': 1, 'minutes': 60, 'hours': 3600, 'days': 86400, 'weeks': 86
def _brotli_save(contents, filepath, mode=None, fallback_uncompressed=False):
"""
Save compressed data using native brotli with streaming compression.
Uses chunked compression to minimize peak memory usage and malloc_trim()
to force release of C-level memory back to the OS.
Save compressed data using native brotli.
Testing shows no memory leak when using gc.collect() after compression.
Args:
contents: data to compress (str or bytes)
@@ -38,52 +37,27 @@ def _brotli_save(contents, filepath, mode=None, fallback_uncompressed=False):
"""
import brotli
import gc
import ctypes
# Ensure contents are bytes
if isinstance(contents, str):
contents = contents.encode('utf-8')
try:
original_size = len(contents)
logger.debug(f"Starting brotli streaming compression of {original_size} bytes.")
logger.debug(f"Starting brotli compression of {len(contents)} bytes.")
# Create streaming compressor
compressor = brotli.Compressor(quality=6, mode=mode if mode is not None else brotli.MODE_GENERIC)
# Stream compress in chunks to minimize memory usage
chunk_size = 65536 # 64KB chunks
total_compressed_size = 0
if mode is not None:
compressed_data = brotli.compress(contents, mode=mode)
else:
compressed_data = brotli.compress(contents)
with open(filepath, 'wb') as f:
# Process data in chunks
offset = 0
while offset < len(contents):
chunk = contents[offset:offset + chunk_size]
compressed_chunk = compressor.process(chunk)
if compressed_chunk:
f.write(compressed_chunk)
total_compressed_size += len(compressed_chunk)
offset += chunk_size
f.write(compressed_data)
# Finalize compression - critical for proper cleanup
final_chunk = compressor.finish()
if final_chunk:
f.write(final_chunk)
total_compressed_size += len(final_chunk)
logger.debug(f"Finished brotli compression - From {len(contents)} to {len(compressed_data)} bytes.")
logger.debug(f"Finished brotli compression - From {original_size} to {total_compressed_size} bytes.")
# Cleanup: Delete compressor, force Python GC, then force C-level memory release
del compressor
# Force garbage collection to prevent memory buildup
gc.collect()
# Force release of C-level memory back to OS (since brotli is a C library)
try:
ctypes.CDLL('libc.so.6').malloc_trim(0)
except Exception:
pass # malloc_trim not available on all systems (e.g., macOS)
return filepath
except Exception as e:

View File

@@ -1,25 +1,19 @@
{
"name": "ChangeDetection.io",
"short_name": "ChangeDetect",
"description": "Self-hosted website change detection and monitoring",
"name": "",
"short_name": "",
"icons": [
{
"src": "android-chrome-192x192.png",
"sizes": "192x192",
"type": "image/png",
"purpose": "any maskable"
"type": "image/png"
},
{
"src": "android-chrome-256x256.png",
"sizes": "256x256",
"type": "image/png",
"purpose": "any maskable"
"type": "image/png"
}
],
"start_url": "/",
"theme_color": "#5bbad5",
"theme_color": "#ffffff",
"background_color": "#ffffff",
"display": "standalone",
"categories": ["utilities", "productivity"],
"orientation": "any"
"display": "standalone"
}

View File

@@ -27,7 +27,7 @@
<link rel="apple-touch-icon" sizes="180x180" href="{{url_for('static_content', group='favicons', filename='apple-touch-icon.png')}}">
<link rel="icon" type="image/png" sizes="32x32" href="{{url_for('static_content', group='favicons', filename='favicon-32x32.png')}}">
<link rel="icon" type="image/png" sizes="16x16" href="{{url_for('static_content', group='favicons', filename='favicon-16x16.png')}}">
<link rel="manifest" href="{{url_for('static_content', group='favicons', filename='site.webmanifest')}}" crossorigin="use-credentials">
<link rel="manifest" href="{{url_for('static_content', group='favicons', filename='site.webmanifest')}}">
<link rel="mask-icon" href="{{url_for('static_content', group='favicons', filename='safari-pinned-tab.svg')}}" color="#5bbad5">
<link rel="shortcut icon" href="{{url_for('static_content', group='favicons', filename='favicon.ico')}}">
<meta name="msapplication-TileColor" content="#da532c">

View File

@@ -1,243 +0,0 @@
#!/bin/bash
# Test script for CLI options - Parallel execution
# Tests -u, -uN, -r, -b flags
set -u # Exit on undefined variables
# Color output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color
# Test results directory (for parallel safety)
TEST_RESULTS_DIR="/tmp/cli-test-results-$$"
mkdir -p "$TEST_RESULTS_DIR"
# Cleanup function
cleanup() {
echo ""
echo "=== Cleaning up test directories ==="
rm -rf /tmp/cli-test-* 2>/dev/null || true
rm -rf "$TEST_RESULTS_DIR" 2>/dev/null || true
# Kill any hanging processes
pkill -f "changedetection.py.*cli-test" 2>/dev/null || true
}
trap cleanup EXIT
# Helper to record test result
record_result() {
local test_num=$1
local status=$2 # pass or fail
local message=$3
echo "$status|$message" > "$TEST_RESULTS_DIR/test_${test_num}.result"
}
# Run a test in background
run_test() {
local test_num=$1
local test_name=$2
local test_func=$3
(
echo -e "${YELLOW}[Test $test_num]${NC} $test_name"
if $test_func "$test_num"; then
record_result "$test_num" "pass" "$test_name"
echo -e "${GREEN}✓ PASS${NC}: $test_name"
else
record_result "$test_num" "fail" "$test_name"
echo -e "${RED}✗ FAIL${NC}: $test_name"
fi
) &
}
# =============================================================================
# Test Functions (each runs independently)
# =============================================================================
test_help_flag() {
local test_id=$1
timeout 3 python3 changedetection.py --help 2>&1 | grep -q "Add URLs on startup"
}
test_version_flag() {
local test_id=$1
timeout 3 python3 changedetection.py --version 2>&1 | grep -qE "changedetection.io [0-9]+\.[0-9]+"
}
test_single_url() {
local test_id=$1
local dir="/tmp/cli-test-single-${test_id}-$$"
timeout 10 python3 changedetection.py -d "$dir" -C -u https://example.com -b &>/dev/null
[ -f "$dir/url-watches.json" ] && \
[ "$(python3 -c "import json; print(len(json.load(open('$dir/url-watches.json')).get('watching', {})))")" -eq 1 ]
}
test_multiple_urls() {
local test_id=$1
local dir="/tmp/cli-test-multi-${test_id}-$$"
timeout 12 python3 changedetection.py -d "$dir" -C \
-u https://example.com \
-u https://github.com \
-u https://httpbin.org \
-b &>/dev/null
[ -f "$dir/url-watches.json" ] && \
[ "$(python3 -c "import json; print(len(json.load(open('$dir/url-watches.json')).get('watching', {})))")" -eq 3 ]
}
test_url_with_options() {
local test_id=$1
local dir="/tmp/cli-test-opts-${test_id}-$$"
timeout 10 python3 changedetection.py -d "$dir" -C \
-u https://example.com \
-u0 '{"title":"Test Site","processor":"text_json_diff"}' \
-b &>/dev/null
[ -f "$dir/url-watches.json" ] && \
python3 -c "import json; data=json.load(open('$dir/url-watches.json')); watches=data.get('watching', {}); exit(0 if any(w.get('title')=='Test Site' for w in watches.values()) else 1)"
}
test_multiple_urls_with_options() {
local test_id=$1
local dir="/tmp/cli-test-multi-opts-${test_id}-$$"
timeout 12 python3 changedetection.py -d "$dir" -C \
-u https://example.com \
-u0 '{"title":"Site One"}' \
-u https://github.com \
-u1 '{"title":"Site Two"}' \
-b &>/dev/null
[ -f "$dir/url-watches.json" ] && \
[ "$(python3 -c "import json; print(len(json.load(open('$dir/url-watches.json')).get('watching', {})))")" -eq 2 ] && \
python3 -c "import json; data=json.load(open('$dir/url-watches.json')); watches=data.get('watching', {}); titles=[w.get('title') for w in watches.values()]; exit(0 if 'Site One' in titles and 'Site Two' in titles else 1)"
}
test_batch_mode_exit() {
local test_id=$1
local dir="/tmp/cli-test-batch-${test_id}-$$"
local start=$(date +%s)
timeout 15 python3 changedetection.py -d "$dir" -C \
-u https://example.com \
-b &>/dev/null
local end=$(date +%s)
local elapsed=$((end - start))
[ $elapsed -lt 14 ]
}
test_recheck_all() {
local test_id=$1
local dir="/tmp/cli-test-recheck-all-${test_id}-$$"
mkdir -p "$dir"
cat > "$dir/url-watches.json" << 'EOF'
{"watching":{"test-uuid":{"url":"https://example.com","last_checked":0,"processor":"text_json_diff","uuid":"test-uuid"}},"settings":{"application":{"password":false}}}
EOF
timeout 10 python3 changedetection.py -d "$dir" -r all -b 2>&1 | grep -q "Queuing all"
}
test_recheck_specific() {
local test_id=$1
local dir="/tmp/cli-test-recheck-uuid-${test_id}-$$"
mkdir -p "$dir"
cat > "$dir/url-watches.json" << 'EOF'
{"watching":{"uuid-1":{"url":"https://example.com","last_checked":0,"processor":"text_json_diff","uuid":"uuid-1"},"uuid-2":{"url":"https://github.com","last_checked":0,"processor":"text_json_diff","uuid":"uuid-2"}},"settings":{"application":{"password":false}}}
EOF
timeout 10 python3 changedetection.py -d "$dir" -r uuid-1,uuid-2 -b 2>&1 | grep -q "Queuing 2 specific watches"
}
test_combined_operations() {
local test_id=$1
local dir="/tmp/cli-test-combined-${test_id}-$$"
timeout 12 python3 changedetection.py -d "$dir" -C \
-u https://example.com \
-u https://github.com \
-r all \
-b &>/dev/null
[ -f "$dir/url-watches.json" ] && \
[ "$(python3 -c "import json; print(len(json.load(open('$dir/url-watches.json')).get('watching', {})))")" -eq 2 ]
}
test_invalid_json() {
local test_id=$1
local dir="/tmp/cli-test-invalid-${test_id}-$$"
timeout 5 python3 changedetection.py -d "$dir" -C \
-u https://example.com \
-u0 'invalid json here' \
2>&1 | grep -qi "invalid json\|json decode error"
}
test_create_directory() {
local test_id=$1
local dir="/tmp/cli-test-create-${test_id}-$$/nested/path"
timeout 10 python3 changedetection.py -d "$dir" -C \
-u https://example.com \
-b &>/dev/null
[ -d "$dir" ]
}
# =============================================================================
# Main Test Execution
# =============================================================================
echo "=========================================="
echo " CLI Options Test Suite (Parallel)"
echo "=========================================="
echo ""
# Launch all tests in parallel
run_test 1 "Help flag (--help) shows usage without initialization" test_help_flag
run_test 2 "Version flag (--version) displays version" test_version_flag
run_test 3 "Add single URL with -u flag" test_single_url
run_test 4 "Add multiple URLs with multiple -u flags" test_multiple_urls
run_test 5 "Add URL with JSON options using -u0" test_url_with_options
run_test 6 "Add multiple URLs with different options (-u0, -u1)" test_multiple_urls_with_options
run_test 7 "Batch mode (-b) exits automatically after processing" test_batch_mode_exit
run_test 8 "Recheck all watches with -r all" test_recheck_all
run_test 9 "Recheck specific watches with -r UUID" test_recheck_specific
run_test 10 "Combined: Add URLs and recheck all with -u and -r all" test_combined_operations
run_test 11 "Invalid JSON in -u0 option should show error" test_invalid_json
run_test 12 "Create datastore directory with -C flag" test_create_directory
# Wait for all tests to complete
echo ""
echo "Waiting for all tests to complete..."
wait
# Collect results
echo ""
echo "=========================================="
echo " Test Summary"
echo "=========================================="
TESTS_RUN=0
TESTS_PASSED=0
TESTS_FAILED=0
for result_file in "$TEST_RESULTS_DIR"/test_*.result; do
if [ -f "$result_file" ]; then
TESTS_RUN=$((TESTS_RUN + 1))
status=$(cut -d'|' -f1 < "$result_file")
if [ "$status" = "pass" ]; then
TESTS_PASSED=$((TESTS_PASSED + 1))
else
TESTS_FAILED=$((TESTS_FAILED + 1))
fi
fi
done
echo "Tests run: $TESTS_RUN"
echo -e "${GREEN}Tests passed: $TESTS_PASSED${NC}"
if [ $TESTS_FAILED -gt 0 ]; then
echo -e "${RED}Tests failed: $TESTS_FAILED${NC}"
else
echo -e "${GREEN}Tests failed: $TESTS_FAILED${NC}"
fi
echo "=========================================="
echo ""
# Exit with appropriate code
if [ $TESTS_FAILED -gt 0 ]; then
echo -e "${RED}Some tests failed!${NC}"
exit 1
else
echo -e "${GREEN}All tests passed!${NC}"
exit 0
fi

View File

@@ -2857,7 +2857,7 @@ msgstr "Echtzeit-Updates offline"
#: changedetectionio/templates/base.html
msgid "Select Language"
msgstr "Wählen Sie eine Sprache aus"
msgstr "Wählen Sie Sprache aus"
#: changedetectionio/templates/base.html
msgid "Auto-detect from browser"

View File

@@ -42,7 +42,7 @@ orjson~=3.11
# jq not available on Windows so must be installed manually
# Notification library
apprise==1.9.7
apprise==1.9.6
diff_match_patch
@@ -91,7 +91,7 @@ jq~=1.3; python_version >= "3.8" and sys_platform == "linux"
# playwright is installed at Dockerfile build time because it's not available on all platforms
pyppeteer-ng==2.0.0rc12
pyppeteer-ng==2.0.0rc11
pyppeteerstealth>=0.0.4
# Include pytest, so if theres a support issue we can ask them to run these tests on their setup