mirror of
https://github.com/dgtlmoon/changedetection.io.git
synced 2026-01-22 23:20:23 +00:00
Compare commits
4 Commits
0.52.9
...
API-valida
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
4fae34cd28 | ||
|
|
280f423cb3 | ||
|
|
a9c19d062b | ||
|
|
bac4022047 |
21
.github/workflows/containers.yml
vendored
21
.github/workflows/containers.yml
vendored
@@ -15,6 +15,7 @@ on:
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
- dev
|
||||
|
||||
jobs:
|
||||
metadata:
|
||||
@@ -92,28 +93,17 @@ jobs:
|
||||
version: latest
|
||||
driver-opts: image=moby/buildkit:master
|
||||
|
||||
# master branch -> :dev container tag
|
||||
- name: Docker meta :dev
|
||||
if: ${{ github.ref == 'refs/heads/master' && github.event_name != 'release' }}
|
||||
uses: docker/metadata-action@v5
|
||||
id: meta_dev
|
||||
with:
|
||||
images: |
|
||||
${{ secrets.DOCKER_HUB_USERNAME }}/changedetection.io
|
||||
ghcr.io/${{ github.repository }}
|
||||
tags: |
|
||||
type=raw,value=dev
|
||||
|
||||
# dev branch -> :dev container tag
|
||||
- name: Build and push :dev
|
||||
id: docker_build
|
||||
if: ${{ github.ref == 'refs/heads/master' && github.event_name != 'release' }}
|
||||
if: ${{ github.ref == 'refs/heads/dev' }}
|
||||
uses: docker/build-push-action@v6
|
||||
with:
|
||||
context: ./
|
||||
file: ./Dockerfile
|
||||
push: true
|
||||
tags: ${{ steps.meta_dev.outputs.tags }}
|
||||
labels: ${{ steps.meta_dev.outputs.labels }}
|
||||
tags: |
|
||||
${{ secrets.DOCKER_HUB_USERNAME }}/changedetection.io:dev,ghcr.io/${{ github.repository }}:dev
|
||||
platforms: linux/amd64,linux/arm64,linux/arm/v7,linux/arm/v8
|
||||
cache-from: type=gha
|
||||
cache-to: type=gha,mode=max
|
||||
@@ -152,7 +142,6 @@ jobs:
|
||||
file: ./Dockerfile
|
||||
push: true
|
||||
tags: ${{ steps.meta.outputs.tags }}
|
||||
labels: ${{ steps.meta.outputs.labels }}
|
||||
platforms: linux/amd64,linux/arm64,linux/arm/v7,linux/arm/v8
|
||||
cache-from: type=gha
|
||||
cache-to: type=gha,mode=max
|
||||
|
||||
@@ -78,12 +78,6 @@ RUN --mount=type=cache,id=pip,sharing=locked,target=/tmp/pip-cache \
|
||||
# Final image stage
|
||||
FROM python:${PYTHON_VERSION}-slim-bookworm
|
||||
LABEL org.opencontainers.image.source="https://github.com/dgtlmoon/changedetection.io"
|
||||
LABEL org.opencontainers.image.url="https://changedetection.io"
|
||||
LABEL org.opencontainers.image.documentation="https://changedetection.io/tutorials"
|
||||
LABEL org.opencontainers.image.title="changedetection.io"
|
||||
LABEL org.opencontainers.image.description="Self-hosted web page change monitoring and notification service"
|
||||
LABEL org.opencontainers.image.licenses="Apache-2.0"
|
||||
LABEL org.opencontainers.image.vendor="changedetection.io"
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
libxslt1.1 \
|
||||
|
||||
@@ -16,7 +16,6 @@ recursive-include changedetectionio/widgets *
|
||||
prune changedetectionio/static/package-lock.json
|
||||
prune changedetectionio/static/styles/node_modules
|
||||
prune changedetectionio/static/styles/package-lock.json
|
||||
include changedetectionio/favicon_utils.py
|
||||
include changedetection.py
|
||||
include requirements.txt
|
||||
include README-pip.md
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
# Read more https://github.com/dgtlmoon/changedetection.io/wiki
|
||||
# Semver means never use .01, or 00. Should be .1.
|
||||
__version__ = '0.52.9'
|
||||
__version__ = '0.52.7'
|
||||
|
||||
from changedetectionio.strtobool import strtobool
|
||||
from json.decoder import JSONDecodeError
|
||||
|
||||
@@ -2,7 +2,6 @@ import os
|
||||
import threading
|
||||
|
||||
from changedetectionio.validate_url import is_safe_valid_url
|
||||
from changedetectionio.favicon_utils import get_favicon_mime_type
|
||||
|
||||
from . import auth
|
||||
from changedetectionio import queuedWatchMetaData, strtobool
|
||||
@@ -403,9 +402,16 @@ class WatchFavicon(Resource):
|
||||
|
||||
favicon_filename = watch.get_favicon_filename()
|
||||
if favicon_filename:
|
||||
# Use cached MIME type detection
|
||||
filepath = os.path.join(watch.watch_data_dir, favicon_filename)
|
||||
mime = get_favicon_mime_type(filepath)
|
||||
try:
|
||||
import magic
|
||||
mime = magic.from_file(
|
||||
os.path.join(watch.watch_data_dir, favicon_filename),
|
||||
mime=True
|
||||
)
|
||||
except ImportError:
|
||||
# Fallback, no python-magic
|
||||
import mimetypes
|
||||
mime, encoding = mimetypes.guess_type(favicon_filename)
|
||||
|
||||
response = make_response(send_from_directory(watch.watch_data_dir, favicon_filename))
|
||||
response.headers['Content-type'] = mime
|
||||
|
||||
@@ -30,23 +30,13 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
|
||||
app: Flask application instance
|
||||
datastore: Application datastore
|
||||
executor: ThreadPoolExecutor for queue operations (optional)
|
||||
|
||||
Returns:
|
||||
"restart" if worker should restart, "shutdown" for clean exit
|
||||
"""
|
||||
# Set a descriptive name for this task
|
||||
task = asyncio.current_task()
|
||||
if task:
|
||||
task.set_name(f"async-worker-{worker_id}")
|
||||
|
||||
# Read restart policy from environment
|
||||
max_jobs = int(os.getenv("WORKER_MAX_JOBS", "10"))
|
||||
max_runtime_seconds = int(os.getenv("WORKER_MAX_RUNTIME", "3600")) # 1 hour default
|
||||
|
||||
jobs_processed = 0
|
||||
start_time = time.time()
|
||||
|
||||
logger.info(f"Starting async worker {worker_id} (max_jobs={max_jobs}, max_runtime={max_runtime_seconds}s)")
|
||||
logger.info(f"Starting async worker {worker_id}")
|
||||
|
||||
while not app.config.exit.is_set():
|
||||
update_handler = None
|
||||
@@ -61,11 +51,7 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
|
||||
)
|
||||
|
||||
except asyncio.TimeoutError:
|
||||
# No jobs available - check if we should restart based on time while idle
|
||||
runtime = time.time() - start_time
|
||||
if runtime >= max_runtime_seconds:
|
||||
logger.info(f"Worker {worker_id} idle and reached max runtime ({runtime:.0f}s), restarting")
|
||||
return "restart"
|
||||
# No jobs available, continue loop
|
||||
continue
|
||||
except Exception as e:
|
||||
# Handle expected Empty exception from queue timeout
|
||||
@@ -163,10 +149,8 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
|
||||
except ProcessorException as e:
|
||||
if e.screenshot:
|
||||
watch.save_screenshot(screenshot=e.screenshot)
|
||||
e.screenshot = None # Free memory immediately
|
||||
if e.xpath_data:
|
||||
watch.save_xpath_data(data=e.xpath_data)
|
||||
e.xpath_data = None # Free memory immediately
|
||||
datastore.update_watch(uuid=uuid, update_obj={'last_error': e.message})
|
||||
process_changedetection_results = False
|
||||
|
||||
@@ -186,11 +170,9 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
|
||||
|
||||
if e.screenshot:
|
||||
watch.save_screenshot(screenshot=e.screenshot, as_error=True)
|
||||
e.screenshot = None # Free memory immediately
|
||||
|
||||
if e.xpath_data:
|
||||
watch.save_xpath_data(data=e.xpath_data)
|
||||
e.xpath_data = None # Free memory immediately
|
||||
|
||||
process_changedetection_results = False
|
||||
|
||||
@@ -209,10 +191,8 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
|
||||
|
||||
if e.screenshot:
|
||||
watch.save_screenshot(screenshot=e.screenshot, as_error=True)
|
||||
e.screenshot = None # Free memory immediately
|
||||
if e.xpath_data:
|
||||
watch.save_xpath_data(data=e.xpath_data, as_error=True)
|
||||
e.xpath_data = None # Free memory immediately
|
||||
if e.page_text:
|
||||
watch.save_error_text(contents=e.page_text)
|
||||
|
||||
@@ -229,11 +209,9 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
|
||||
# Filter wasnt found, but we should still update the visual selector so that they can have a chance to set it up again
|
||||
if e.screenshot:
|
||||
watch.save_screenshot(screenshot=e.screenshot)
|
||||
e.screenshot = None # Free memory immediately
|
||||
|
||||
if e.xpath_data:
|
||||
watch.save_xpath_data(data=e.xpath_data)
|
||||
e.xpath_data = None # Free memory immediately
|
||||
|
||||
# Only when enabled, send the notification
|
||||
if watch.get('filter_failure_notification_send', False):
|
||||
@@ -325,7 +303,6 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
|
||||
err_text = "Error running JS Actions - Page request - "+e.message
|
||||
if e.screenshot:
|
||||
watch.save_screenshot(screenshot=e.screenshot, as_error=True)
|
||||
e.screenshot = None # Free memory immediately
|
||||
datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
|
||||
'last_check_status': e.status_code})
|
||||
process_changedetection_results = False
|
||||
@@ -337,7 +314,6 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
|
||||
|
||||
if e.screenshot:
|
||||
watch.save_screenshot(screenshot=e.screenshot, as_error=True)
|
||||
e.screenshot = None # Free memory immediately
|
||||
|
||||
datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
|
||||
'last_check_status': e.status_code,
|
||||
@@ -379,17 +355,9 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
|
||||
if changed_detected or not watch.history_n:
|
||||
if update_handler.screenshot:
|
||||
watch.save_screenshot(screenshot=update_handler.screenshot)
|
||||
# Free screenshot memory immediately after saving
|
||||
update_handler.screenshot = None
|
||||
if hasattr(update_handler, 'fetcher') and hasattr(update_handler.fetcher, 'screenshot'):
|
||||
update_handler.fetcher.screenshot = None
|
||||
|
||||
if update_handler.xpath_data:
|
||||
watch.save_xpath_data(data=update_handler.xpath_data)
|
||||
# Free xpath data memory
|
||||
update_handler.xpath_data = None
|
||||
if hasattr(update_handler, 'fetcher') and hasattr(update_handler.fetcher, 'xpath_data'):
|
||||
update_handler.fetcher.xpath_data = None
|
||||
|
||||
# Ensure unique timestamp for history
|
||||
if watch.newest_history_key and int(fetch_start_time) == int(watch.newest_history_key):
|
||||
@@ -456,20 +424,6 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
|
||||
update_handler.fetcher.clear_content()
|
||||
logger.debug(f"Cleared fetcher content for UUID {uuid}")
|
||||
|
||||
# Explicitly delete update_handler to free all references
|
||||
if update_handler:
|
||||
del update_handler
|
||||
update_handler = None
|
||||
|
||||
# Force aggressive memory cleanup after clearing
|
||||
import gc
|
||||
gc.collect()
|
||||
try:
|
||||
import ctypes
|
||||
ctypes.CDLL('libc.so.6').malloc_trim(0)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Worker {worker_id} unexpected error processing {uuid}: {e}")
|
||||
logger.error(f"Worker {worker_id} traceback:", exc_info=True)
|
||||
@@ -534,19 +488,6 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
|
||||
# Small yield for normal completion
|
||||
await asyncio.sleep(0.01)
|
||||
|
||||
# Job completed - increment counter and check restart conditions
|
||||
jobs_processed += 1
|
||||
runtime = time.time() - start_time
|
||||
|
||||
# Check if we should restart (only when idle, between jobs)
|
||||
should_restart_jobs = jobs_processed >= max_jobs
|
||||
should_restart_time = runtime >= max_runtime_seconds
|
||||
|
||||
if should_restart_jobs or should_restart_time:
|
||||
reason = f"{jobs_processed} jobs" if should_restart_jobs else f"{runtime:.0f}s runtime"
|
||||
logger.info(f"Worker {worker_id} restarting after {reason} ({jobs_processed} jobs, {runtime:.0f}s runtime)")
|
||||
return "restart"
|
||||
|
||||
# Check if we should exit
|
||||
if app.config.exit.is_set():
|
||||
break
|
||||
@@ -554,12 +495,10 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
|
||||
# Check if we're in pytest environment - if so, be more gentle with logging
|
||||
import sys
|
||||
in_pytest = "pytest" in sys.modules or "PYTEST_CURRENT_TEST" in os.environ
|
||||
|
||||
|
||||
if not in_pytest:
|
||||
logger.info(f"Worker {worker_id} shutting down")
|
||||
|
||||
return "shutdown"
|
||||
|
||||
|
||||
def cleanup_error_artifacts(uuid, datastore):
|
||||
"""Helper function to clean up error artifacts"""
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import time
|
||||
import threading
|
||||
from flask import Blueprint, request, redirect, url_for, flash, render_template, session, current_app
|
||||
from flask import Blueprint, request, redirect, url_for, flash, render_template, session
|
||||
from flask_babel import gettext
|
||||
from loguru import logger
|
||||
|
||||
@@ -404,25 +404,4 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_handle
|
||||
|
||||
return redirect(url_for('watchlist.index'))
|
||||
|
||||
@ui_blueprint.route("/language/auto-detect", methods=['GET'])
|
||||
def delete_locale_language_session_var_if_it_exists():
|
||||
"""Clear the session locale preference to auto-detect from browser Accept-Language header"""
|
||||
if 'locale' in session:
|
||||
session.pop('locale', None)
|
||||
# Refresh Flask-Babel to clear cached locale
|
||||
from flask_babel import refresh
|
||||
refresh()
|
||||
flash(gettext("Language set to auto-detect from browser"))
|
||||
|
||||
# Check if there's a redirect parameter to return to the same page
|
||||
redirect_url = request.args.get('redirect')
|
||||
|
||||
# If redirect is provided and safe, use it
|
||||
from changedetectionio.is_safe_url import is_safe_url
|
||||
if redirect_url and is_safe_url(redirect_url, current_app):
|
||||
return redirect(redirect_url)
|
||||
|
||||
# Otherwise redirect to watchlist
|
||||
return redirect(url_for('watchlist.index'))
|
||||
|
||||
return ui_blueprint
|
||||
@@ -71,19 +71,10 @@ class Fetcher():
|
||||
supports_screenshots = False # Can capture page screenshots
|
||||
supports_xpath_element_data = False # Can extract xpath element positions/data for visual selector
|
||||
|
||||
# Screenshot element locking - prevents layout shifts during screenshot capture
|
||||
# Only needed for visual comparison (image_ssim_diff processor)
|
||||
# Locks element dimensions in the first viewport to prevent headers/ads from resizing
|
||||
lock_viewport_elements = False # Default: disabled for performance
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
if kwargs and 'screenshot_format' in kwargs:
|
||||
self.screenshot_format = kwargs.get('screenshot_format')
|
||||
|
||||
# Allow lock_viewport_elements to be set via kwargs
|
||||
if kwargs and 'lock_viewport_elements' in kwargs:
|
||||
self.lock_viewport_elements = kwargs.get('lock_viewport_elements')
|
||||
|
||||
|
||||
@classmethod
|
||||
def get_status_icon_data(cls):
|
||||
|
||||
@@ -10,20 +10,18 @@ from changedetectionio.content_fetchers import SCREENSHOT_MAX_HEIGHT_DEFAULT, vi
|
||||
from changedetectionio.content_fetchers.base import Fetcher, manage_user_agent
|
||||
from changedetectionio.content_fetchers.exceptions import PageUnloadable, Non200ErrorCodeReceived, EmptyReply, ScreenshotUnavailable
|
||||
|
||||
async def capture_full_page_async(page, screenshot_format='JPEG', watch_uuid=None, lock_viewport_elements=False):
|
||||
async def capture_full_page_async(page, screenshot_format='JPEG'):
|
||||
import os
|
||||
import time
|
||||
import multiprocessing
|
||||
|
||||
start = time.time()
|
||||
watch_info = f"[{watch_uuid}] " if watch_uuid else ""
|
||||
|
||||
setup_start = time.time()
|
||||
page_height = await page.evaluate("document.documentElement.scrollHeight")
|
||||
page_width = await page.evaluate("document.documentElement.scrollWidth")
|
||||
original_viewport = page.viewport_size
|
||||
dimensions_time = time.time() - setup_start
|
||||
|
||||
logger.debug(f"{watch_info}Playwright viewport size {page.viewport_size} page height {page_height} page width {page_width} (got dimensions in {dimensions_time:.2f}s)")
|
||||
logger.debug(f"Playwright viewport size {page.viewport_size} page height {page_height} page width {page_width}")
|
||||
|
||||
# Use an approach similar to puppeteer: set a larger viewport and take screenshots in chunks
|
||||
step_size = SCREENSHOT_SIZE_STITCH_THRESHOLD # Size that won't cause GPU to overflow
|
||||
@@ -31,31 +29,25 @@ async def capture_full_page_async(page, screenshot_format='JPEG', watch_uuid=Non
|
||||
y = 0
|
||||
elements_locked = False
|
||||
|
||||
# Only lock viewport elements if explicitly enabled (for image_ssim_diff processor)
|
||||
# This prevents headers/ads from resizing when viewport changes
|
||||
if lock_viewport_elements and page_height > page.viewport_size['height']:
|
||||
lock_start = time.time()
|
||||
if page_height > page.viewport_size['height']:
|
||||
|
||||
# Lock all element dimensions BEFORE screenshot to prevent CSS media queries from resizing
|
||||
# capture_full_page_async() changes viewport height which triggers @media (min-height) rules
|
||||
lock_elements_js_path = os.path.join(os.path.dirname(__file__), 'res', 'lock-elements-sizing.js')
|
||||
with open(lock_elements_js_path, 'r') as f:
|
||||
lock_elements_js = f.read()
|
||||
await page.evaluate(lock_elements_js)
|
||||
elements_locked = True
|
||||
lock_time = time.time() - lock_start
|
||||
logger.debug(f"{watch_info}Viewport element locking enabled (took {lock_time:.2f}s)")
|
||||
|
||||
if page_height > page.viewport_size['height']:
|
||||
logger.debug("Element dimensions locked before screenshot capture")
|
||||
|
||||
if page_height < step_size:
|
||||
step_size = page_height # Incase page is bigger than default viewport but smaller than proposed step size
|
||||
viewport_start = time.time()
|
||||
logger.debug(f"{watch_info}Setting bigger viewport to step through large page width W{page.viewport_size['width']}xH{step_size} because page_height > viewport_size")
|
||||
logger.debug(f"Setting bigger viewport to step through large page width W{page.viewport_size['width']}xH{step_size} because page_height > viewport_size")
|
||||
# Set viewport to a larger size to capture more content at once
|
||||
await page.set_viewport_size({'width': page.viewport_size['width'], 'height': step_size})
|
||||
viewport_time = time.time() - viewport_start
|
||||
logger.debug(f"{watch_info}Viewport changed to {page.viewport_size['width']}x{step_size} (took {viewport_time:.2f}s)")
|
||||
|
||||
# Capture screenshots in chunks up to the max total height
|
||||
capture_start = time.time()
|
||||
chunk_times = []
|
||||
# Use PNG for better quality (no compression artifacts), JPEG for smaller size
|
||||
screenshot_type = screenshot_format.lower() if screenshot_format else 'jpeg'
|
||||
# PNG should use quality 100, JPEG uses configurable quality
|
||||
@@ -77,11 +69,7 @@ async def capture_full_page_async(page, screenshot_format='JPEG', watch_uuid=Non
|
||||
if screenshot_type == 'jpeg':
|
||||
screenshot_kwargs['quality'] = screenshot_quality
|
||||
|
||||
chunk_start = time.time()
|
||||
screenshot_chunks.append(await page.screenshot(**screenshot_kwargs))
|
||||
chunk_time = time.time() - chunk_start
|
||||
chunk_times.append(chunk_time)
|
||||
logger.debug(f"{watch_info}Chunk {len(screenshot_chunks)} captured in {chunk_time:.2f}s")
|
||||
y += step_size
|
||||
|
||||
# Restore original viewport size
|
||||
@@ -93,54 +81,40 @@ async def capture_full_page_async(page, screenshot_format='JPEG', watch_uuid=Non
|
||||
with open(unlock_elements_js_path, 'r') as f:
|
||||
unlock_elements_js = f.read()
|
||||
await page.evaluate(unlock_elements_js)
|
||||
logger.debug(f"{watch_info}Element dimensions unlocked after screenshot capture")
|
||||
|
||||
capture_time = time.time() - capture_start
|
||||
total_capture_time = sum(chunk_times)
|
||||
logger.debug(f"{watch_info}All {len(screenshot_chunks)} chunks captured in {capture_time:.2f}s (total chunk time: {total_capture_time:.2f}s)")
|
||||
logger.debug("Element dimensions unlocked after screenshot capture")
|
||||
|
||||
# If we have multiple chunks, stitch them together
|
||||
if len(screenshot_chunks) > 1:
|
||||
stitch_start = time.time()
|
||||
logger.debug(f"{watch_info}Starting stitching of {len(screenshot_chunks)} chunks")
|
||||
logger.debug(f"Screenshot stitching {len(screenshot_chunks)} chunks together")
|
||||
|
||||
# Always use spawn subprocess for ANY stitching (2+ chunks)
|
||||
# PIL allocates at C level and Python GC never releases it - subprocess exit forces OS to reclaim
|
||||
# Trade-off: 35MB resource_tracker vs 500MB+ PIL leak in main process
|
||||
from changedetectionio.content_fetchers.screenshot_handler import stitch_images_worker_raw_bytes
|
||||
import multiprocessing
|
||||
import struct
|
||||
# For small number of chunks (2-3), stitch inline to avoid multiprocessing overhead
|
||||
# Only use separate process for many chunks (4+) to avoid blocking the event loop
|
||||
if len(screenshot_chunks) <= 3:
|
||||
from changedetectionio.content_fetchers.screenshot_handler import stitch_images_inline
|
||||
screenshot = stitch_images_inline(screenshot_chunks, page_height, SCREENSHOT_MAX_TOTAL_HEIGHT)
|
||||
else:
|
||||
# Use separate process for many chunks to avoid blocking
|
||||
# Always use spawn for thread safety - consistent behavior in tests and production
|
||||
from changedetectionio.content_fetchers.screenshot_handler import stitch_images_worker
|
||||
ctx = multiprocessing.get_context('spawn')
|
||||
parent_conn, child_conn = ctx.Pipe()
|
||||
p = ctx.Process(target=stitch_images_worker, args=(child_conn, screenshot_chunks, page_height, SCREENSHOT_MAX_TOTAL_HEIGHT))
|
||||
p.start()
|
||||
screenshot = parent_conn.recv_bytes()
|
||||
p.join()
|
||||
# Explicit cleanup
|
||||
del p
|
||||
del parent_conn, child_conn
|
||||
|
||||
ctx = multiprocessing.get_context('spawn')
|
||||
parent_conn, child_conn = ctx.Pipe()
|
||||
p = ctx.Process(target=stitch_images_worker_raw_bytes, args=(child_conn, page_height, SCREENSHOT_MAX_TOTAL_HEIGHT))
|
||||
p.start()
|
||||
|
||||
# Send via raw bytes (no pickle)
|
||||
parent_conn.send_bytes(struct.pack('I', len(screenshot_chunks)))
|
||||
for chunk in screenshot_chunks:
|
||||
parent_conn.send_bytes(chunk)
|
||||
|
||||
screenshot = parent_conn.recv_bytes()
|
||||
p.join()
|
||||
|
||||
parent_conn.close()
|
||||
child_conn.close()
|
||||
del p, parent_conn, child_conn
|
||||
|
||||
stitch_time = time.time() - stitch_start
|
||||
total_time = time.time() - start
|
||||
setup_time = total_time - capture_time - stitch_time
|
||||
logger.debug(
|
||||
f"{watch_info}Screenshot complete - Page height: {page_height}px, Capture height: {SCREENSHOT_MAX_TOTAL_HEIGHT}px | "
|
||||
f"Setup: {setup_time:.2f}s, Capture: {capture_time:.2f}s, Stitching: {stitch_time:.2f}s, Total: {total_time:.2f}s")
|
||||
f"Screenshot (chunked/stitched) - Page height: {page_height} Capture height: {SCREENSHOT_MAX_TOTAL_HEIGHT} - Stitched together in {time.time() - start:.2f}s")
|
||||
# Explicit cleanup
|
||||
del screenshot_chunks
|
||||
screenshot_chunks = None
|
||||
return screenshot
|
||||
|
||||
total_time = time.time() - start
|
||||
setup_time = total_time - capture_time
|
||||
logger.debug(
|
||||
f"{watch_info}Screenshot complete - Page height: {page_height}px, Capture height: {SCREENSHOT_MAX_TOTAL_HEIGHT}px | "
|
||||
f"Setup: {setup_time:.2f}s, Single chunk: {capture_time:.2f}s, Total: {total_time:.2f}s")
|
||||
f"Screenshot Page height: {page_height} Capture height: {SCREENSHOT_MAX_TOTAL_HEIGHT} - Stitched together in {time.time() - start:.2f}s")
|
||||
|
||||
return screenshot_chunks[0]
|
||||
|
||||
@@ -210,8 +184,7 @@ class fetcher(Fetcher):
|
||||
|
||||
async def screenshot_step(self, step_n=''):
|
||||
super().screenshot_step(step_n=step_n)
|
||||
watch_uuid = getattr(self, 'watch_uuid', None)
|
||||
screenshot = await capture_full_page_async(page=self.page, screenshot_format=self.screenshot_format, watch_uuid=watch_uuid, lock_viewport_elements=self.lock_viewport_elements)
|
||||
screenshot = await capture_full_page_async(page=self.page, screenshot_format=self.screenshot_format)
|
||||
|
||||
# Request GC immediately after screenshot to free memory
|
||||
# Screenshots can be large and browser steps take many of them
|
||||
@@ -260,7 +233,6 @@ class fetcher(Fetcher):
|
||||
import playwright._impl._errors
|
||||
import time
|
||||
self.delete_browser_steps_screenshots()
|
||||
self.watch_uuid = watch_uuid # Store for use in screenshot_step
|
||||
response = None
|
||||
|
||||
async with async_playwright() as p:
|
||||
@@ -346,7 +318,7 @@ class fetcher(Fetcher):
|
||||
logger.error(f"Error fetching FavIcon info {str(e)}, continuing.")
|
||||
|
||||
if self.status_code != 200 and not ignore_status_codes:
|
||||
screenshot = await capture_full_page_async(self.page, screenshot_format=self.screenshot_format, watch_uuid=watch_uuid, lock_viewport_elements=self.lock_viewport_elements)
|
||||
screenshot = await capture_full_page_async(self.page, screenshot_format=self.screenshot_format)
|
||||
# Cleanup before raising to prevent memory leak
|
||||
await self.page.close()
|
||||
await context.close()
|
||||
@@ -402,17 +374,7 @@ class fetcher(Fetcher):
|
||||
# which will significantly increase the IO size between the server and client, it's recommended to use the lowest
|
||||
# acceptable screenshot quality here
|
||||
# The actual screenshot - this always base64 and needs decoding! horrible! huge CPU usage
|
||||
self.screenshot = await capture_full_page_async(page=self.page, screenshot_format=self.screenshot_format, watch_uuid=watch_uuid, lock_viewport_elements=self.lock_viewport_elements)
|
||||
|
||||
# Force aggressive memory cleanup - screenshots are large and base64 decode creates temporary buffers
|
||||
await self.page.request_gc()
|
||||
gc.collect()
|
||||
# Release C-level memory from base64 decode back to OS
|
||||
try:
|
||||
import ctypes
|
||||
ctypes.CDLL('libc.so.6').malloc_trim(0)
|
||||
except Exception:
|
||||
pass
|
||||
self.screenshot = await capture_full_page_async(page=self.page, screenshot_format=self.screenshot_format)
|
||||
|
||||
except ScreenshotUnavailable:
|
||||
# Re-raise screenshot unavailable exceptions
|
||||
|
||||
@@ -20,20 +20,18 @@ from changedetectionio.content_fetchers.exceptions import PageUnloadable, Non200
|
||||
# Screenshots also travel via the ws:// (websocket) meaning that the binary data is base64 encoded
|
||||
# which will significantly increase the IO size between the server and client, it's recommended to use the lowest
|
||||
# acceptable screenshot quality here
|
||||
async def capture_full_page(page, screenshot_format='JPEG', watch_uuid=None, lock_viewport_elements=False):
|
||||
async def capture_full_page(page, screenshot_format='JPEG'):
|
||||
import os
|
||||
import time
|
||||
import multiprocessing
|
||||
|
||||
start = time.time()
|
||||
watch_info = f"[{watch_uuid}] " if watch_uuid else ""
|
||||
|
||||
setup_start = time.time()
|
||||
page_height = await page.evaluate("document.documentElement.scrollHeight")
|
||||
page_width = await page.evaluate("document.documentElement.scrollWidth")
|
||||
original_viewport = page.viewport
|
||||
dimensions_time = time.time() - setup_start
|
||||
|
||||
logger.debug(f"{watch_info}Puppeteer viewport size {page.viewport} page height {page_height} page width {page_width} (got dimensions in {dimensions_time:.2f}s)")
|
||||
logger.debug(f"Puppeteer viewport size {page.viewport} page height {page_height} page width {page_width}")
|
||||
|
||||
# Bug 3 in Playwright screenshot handling
|
||||
# Some bug where it gives the wrong screenshot size, but making a request with the clip set first seems to solve it
|
||||
@@ -52,35 +50,20 @@ async def capture_full_page(page, screenshot_format='JPEG', watch_uuid=None, loc
|
||||
screenshot_chunks = []
|
||||
y = 0
|
||||
elements_locked = False
|
||||
|
||||
# Only lock viewport elements if explicitly enabled (for image_ssim_diff processor)
|
||||
# This prevents headers/ads from resizing when viewport changes
|
||||
if lock_viewport_elements and page_height > page.viewport['height']:
|
||||
lock_start = time.time()
|
||||
if page_height > page.viewport['height']:
|
||||
# Lock all element dimensions BEFORE screenshot to prevent CSS media queries from resizing
|
||||
# capture_full_page() changes viewport height which triggers @media (min-height) rules
|
||||
lock_elements_js_path = os.path.join(os.path.dirname(__file__), 'res', 'lock-elements-sizing.js')
|
||||
file_read_start = time.time()
|
||||
with open(lock_elements_js_path, 'r') as f:
|
||||
lock_elements_js = f.read()
|
||||
file_read_time = time.time() - file_read_start
|
||||
|
||||
evaluate_start = time.time()
|
||||
await page.evaluate(lock_elements_js)
|
||||
evaluate_time = time.time() - evaluate_start
|
||||
|
||||
elements_locked = True
|
||||
lock_time = time.time() - lock_start
|
||||
logger.debug(f"{watch_info}Viewport element locking enabled - File read: {file_read_time:.3f}s, Browser evaluate: {evaluate_time:.2f}s, Total: {lock_time:.2f}s")
|
||||
logger.debug("Element dimensions locked before screenshot capture")
|
||||
|
||||
if page_height > page.viewport['height']:
|
||||
if page_height < step_size:
|
||||
step_size = page_height # Incase page is bigger than default viewport but smaller than proposed step size
|
||||
viewport_start = time.time()
|
||||
await page.setViewport({'width': page.viewport['width'], 'height': step_size})
|
||||
viewport_time = time.time() - viewport_start
|
||||
logger.debug(f"{watch_info}Viewport changed to {page.viewport['width']}x{step_size} (took {viewport_time:.2f}s)")
|
||||
|
||||
capture_start = time.time()
|
||||
chunk_times = []
|
||||
while y < min(page_height, SCREENSHOT_MAX_TOTAL_HEIGHT):
|
||||
# better than scrollTo incase they override it in the page
|
||||
await page.evaluate(
|
||||
@@ -99,11 +82,7 @@ async def capture_full_page(page, screenshot_format='JPEG', watch_uuid=None, loc
|
||||
if screenshot_type == 'jpeg':
|
||||
screenshot_kwargs['quality'] = screenshot_quality
|
||||
|
||||
chunk_start = time.time()
|
||||
screenshot_chunks.append(await page.screenshot(**screenshot_kwargs))
|
||||
chunk_time = time.time() - chunk_start
|
||||
chunk_times.append(chunk_time)
|
||||
logger.debug(f"{watch_info}Chunk {len(screenshot_chunks)} captured in {chunk_time:.2f}s")
|
||||
y += step_size
|
||||
|
||||
await page.setViewport({'width': original_viewport['width'], 'height': original_viewport['height']})
|
||||
@@ -114,53 +93,26 @@ async def capture_full_page(page, screenshot_format='JPEG', watch_uuid=None, loc
|
||||
with open(unlock_elements_js_path, 'r') as f:
|
||||
unlock_elements_js = f.read()
|
||||
await page.evaluate(unlock_elements_js)
|
||||
logger.debug(f"{watch_info}Element dimensions unlocked after screenshot capture")
|
||||
|
||||
capture_time = time.time() - capture_start
|
||||
total_capture_time = sum(chunk_times)
|
||||
logger.debug(f"{watch_info}All {len(screenshot_chunks)} chunks captured in {capture_time:.2f}s (total chunk time: {total_capture_time:.2f}s)")
|
||||
logger.debug("Element dimensions unlocked after screenshot capture")
|
||||
|
||||
if len(screenshot_chunks) > 1:
|
||||
stitch_start = time.time()
|
||||
logger.debug(f"{watch_info}Starting stitching of {len(screenshot_chunks)} chunks")
|
||||
|
||||
# Always use spawn subprocess for ANY stitching (2+ chunks)
|
||||
# PIL allocates at C level and Python GC never releases it - subprocess exit forces OS to reclaim
|
||||
# Trade-off: 35MB resource_tracker vs 500MB+ PIL leak in main process
|
||||
from changedetectionio.content_fetchers.screenshot_handler import stitch_images_worker_raw_bytes
|
||||
import multiprocessing
|
||||
import struct
|
||||
|
||||
# Always use spawn for thread safety - consistent behavior in tests and production
|
||||
from changedetectionio.content_fetchers.screenshot_handler import stitch_images_worker
|
||||
logger.debug(f"Screenshot stitching {len(screenshot_chunks)} chunks together")
|
||||
ctx = multiprocessing.get_context('spawn')
|
||||
parent_conn, child_conn = ctx.Pipe()
|
||||
p = ctx.Process(target=stitch_images_worker_raw_bytes, args=(child_conn, page_height, SCREENSHOT_MAX_TOTAL_HEIGHT))
|
||||
p = ctx.Process(target=stitch_images_worker, args=(child_conn, screenshot_chunks, page_height, SCREENSHOT_MAX_TOTAL_HEIGHT))
|
||||
p.start()
|
||||
|
||||
# Send via raw bytes (no pickle)
|
||||
parent_conn.send_bytes(struct.pack('I', len(screenshot_chunks)))
|
||||
for chunk in screenshot_chunks:
|
||||
parent_conn.send_bytes(chunk)
|
||||
|
||||
screenshot = parent_conn.recv_bytes()
|
||||
p.join()
|
||||
|
||||
parent_conn.close()
|
||||
child_conn.close()
|
||||
del p, parent_conn, child_conn
|
||||
|
||||
stitch_time = time.time() - stitch_start
|
||||
total_time = time.time() - start
|
||||
setup_time = total_time - capture_time - stitch_time
|
||||
logger.debug(
|
||||
f"{watch_info}Screenshot complete - Page height: {page_height}px, Capture height: {SCREENSHOT_MAX_TOTAL_HEIGHT}px | "
|
||||
f"Setup: {setup_time:.2f}s, Capture: {capture_time:.2f}s, Stitching: {stitch_time:.2f}s, Total: {total_time:.2f}s")
|
||||
f"Screenshot (chunked/stitched) - Page height: {page_height} Capture height: {SCREENSHOT_MAX_TOTAL_HEIGHT} - Stitched together in {time.time() - start:.2f}s")
|
||||
|
||||
screenshot_chunks = None
|
||||
return screenshot
|
||||
|
||||
total_time = time.time() - start
|
||||
setup_time = total_time - capture_time
|
||||
logger.debug(
|
||||
f"{watch_info}Screenshot complete - Page height: {page_height}px, Capture height: {SCREENSHOT_MAX_TOTAL_HEIGHT}px | "
|
||||
f"Setup: {setup_time:.2f}s, Single chunk: {capture_time:.2f}s, Total: {total_time:.2f}s")
|
||||
f"Screenshot Page height: {page_height} Capture height: {SCREENSHOT_MAX_TOTAL_HEIGHT} - Stitched together in {time.time() - start:.2f}s")
|
||||
return screenshot_chunks[0]
|
||||
|
||||
|
||||
@@ -405,7 +357,7 @@ class fetcher(Fetcher):
|
||||
logger.error(f"Error fetching FavIcon info {str(e)}, continuing.")
|
||||
|
||||
if self.status_code != 200 and not ignore_status_codes:
|
||||
screenshot = await capture_full_page(page=self.page, screenshot_format=self.screenshot_format, watch_uuid=watch_uuid, lock_viewport_elements=self.lock_viewport_elements)
|
||||
screenshot = await capture_full_page(page=self.page, screenshot_format=self.screenshot_format)
|
||||
|
||||
raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, screenshot=screenshot)
|
||||
|
||||
@@ -435,17 +387,7 @@ class fetcher(Fetcher):
|
||||
|
||||
# Now take screenshot (scrolling may trigger layout changes, but measurements are already captured)
|
||||
logger.debug(f"Screenshot format {self.screenshot_format}")
|
||||
self.screenshot = await capture_full_page(page=self.page, screenshot_format=self.screenshot_format, watch_uuid=watch_uuid, lock_viewport_elements=self.lock_viewport_elements)
|
||||
|
||||
# Force aggressive memory cleanup - pyppeteer base64 decode creates temporary buffers
|
||||
import gc
|
||||
gc.collect()
|
||||
# Release C-level memory from base64 decode back to OS
|
||||
try:
|
||||
import ctypes
|
||||
ctypes.CDLL('libc.so.6').malloc_trim(0)
|
||||
except Exception:
|
||||
pass
|
||||
self.screenshot = await capture_full_page(page=self.page, screenshot_format=self.screenshot_format)
|
||||
self.xpath_data = await self.page.evaluate(XPATH_ELEMENT_JS, {
|
||||
"visualselector_xpath_selectors": visualselector_xpath_selectors,
|
||||
"max_height": MAX_TOTAL_HEIGHT
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/**
|
||||
* Lock Element Dimensions for Screenshot Capture (First Viewport Only)
|
||||
* Lock Element Dimensions for Screenshot Capture
|
||||
*
|
||||
* THE PROBLEM:
|
||||
* When taking full-page screenshots of tall pages, Chrome/Puppeteer/Playwright need to:
|
||||
@@ -10,31 +10,40 @@
|
||||
* However, changing the viewport height triggers CSS media queries like:
|
||||
* @media (min-height: 860px) { .ad { height: 250px; } }
|
||||
*
|
||||
* This causes elements (especially ads/headers) to resize during screenshot capture.
|
||||
* This causes elements (especially ads) to resize during screenshot capture, creating a mismatch:
|
||||
* - Screenshot shows element at NEW size (after media query triggered)
|
||||
* - xpath element coordinates measured at OLD size (before viewport change)
|
||||
* - Visual selector overlays don't align with screenshot
|
||||
*
|
||||
* EXAMPLE BUG:
|
||||
* - Initial viewport: 1280x800, ad height: 138px, article position: 279px ✓
|
||||
* - Viewport changes to 1280x3809 for screenshot
|
||||
* - Media query triggers: ad expands to 250px
|
||||
* - All content below shifts down by 112px (250-138)
|
||||
* - Article now at position: 391px (279+112)
|
||||
* - But xpath data says 279px → 112px mismatch! ✗
|
||||
*
|
||||
* THE SOLUTION:
|
||||
* Lock element dimensions in the FIRST VIEWPORT ONLY with !important inline styles.
|
||||
* This prevents headers, navigation, and top ads from resizing when viewport changes.
|
||||
* We only lock the visible portion because:
|
||||
* - Most layout shifts happen in headers/navbars/top ads
|
||||
* - Locking only visible elements is 100x+ faster (100-200 elements vs 10,000+)
|
||||
* - Below-fold content shifts don't affect visual comparison accuracy
|
||||
* Before changing viewport, lock ALL element dimensions with !important inline styles.
|
||||
* Inline styles with !important override media query CSS, preventing layout changes.
|
||||
*
|
||||
* WHAT THIS SCRIPT DOES:
|
||||
* 1. Gets current viewport height
|
||||
* 2. Finds elements within first viewport (top of page to bottom of screen)
|
||||
* 3. Locks their dimensions with !important inline styles
|
||||
* 1. Iterates through every element on the page
|
||||
* 2. Captures current computed dimensions (width, height)
|
||||
* 3. Sets inline styles with !important to freeze those dimensions
|
||||
* 4. Disables ResizeObserver API (for JS-based resizing)
|
||||
* 5. When viewport changes for screenshot, media queries can't resize anything
|
||||
* 6. Layout remains consistent → xpath coordinates match screenshot ✓
|
||||
*
|
||||
* USAGE:
|
||||
* Execute this script BEFORE calling capture_full_page() / screenshot functions.
|
||||
* Only enabled for image_ssim_diff processor (visual comparison).
|
||||
* Default: OFF for performance.
|
||||
* The page must be fully loaded and settled at its initial viewport size.
|
||||
* No need to restore state afterward - page is closed after screenshot.
|
||||
*
|
||||
* PERFORMANCE:
|
||||
* - Only processes 100-300 elements (first viewport) vs 10,000+ (entire page)
|
||||
* - Typically completes in 10-50ms
|
||||
* - 100x+ faster than locking entire page
|
||||
* - Iterates all DOM elements (can be 1000s on complex pages)
|
||||
* - Typically completes in 50-200ms
|
||||
* - One-time cost before screenshot, well worth it for coordinate accuracy
|
||||
*
|
||||
* @see https://github.com/dgtlmoon/changedetection.io/issues/XXXX
|
||||
*/
|
||||
@@ -43,34 +52,11 @@
|
||||
// Store original styles in a global WeakMap for later restoration
|
||||
window.__elementSizingRestore = new WeakMap();
|
||||
|
||||
const start = performance.now();
|
||||
|
||||
// Get current viewport height (visible portion of page)
|
||||
const viewportHeight = window.innerHeight;
|
||||
|
||||
// Get all elements and filter to FIRST VIEWPORT ONLY
|
||||
// This dramatically reduces elements to process (100-300 vs 10,000+)
|
||||
const allElements = Array.from(document.querySelectorAll('*'));
|
||||
|
||||
// BATCH READ PHASE: Get bounding rects and filter to viewport
|
||||
const measurements = allElements.map(el => {
|
||||
const rect = el.getBoundingClientRect();
|
||||
// Lock ALL element dimensions to prevent media query layout changes
|
||||
document.querySelectorAll('*').forEach(el => {
|
||||
const computed = window.getComputedStyle(el);
|
||||
const rect = el.getBoundingClientRect();
|
||||
|
||||
// Only lock elements in the first viewport (visible on initial page load)
|
||||
// rect.top < viewportHeight means element starts within visible area
|
||||
const inViewport = rect.top < viewportHeight && rect.top >= 0;
|
||||
const hasSize = rect.height > 0 && rect.width > 0;
|
||||
|
||||
return inViewport && hasSize ? { el, computed, rect } : null;
|
||||
}).filter(Boolean); // Remove null entries
|
||||
|
||||
const elapsed = performance.now() - start;
|
||||
console.log(`Locked first viewport elements: ${measurements.length} of ${allElements.length} total elements (viewport height: ${viewportHeight}px, took ${elapsed.toFixed(0)}ms)`);
|
||||
|
||||
// BATCH WRITE PHASE: Apply all inline styles without triggering layout
|
||||
// No interleaved reads means browser can optimize style application
|
||||
measurements.forEach(({el, computed, rect}) => {
|
||||
// Save original inline style values BEFORE locking
|
||||
const properties = ['height', 'min-height', 'max-height', 'width', 'min-width', 'max-width'];
|
||||
const originalStyles = {};
|
||||
@@ -103,5 +89,5 @@
|
||||
disconnect() {}
|
||||
};
|
||||
|
||||
console.log(`✓ Element dimensions locked (${measurements.length} elements) to prevent media query changes during screenshot`);
|
||||
console.log('✓ Element dimensions locked to prevent media query changes during screenshot');
|
||||
})();
|
||||
|
||||
@@ -8,42 +8,92 @@ from loguru import logger
|
||||
|
||||
from changedetectionio.content_fetchers import SCREENSHOT_MAX_HEIGHT_DEFAULT, SCREENSHOT_DEFAULT_QUALITY
|
||||
|
||||
def stitch_images_worker_raw_bytes(pipe_conn, original_page_height, capture_height):
|
||||
# Cache font to avoid loading on every stitch
|
||||
_cached_font = None
|
||||
|
||||
def _get_caption_font():
|
||||
"""Get or create cached font for caption text."""
|
||||
global _cached_font
|
||||
if _cached_font is None:
|
||||
from PIL import ImageFont
|
||||
try:
|
||||
_cached_font = ImageFont.truetype("arial.ttf", 35)
|
||||
except IOError:
|
||||
_cached_font = ImageFont.load_default()
|
||||
return _cached_font
|
||||
|
||||
|
||||
def stitch_images_inline(chunks_bytes, original_page_height, capture_height):
|
||||
"""
|
||||
Stitch image chunks together in a separate process.
|
||||
|
||||
Uses spawn multiprocessing to isolate PIL's C-level memory allocation.
|
||||
When the subprocess exits, the OS reclaims ALL memory including C-level allocations
|
||||
that Python's GC cannot release. This prevents the ~50MB per stitch from accumulating
|
||||
in the main process.
|
||||
|
||||
Trade-off: Adds 35MB resource_tracker subprocess, but prevents 500MB+ memory leak
|
||||
in main process (much better at scale: 35GB vs 500GB for 1000 instances).
|
||||
Stitch image chunks together inline (no multiprocessing).
|
||||
Optimized for small number of chunks (2-3) to avoid process creation overhead.
|
||||
|
||||
Args:
|
||||
pipe_conn: Pipe connection to receive data and send result
|
||||
chunks_bytes: List of JPEG image bytes
|
||||
original_page_height: Original page height in pixels
|
||||
capture_height: Maximum capture height
|
||||
|
||||
Returns:
|
||||
bytes: Stitched JPEG image
|
||||
"""
|
||||
import os
|
||||
import io
|
||||
from PIL import Image, ImageDraw
|
||||
|
||||
# Load images from byte chunks
|
||||
images = [Image.open(io.BytesIO(b)) for b in chunks_bytes]
|
||||
total_height = sum(im.height for im in images)
|
||||
max_width = max(im.width for im in images)
|
||||
|
||||
# Create stitched image
|
||||
stitched = Image.new('RGB', (max_width, total_height))
|
||||
y_offset = 0
|
||||
for im in images:
|
||||
stitched.paste(im, (0, y_offset))
|
||||
y_offset += im.height
|
||||
im.close() # Close immediately after pasting
|
||||
|
||||
# Draw caption only if page was trimmed
|
||||
if original_page_height > capture_height:
|
||||
draw = ImageDraw.Draw(stitched)
|
||||
caption_text = f"WARNING: Screenshot was {original_page_height}px but trimmed to {capture_height}px because it was too long"
|
||||
padding = 10
|
||||
font = _get_caption_font()
|
||||
|
||||
bbox = draw.textbbox((0, 0), caption_text, font=font)
|
||||
text_width = bbox[2] - bbox[0]
|
||||
text_height = bbox[3] - bbox[1]
|
||||
|
||||
# Draw white background rectangle
|
||||
draw.rectangle([(0, 0), (max_width, text_height + 2 * padding)], fill=(255, 255, 255))
|
||||
|
||||
# Draw text centered
|
||||
text_x = (max_width - text_width) // 2
|
||||
draw.text((text_x, padding), caption_text, font=font, fill=(255, 0, 0))
|
||||
|
||||
# Encode to JPEG
|
||||
output = io.BytesIO()
|
||||
stitched.save(output, format="JPEG", quality=int(os.getenv("SCREENSHOT_QUALITY", SCREENSHOT_DEFAULT_QUALITY)), optimize=True)
|
||||
result = output.getvalue()
|
||||
|
||||
# Cleanup
|
||||
stitched.close()
|
||||
|
||||
return result
|
||||
|
||||
|
||||
def stitch_images_worker(pipe_conn, chunks_bytes, original_page_height, capture_height):
|
||||
"""
|
||||
Stitch image chunks together in a separate process.
|
||||
Used for large number of chunks (4+) to avoid blocking the main event loop.
|
||||
"""
|
||||
import os
|
||||
import io
|
||||
import struct
|
||||
from PIL import Image, ImageDraw, ImageFont
|
||||
|
||||
try:
|
||||
# Receive chunk count as 4-byte integer (no pickle!)
|
||||
count_bytes = pipe_conn.recv_bytes()
|
||||
chunk_count = struct.unpack('I', count_bytes)[0]
|
||||
|
||||
# Receive each chunk as raw bytes (no pickle!)
|
||||
chunks_bytes = []
|
||||
for _ in range(chunk_count):
|
||||
chunks_bytes.append(pipe_conn.recv_bytes())
|
||||
|
||||
# Load images from byte chunks
|
||||
images = [Image.open(io.BytesIO(b)) for b in chunks_bytes]
|
||||
del chunks_bytes
|
||||
|
||||
total_height = sum(im.height for im in images)
|
||||
max_width = max(im.width for im in images)
|
||||
|
||||
@@ -53,14 +103,15 @@ def stitch_images_worker_raw_bytes(pipe_conn, original_page_height, capture_heig
|
||||
for im in images:
|
||||
stitched.paste(im, (0, y_offset))
|
||||
y_offset += im.height
|
||||
im.close()
|
||||
del images
|
||||
im.close() # Close immediately after pasting
|
||||
|
||||
# Draw caption only if page was trimmed
|
||||
if original_page_height > capture_height:
|
||||
draw = ImageDraw.Draw(stitched)
|
||||
caption_text = f"WARNING: Screenshot was {original_page_height}px but trimmed to {capture_height}px because it was too long"
|
||||
padding = 10
|
||||
|
||||
# Try to load font
|
||||
try:
|
||||
font = ImageFont.truetype("arial.ttf", 35)
|
||||
except IOError:
|
||||
@@ -69,26 +120,23 @@ def stitch_images_worker_raw_bytes(pipe_conn, original_page_height, capture_heig
|
||||
bbox = draw.textbbox((0, 0), caption_text, font=font)
|
||||
text_width = bbox[2] - bbox[0]
|
||||
text_height = bbox[3] - bbox[1]
|
||||
|
||||
# Draw white background rectangle
|
||||
draw.rectangle([(0, 0), (max_width, text_height + 2 * padding)], fill=(255, 255, 255))
|
||||
|
||||
# Draw text centered
|
||||
text_x = (max_width - text_width) // 2
|
||||
draw.text((text_x, padding), caption_text, font=font, fill=(255, 0, 0))
|
||||
|
||||
# Encode and send
|
||||
# Encode and send image with optimization
|
||||
output = io.BytesIO()
|
||||
stitched.save(output, format="JPEG", quality=int(os.getenv("SCREENSHOT_QUALITY", SCREENSHOT_DEFAULT_QUALITY)), optimize=True)
|
||||
result_bytes = output.getvalue()
|
||||
pipe_conn.send_bytes(output.getvalue())
|
||||
|
||||
stitched.close()
|
||||
del stitched
|
||||
output.close()
|
||||
del output
|
||||
|
||||
pipe_conn.send_bytes(result_bytes)
|
||||
del result_bytes
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error in stitch_images_worker_raw_bytes: {e}")
|
||||
error_msg = f"error:{e}".encode('utf-8')
|
||||
pipe_conn.send_bytes(error_msg)
|
||||
pipe_conn.send(f"error:{e}")
|
||||
finally:
|
||||
pipe_conn.close()
|
||||
|
||||
|
||||
|
||||
@@ -1,43 +0,0 @@
|
||||
"""
|
||||
Favicon utilities for changedetection.io
|
||||
Handles favicon MIME type detection with caching
|
||||
"""
|
||||
|
||||
from functools import lru_cache
|
||||
|
||||
|
||||
@lru_cache(maxsize=1000)
|
||||
def get_favicon_mime_type(filepath):
|
||||
"""
|
||||
Detect MIME type of favicon by reading file content using puremagic.
|
||||
Results are cached to avoid repeatedly reading the same files.
|
||||
|
||||
Args:
|
||||
filepath: Full path to the favicon file
|
||||
|
||||
Returns:
|
||||
MIME type string (e.g., 'image/png')
|
||||
"""
|
||||
mime = None
|
||||
|
||||
try:
|
||||
import puremagic
|
||||
with open(filepath, 'rb') as f:
|
||||
content_bytes = f.read(200) # Read first 200 bytes
|
||||
|
||||
detections = puremagic.magic_string(content_bytes)
|
||||
if detections:
|
||||
mime = detections[0].mime_type
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Fallback to mimetypes if puremagic fails
|
||||
if not mime:
|
||||
import mimetypes
|
||||
mime, _ = mimetypes.guess_type(filepath)
|
||||
|
||||
# Final fallback based on extension
|
||||
if not mime:
|
||||
mime = 'image/x-icon' if filepath.endswith('.ico') else 'image/png'
|
||||
|
||||
return mime
|
||||
@@ -44,8 +44,6 @@ from changedetectionio.api import Watch, WatchHistory, WatchSingleHistory, Watch
|
||||
from changedetectionio.api.Search import Search
|
||||
from .time_handler import is_within_schedule
|
||||
from changedetectionio.languages import get_available_languages, get_language_codes, get_flag_for_locale, get_timeago_locale
|
||||
from changedetectionio.favicon_utils import get_favicon_mime_type
|
||||
|
||||
IN_PYTEST = "pytest" in sys.modules or "PYTEST_CURRENT_TEST" in os.environ
|
||||
|
||||
datastore = None
|
||||
@@ -71,13 +69,9 @@ socketio_server = None
|
||||
CORS(app)
|
||||
|
||||
# Super handy for compressing large BrowserSteps responses and others
|
||||
# Flask-Compress handles HTTP compression, Socket.IO compression disabled to prevent memory leak
|
||||
compress = FlaskCompress()
|
||||
app.config['COMPRESS_MIN_SIZE'] = 2096
|
||||
FlaskCompress(app)
|
||||
app.config['COMPRESS_MIN_SIZE'] = 4096
|
||||
app.config['COMPRESS_MIMETYPES'] = ['text/html', 'text/css', 'text/javascript', 'application/json', 'application/javascript', 'image/svg+xml']
|
||||
# Use gzip only - smaller memory footprint than zstd/brotli (4-8KB vs 200-500KB contexts)
|
||||
app.config['COMPRESS_ALGORITHM'] = ['gzip']
|
||||
compress.init_app(app)
|
||||
app.config['TEMPLATES_AUTO_RELOAD'] = False
|
||||
|
||||
|
||||
@@ -94,14 +88,6 @@ if os.getenv('FLASK_SERVER_NAME'):
|
||||
app.config['BABEL_TRANSLATION_DIRECTORIES'] = str(Path(__file__).parent / 'translations')
|
||||
app.config['BABEL_DEFAULT_LOCALE'] = 'en_GB'
|
||||
|
||||
# Session configuration
|
||||
# NOTE: Flask session (for locale, etc.) is separate from Flask-Login's remember-me cookie
|
||||
# - Flask session stores data like session['locale'] in a signed cookie
|
||||
# - Flask-Login's remember=True creates a separate authentication cookie
|
||||
# - Setting PERMANENT_SESSION_LIFETIME controls how long the Flask session cookie lasts
|
||||
from datetime import timedelta
|
||||
app.config['PERMANENT_SESSION_LIFETIME'] = timedelta(days=3650) # ~10 years (effectively unlimited)
|
||||
|
||||
#app.config["EXPLAIN_TEMPLATE_LOADING"] = True
|
||||
|
||||
|
||||
@@ -414,27 +400,11 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
language_codes = get_language_codes()
|
||||
|
||||
def get_locale():
|
||||
# Locale aliases: map browser language codes to translation directory names
|
||||
# This handles cases where browsers send standard codes (e.g., zh-TW)
|
||||
# but our translations use more specific codes (e.g., zh_Hant_TW)
|
||||
locale_aliases = {
|
||||
'zh-TW': 'zh_Hant_TW', # Traditional Chinese: browser sends zh-TW, we use zh_Hant_TW
|
||||
'zh_TW': 'zh_Hant_TW', # Also handle underscore variant
|
||||
}
|
||||
|
||||
# 1. Try to get locale from session (user explicitly selected)
|
||||
if 'locale' in session:
|
||||
return session['locale']
|
||||
|
||||
# 2. Fall back to Accept-Language header
|
||||
# Get the best match from browser's Accept-Language header
|
||||
browser_locale = request.accept_languages.best_match(language_codes + list(locale_aliases.keys()))
|
||||
|
||||
# 3. Check if we need to map the browser locale to our internal locale
|
||||
if browser_locale in locale_aliases:
|
||||
return locale_aliases[browser_locale]
|
||||
|
||||
return browser_locale
|
||||
return request.accept_languages.best_match(language_codes)
|
||||
|
||||
# Initialize Babel with locale selector
|
||||
babel = Babel(app, locale_selector=get_locale)
|
||||
@@ -558,9 +528,6 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
|
||||
# Validate the locale against available languages
|
||||
if locale in language_codes:
|
||||
# Make session permanent so language preference persists across browser sessions
|
||||
# NOTE: This is the Flask session cookie (separate from Flask-Login's remember-me auth cookie)
|
||||
session.permanent = True
|
||||
session['locale'] = locale
|
||||
|
||||
# CRITICAL: Flask-Babel caches the locale in the request context (ctx.babel_locale)
|
||||
@@ -699,9 +666,16 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
|
||||
favicon_filename = watch.get_favicon_filename()
|
||||
if favicon_filename:
|
||||
# Use cached MIME type detection
|
||||
filepath = os.path.join(watch.watch_data_dir, favicon_filename)
|
||||
mime = get_favicon_mime_type(filepath)
|
||||
try:
|
||||
import magic
|
||||
mime = magic.from_file(
|
||||
os.path.join(watch.watch_data_dir, favicon_filename),
|
||||
mime=True
|
||||
)
|
||||
except ImportError:
|
||||
# Fallback, no python-magic
|
||||
import mimetypes
|
||||
mime, encoding = mimetypes.guess_type(favicon_filename)
|
||||
|
||||
response = make_response(send_from_directory(watch.watch_data_dir, favicon_filename))
|
||||
response.headers['Content-type'] = mime
|
||||
|
||||
@@ -20,9 +20,8 @@ mtable = {'seconds': 1, 'minutes': 60, 'hours': 3600, 'days': 86400, 'weeks': 86
|
||||
|
||||
def _brotli_save(contents, filepath, mode=None, fallback_uncompressed=False):
|
||||
"""
|
||||
Save compressed data using native brotli with streaming compression.
|
||||
Uses chunked compression to minimize peak memory usage and malloc_trim()
|
||||
to force release of C-level memory back to the OS.
|
||||
Save compressed data using native brotli.
|
||||
Testing shows no memory leak when using gc.collect() after compression.
|
||||
|
||||
Args:
|
||||
contents: data to compress (str or bytes)
|
||||
@@ -38,52 +37,27 @@ def _brotli_save(contents, filepath, mode=None, fallback_uncompressed=False):
|
||||
"""
|
||||
import brotli
|
||||
import gc
|
||||
import ctypes
|
||||
|
||||
# Ensure contents are bytes
|
||||
if isinstance(contents, str):
|
||||
contents = contents.encode('utf-8')
|
||||
|
||||
try:
|
||||
original_size = len(contents)
|
||||
logger.debug(f"Starting brotli streaming compression of {original_size} bytes.")
|
||||
logger.debug(f"Starting brotli compression of {len(contents)} bytes.")
|
||||
|
||||
# Create streaming compressor
|
||||
compressor = brotli.Compressor(quality=6, mode=mode if mode is not None else brotli.MODE_GENERIC)
|
||||
|
||||
# Stream compress in chunks to minimize memory usage
|
||||
chunk_size = 65536 # 64KB chunks
|
||||
total_compressed_size = 0
|
||||
if mode is not None:
|
||||
compressed_data = brotli.compress(contents, mode=mode)
|
||||
else:
|
||||
compressed_data = brotli.compress(contents)
|
||||
|
||||
with open(filepath, 'wb') as f:
|
||||
# Process data in chunks
|
||||
offset = 0
|
||||
while offset < len(contents):
|
||||
chunk = contents[offset:offset + chunk_size]
|
||||
compressed_chunk = compressor.process(chunk)
|
||||
if compressed_chunk:
|
||||
f.write(compressed_chunk)
|
||||
total_compressed_size += len(compressed_chunk)
|
||||
offset += chunk_size
|
||||
f.write(compressed_data)
|
||||
|
||||
# Finalize compression - critical for proper cleanup
|
||||
final_chunk = compressor.finish()
|
||||
if final_chunk:
|
||||
f.write(final_chunk)
|
||||
total_compressed_size += len(final_chunk)
|
||||
logger.debug(f"Finished brotli compression - From {len(contents)} to {len(compressed_data)} bytes.")
|
||||
|
||||
logger.debug(f"Finished brotli compression - From {original_size} to {total_compressed_size} bytes.")
|
||||
|
||||
# Cleanup: Delete compressor, force Python GC, then force C-level memory release
|
||||
del compressor
|
||||
# Force garbage collection to prevent memory buildup
|
||||
gc.collect()
|
||||
|
||||
# Force release of C-level memory back to OS (since brotli is a C library)
|
||||
try:
|
||||
ctypes.CDLL('libc.so.6').malloc_trim(0)
|
||||
except Exception:
|
||||
pass # malloc_trim not available on all systems (e.g., macOS)
|
||||
|
||||
return filepath
|
||||
|
||||
except Exception as e:
|
||||
|
||||
@@ -69,7 +69,7 @@ class RecheckPriorityQueue:
|
||||
# Emit signals
|
||||
self._emit_put_signals(item)
|
||||
|
||||
logger.trace(f"Successfully queued item: {self._get_item_uuid(item)}")
|
||||
logger.debug(f"Successfully queued item: {self._get_item_uuid(item)}")
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
|
||||
@@ -240,10 +240,7 @@ def init_socketio(app, datastore):
|
||||
async_mode=async_mode,
|
||||
cors_allowed_origins=cors_origins, # None means same-origin only
|
||||
logger=strtobool(os.getenv('SOCKETIO_LOGGING', 'False')),
|
||||
engineio_logger=strtobool(os.getenv('SOCKETIO_LOGGING', 'False')),
|
||||
# Disable WebSocket compression to prevent memory accumulation
|
||||
# Flask-Compress already handles HTTP response compression
|
||||
engineio_options={'http_compression': False, 'compression_threshold': 0})
|
||||
engineio_logger=strtobool(os.getenv('SOCKETIO_LOGGING', 'False')))
|
||||
|
||||
# Set up event handlers
|
||||
logger.info("Socket.IO: Registering connect event handler")
|
||||
|
||||
@@ -91,8 +91,8 @@ REMOVE_REQUESTS_OLD_SCREENSHOTS=false pytest -vv -s --maxfail=1 tests/test_notif
|
||||
# And again with brotli+screenshot attachment
|
||||
SNAPSHOT_BROTLI_COMPRESSION_THRESHOLD=5 REMOVE_REQUESTS_OLD_SCREENSHOTS=false pytest -vv -s --maxfail=1 --dist=load tests/test_backend.py tests/test_rss.py tests/test_unique_lines.py tests/test_notification.py tests/test_access_control.py
|
||||
|
||||
# Try high concurrency with aggressive worker restarts
|
||||
FETCH_WORKERS=50 WORKER_MAX_RUNTIME=2 WORKER_MAX_JOBS=1 pytest tests/test_history_consistency.py -vv -l -s
|
||||
# Try high concurrency
|
||||
FETCH_WORKERS=50 pytest tests/test_history_consistency.py -vv -l -s
|
||||
|
||||
# Check file:// will pickup a file when enabled
|
||||
echo "Hello world" > /tmp/test-file.txt
|
||||
|
||||
@@ -1,25 +1,19 @@
|
||||
{
|
||||
"name": "ChangeDetection.io",
|
||||
"short_name": "ChangeDetect",
|
||||
"description": "Self-hosted website change detection and monitoring",
|
||||
"name": "",
|
||||
"short_name": "",
|
||||
"icons": [
|
||||
{
|
||||
"src": "android-chrome-192x192.png",
|
||||
"sizes": "192x192",
|
||||
"type": "image/png",
|
||||
"purpose": "any maskable"
|
||||
"type": "image/png"
|
||||
},
|
||||
{
|
||||
"src": "android-chrome-256x256.png",
|
||||
"sizes": "256x256",
|
||||
"type": "image/png",
|
||||
"purpose": "any maskable"
|
||||
"type": "image/png"
|
||||
}
|
||||
],
|
||||
"start_url": "/",
|
||||
"theme_color": "#5bbad5",
|
||||
"theme_color": "#ffffff",
|
||||
"background_color": "#ffffff",
|
||||
"display": "standalone",
|
||||
"categories": ["utilities", "productivity"],
|
||||
"orientation": "any"
|
||||
"display": "standalone"
|
||||
}
|
||||
|
||||
@@ -69,19 +69,6 @@
|
||||
}
|
||||
});
|
||||
|
||||
// Handle Enter key in search input
|
||||
if (searchInput) {
|
||||
searchInput.addEventListener('keydown', function(e) {
|
||||
if (e.key === 'Enter') {
|
||||
e.preventDefault();
|
||||
if (searchForm) {
|
||||
// Trigger form submission programmatically
|
||||
searchForm.dispatchEvent(new Event('submit'));
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
// Handle form submission
|
||||
if (searchForm) {
|
||||
searchForm.addEventListener('submit', function(e) {
|
||||
@@ -101,8 +88,8 @@
|
||||
params.append('tags', tags);
|
||||
}
|
||||
|
||||
// Navigate to search results (always redirect to watchlist home)
|
||||
window.location.href = '/?' + params.toString();
|
||||
// Navigate to search results
|
||||
window.location.href = '?' + params.toString();
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
import gc
|
||||
import shutil
|
||||
|
||||
from changedetectionio.strtobool import strtobool
|
||||
@@ -126,10 +125,6 @@ class ChangeDetectionStore:
|
||||
if 'application' in from_disk['settings']:
|
||||
self.__data['settings']['application'].update(from_disk['settings']['application'])
|
||||
|
||||
# from_disk no longer needed - free memory immediately
|
||||
del from_disk
|
||||
gc.collect()
|
||||
|
||||
# Convert each existing watch back to the Watch.model object
|
||||
for uuid, watch in self.__data['watching'].items():
|
||||
self.__data['watching'][uuid] = self.rehydrate_entity(uuid, watch)
|
||||
@@ -455,7 +450,7 @@ class ChangeDetectionStore:
|
||||
data = deepcopy(self.__data)
|
||||
except RuntimeError as e:
|
||||
# Try again in 15 seconds
|
||||
time.sleep(1)
|
||||
time.sleep(15)
|
||||
logger.error(f"! Data changed when writing to JSON, trying again.. {str(e)}")
|
||||
self.sync_to_json()
|
||||
return
|
||||
|
||||
@@ -27,7 +27,7 @@
|
||||
<link rel="apple-touch-icon" sizes="180x180" href="{{url_for('static_content', group='favicons', filename='apple-touch-icon.png')}}">
|
||||
<link rel="icon" type="image/png" sizes="32x32" href="{{url_for('static_content', group='favicons', filename='favicon-32x32.png')}}">
|
||||
<link rel="icon" type="image/png" sizes="16x16" href="{{url_for('static_content', group='favicons', filename='favicon-16x16.png')}}">
|
||||
<link rel="manifest" href="{{url_for('static_content', group='favicons', filename='site.webmanifest')}}" crossorigin="use-credentials">
|
||||
<link rel="manifest" href="{{url_for('static_content', group='favicons', filename='site.webmanifest')}}">
|
||||
<link rel="mask-icon" href="{{url_for('static_content', group='favicons', filename='safari-pinned-tab.svg')}}" color="#5bbad5">
|
||||
<link rel="shortcut icon" href="{{url_for('static_content', group='favicons', filename='favicon.ico')}}">
|
||||
<meta name="msapplication-TileColor" content="#da532c">
|
||||
@@ -265,9 +265,6 @@
|
||||
</a>
|
||||
{% endfor %}
|
||||
</div>
|
||||
<div>
|
||||
<a href="{{ url_for('ui.delete_locale_language_session_var_if_it_exists', redirect=request.path) }}" >{{ _('Auto-detect from browser') }}</a>
|
||||
</div>
|
||||
<div>
|
||||
{{ _('Language support is in beta, please help us improve by opening a PR on GitHub with any updates.') }}
|
||||
</div>
|
||||
|
||||
@@ -50,9 +50,7 @@
|
||||
<span class="visually-hidden">{{ _('Change language') }}</span>
|
||||
<span class="{{ get_flag_for_locale(get_locale()) }}" id="language-selector-flag"></span>
|
||||
</button>
|
||||
<a class="github-link" href="https://github.com/dgtlmoon/changedetection.io"
|
||||
target="_blank"
|
||||
rel="noopener" >
|
||||
<a class="github-link" href="https://github.com/dgtlmoon/changedetection.io">
|
||||
{% include "svgs/github.svg" %}
|
||||
</a>
|
||||
</li>
|
||||
|
||||
@@ -160,7 +160,7 @@ def test_invalid_locale(client, live_server, measure_memory_usage, datastore_pat
|
||||
def test_language_persistence_in_session(client, live_server, measure_memory_usage, datastore_path):
|
||||
"""
|
||||
Test that the language preference persists across multiple requests
|
||||
within the same session, and that auto-detect properly clears the preference.
|
||||
within the same session.
|
||||
"""
|
||||
|
||||
# Establish session cookie
|
||||
@@ -184,34 +184,6 @@ def test_language_persistence_in_session(client, live_server, measure_memory_usa
|
||||
assert res.status_code == 200
|
||||
assert b"Annulla" in res.data, "Italian text should persist across requests"
|
||||
|
||||
# Verify locale is in session
|
||||
with client.session_transaction() as sess:
|
||||
assert sess.get('locale') == 'it', "Locale should be set in session"
|
||||
|
||||
# Call auto-detect to clear the locale
|
||||
res = client.get(
|
||||
url_for("ui.delete_locale_language_session_var_if_it_exists"),
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
assert res.status_code == 200
|
||||
# Verify the flash message appears (in English since we cleared the locale)
|
||||
assert b"Language set to auto-detect from browser" in res.data, "Should show flash message"
|
||||
|
||||
# Verify locale was removed from session
|
||||
with client.session_transaction() as sess:
|
||||
assert 'locale' not in sess, "Locale should be removed from session after auto-detect"
|
||||
|
||||
# Now requests should use browser default (English in test environment)
|
||||
res = client.get(
|
||||
url_for("watchlist.index"),
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
assert res.status_code == 200
|
||||
assert b"Cancel" in res.data, "Should show English after auto-detect clears Italian"
|
||||
assert b"Annulla" not in res.data, "Should not show Italian after auto-detect"
|
||||
|
||||
|
||||
def test_set_language_with_redirect(client, live_server, measure_memory_usage, datastore_path):
|
||||
"""
|
||||
@@ -353,274 +325,3 @@ def test_time_unit_translations(client, live_server, measure_memory_usage, datas
|
||||
assert b"Time Between Check" not in res.data, "Should not have English 'Time Between Check'"
|
||||
assert "Chrome 請求".encode() not in res.data, "Should not have incorrect 'Chrome 請求' (Chrome requests)"
|
||||
assert "使用預設通知".encode() not in res.data, "Should not have incorrect '使用預設通知' (Use default notification)"
|
||||
|
||||
|
||||
def test_accept_language_header_zh_tw(client, live_server, measure_memory_usage, datastore_path):
|
||||
"""
|
||||
Test that browsers sending zh-TW in Accept-Language header get Traditional Chinese.
|
||||
This tests the locale alias mapping for issue #3779.
|
||||
"""
|
||||
from flask import url_for
|
||||
|
||||
# Clear any session data to simulate a fresh visitor
|
||||
with client.session_transaction() as sess:
|
||||
sess.clear()
|
||||
|
||||
# Request the index page with zh-TW in Accept-Language header (what browsers send)
|
||||
res = client.get(
|
||||
url_for("watchlist.index"),
|
||||
headers={'Accept-Language': 'zh-TW,zh;q=0.9,en;q=0.8'},
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
assert res.status_code == 200
|
||||
|
||||
# Should get Traditional Chinese content, not Simplified Chinese
|
||||
# Traditional: 選擇語言, Simplified: 选择语言
|
||||
assert '選擇語言'.encode() in res.data, "Expected Traditional Chinese '選擇語言' (Select Language)"
|
||||
assert '选择语言'.encode() not in res.data, "Should not get Simplified Chinese '选择语言'"
|
||||
|
||||
# Check HTML lang attribute uses BCP 47 format
|
||||
assert b'<html lang="zh-Hant-TW"' in res.data, "Expected BCP 47 language tag zh-Hant-TW in HTML"
|
||||
|
||||
# Check that the correct flag icon is shown (Taiwan flag for Traditional Chinese)
|
||||
assert b'<span class="fi fi-tw fis" id="language-selector-flag">' in res.data, \
|
||||
"Expected Taiwan flag 'fi fi-tw' for Traditional Chinese"
|
||||
assert b'<span class="fi fi-cn fis" id="language-selector-flag">' not in res.data, \
|
||||
"Should not show China flag 'fi fi-cn' for Traditional Chinese"
|
||||
|
||||
# Verify we're getting Traditional Chinese text throughout the page
|
||||
res = client.get(
|
||||
url_for("settings.settings_page"),
|
||||
headers={'Accept-Language': 'zh-TW,zh;q=0.9,en;q=0.8'},
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
assert res.status_code == 200
|
||||
|
||||
# Check Traditional Chinese translations (not English)
|
||||
assert "小時".encode() in res.data, "Expected Traditional Chinese '小時' for Hours"
|
||||
assert "分鐘".encode() in res.data, "Expected Traditional Chinese '分鐘' for Minutes"
|
||||
assert b"Hours" not in res.data or "小時".encode() in res.data, "Should have Traditional Chinese, not English"
|
||||
|
||||
|
||||
def test_accept_language_header_en_variants(client, live_server, measure_memory_usage, datastore_path):
|
||||
"""
|
||||
Test that browsers sending en-GB and en-US in Accept-Language header get the correct English variant.
|
||||
This ensures the locale selector works properly for English variants.
|
||||
"""
|
||||
from flask import url_for
|
||||
|
||||
# Test 1: British English (en-GB)
|
||||
with client.session_transaction() as sess:
|
||||
sess.clear()
|
||||
|
||||
res = client.get(
|
||||
url_for("watchlist.index"),
|
||||
headers={'Accept-Language': 'en-GB,en;q=0.9'},
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
assert res.status_code == 200
|
||||
|
||||
# Should get English content
|
||||
assert b"Select Language" in res.data, "Expected English text 'Select Language'"
|
||||
|
||||
# Check HTML lang attribute uses BCP 47 format with hyphen
|
||||
assert b'<html lang="en-GB"' in res.data, "Expected BCP 47 language tag en-GB in HTML"
|
||||
|
||||
# Check that the correct flag icon is shown (UK flag for en-GB)
|
||||
assert b'<span class="fi fi-gb fis" id="language-selector-flag">' in res.data, \
|
||||
"Expected UK flag 'fi fi-gb' for British English"
|
||||
|
||||
# Test 2: American English (en-US)
|
||||
with client.session_transaction() as sess:
|
||||
sess.clear()
|
||||
|
||||
res = client.get(
|
||||
url_for("watchlist.index"),
|
||||
headers={'Accept-Language': 'en-US,en;q=0.9'},
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
assert res.status_code == 200
|
||||
|
||||
# Should get English content
|
||||
assert b"Select Language" in res.data, "Expected English text 'Select Language'"
|
||||
|
||||
# Check HTML lang attribute uses BCP 47 format with hyphen
|
||||
assert b'<html lang="en-US"' in res.data, "Expected BCP 47 language tag en-US in HTML"
|
||||
|
||||
# Check that the correct flag icon is shown (US flag for en-US)
|
||||
assert b'<span class="fi fi-us fis" id="language-selector-flag">' in res.data, \
|
||||
"Expected US flag 'fi fi-us' for American English"
|
||||
|
||||
# Test 3: Generic 'en' should fall back to one of the English variants
|
||||
with client.session_transaction() as sess:
|
||||
sess.clear()
|
||||
|
||||
res = client.get(
|
||||
url_for("watchlist.index"),
|
||||
headers={'Accept-Language': 'en'},
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
assert res.status_code == 200
|
||||
|
||||
# Should get English content (either variant is fine)
|
||||
assert b"Select Language" in res.data, "Expected English text 'Select Language'"
|
||||
|
||||
|
||||
def test_accept_language_header_zh_simplified(client, live_server, measure_memory_usage, datastore_path):
|
||||
"""
|
||||
Test that browsers sending zh or zh-CN in Accept-Language header get Simplified Chinese.
|
||||
This ensures Simplified Chinese still works correctly and doesn't get confused with Traditional.
|
||||
"""
|
||||
from flask import url_for
|
||||
|
||||
# Test 1: Generic 'zh' should get Simplified Chinese
|
||||
with client.session_transaction() as sess:
|
||||
sess.clear()
|
||||
|
||||
res = client.get(
|
||||
url_for("watchlist.index"),
|
||||
headers={'Accept-Language': 'zh,en;q=0.9'},
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
assert res.status_code == 200
|
||||
|
||||
# Should get Simplified Chinese content, not Traditional Chinese
|
||||
# Simplified: 选择语言, Traditional: 選擇語言
|
||||
assert '选择语言'.encode() in res.data, "Expected Simplified Chinese '选择语言' (Select Language)"
|
||||
assert '選擇語言'.encode() not in res.data, "Should not get Traditional Chinese '選擇語言'"
|
||||
|
||||
# Check HTML lang attribute
|
||||
assert b'<html lang="zh"' in res.data, "Expected language tag zh in HTML"
|
||||
|
||||
# Check that the correct flag icon is shown (China flag for Simplified Chinese)
|
||||
assert b'<span class="fi fi-cn fis" id="language-selector-flag">' in res.data, \
|
||||
"Expected China flag 'fi fi-cn' for Simplified Chinese"
|
||||
assert b'<span class="fi fi-tw fis" id="language-selector-flag">' not in res.data, \
|
||||
"Should not show Taiwan flag 'fi fi-tw' for Simplified Chinese"
|
||||
|
||||
# Test 2: 'zh-CN' should also get Simplified Chinese
|
||||
with client.session_transaction() as sess:
|
||||
sess.clear()
|
||||
|
||||
res = client.get(
|
||||
url_for("watchlist.index"),
|
||||
headers={'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8'},
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
assert res.status_code == 200
|
||||
|
||||
# Should get Simplified Chinese content
|
||||
assert '选择语言'.encode() in res.data, "Expected Simplified Chinese '选择语言' with zh-CN header"
|
||||
assert '選擇語言'.encode() not in res.data, "Should not get Traditional Chinese with zh-CN header"
|
||||
|
||||
# Check that the correct flag icon is shown (China flag for zh-CN)
|
||||
assert b'<span class="fi fi-cn fis" id="language-selector-flag">' in res.data, \
|
||||
"Expected China flag 'fi fi-cn' for zh-CN header"
|
||||
|
||||
# Verify Simplified Chinese in settings page
|
||||
res = client.get(
|
||||
url_for("settings.settings_page"),
|
||||
headers={'Accept-Language': 'zh,en;q=0.9'},
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
assert res.status_code == 200
|
||||
|
||||
# Check Simplified Chinese translations (not Traditional or English)
|
||||
# Simplified: 小时, Traditional: 小時
|
||||
assert "小时".encode() in res.data, "Expected Simplified Chinese '小时' for Hours"
|
||||
assert "分钟".encode() in res.data, "Expected Simplified Chinese '分钟' for Minutes"
|
||||
assert "秒".encode() in res.data, "Expected Simplified Chinese '秒' for Seconds"
|
||||
# Make sure it's not Traditional Chinese
|
||||
assert "小時".encode() not in res.data, "Should not have Traditional Chinese '小時'"
|
||||
assert "分鐘".encode() not in res.data, "Should not have Traditional Chinese '分鐘'"
|
||||
|
||||
|
||||
def test_session_locale_overrides_accept_language(client, live_server, measure_memory_usage, datastore_path):
|
||||
"""
|
||||
Test that session locale preference overrides browser Accept-Language header.
|
||||
|
||||
Scenario:
|
||||
1. Browser auto-detects zh-TW (Traditional Chinese) from Accept-Language header
|
||||
2. User explicitly selects Korean language
|
||||
3. On subsequent page loads, Korean should be shown (not Traditional Chinese)
|
||||
even though the Accept-Language header still says zh-TW
|
||||
|
||||
This tests the session override behavior for issue #3779.
|
||||
"""
|
||||
from flask import url_for
|
||||
|
||||
# Step 1: Clear session and make first request with zh-TW header (auto-detect)
|
||||
with client.session_transaction() as sess:
|
||||
sess.clear()
|
||||
|
||||
res = client.get(
|
||||
url_for("watchlist.index"),
|
||||
headers={'Accept-Language': 'zh-TW,zh;q=0.9,en;q=0.8'},
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
assert res.status_code == 200
|
||||
|
||||
# Should initially get Traditional Chinese from auto-detect
|
||||
assert '選擇語言'.encode() in res.data, "Expected Traditional Chinese '選擇語言' from auto-detect"
|
||||
assert b'<html lang="zh-Hant-TW"' in res.data, "Expected zh-Hant-TW language tag"
|
||||
assert b'<span class="fi fi-tw fis" id="language-selector-flag">' in res.data, \
|
||||
"Expected Taiwan flag 'fi fi-tw' from auto-detect"
|
||||
|
||||
# Step 2: User explicitly selects Korean language
|
||||
res = client.get(
|
||||
url_for("set_language", locale="ko"),
|
||||
headers={'Accept-Language': 'zh-TW,zh;q=0.9,en;q=0.8'}, # Browser still sends zh-TW
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
assert res.status_code == 200
|
||||
|
||||
# Step 3: Make another request with same zh-TW header
|
||||
# Session should override the Accept-Language header
|
||||
res = client.get(
|
||||
url_for("watchlist.index"),
|
||||
headers={'Accept-Language': 'zh-TW,zh;q=0.9,en;q=0.8'}, # Still sending zh-TW!
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
assert res.status_code == 200
|
||||
|
||||
# Should now get Korean (session overrides auto-detect)
|
||||
# Korean: 언어 선택, Traditional Chinese: 選擇語言
|
||||
assert '언어 선택'.encode() in res.data, "Expected Korean '언어 선택' (Select Language) from session"
|
||||
assert '選擇語言'.encode() not in res.data, "Should not get Traditional Chinese when Korean is set in session"
|
||||
|
||||
# Check HTML lang attribute is Korean
|
||||
assert b'<html lang="ko"' in res.data, "Expected Korean language tag 'ko' in HTML"
|
||||
|
||||
# Check that Korean flag is shown (not Taiwan flag)
|
||||
assert b'<span class="fi fi-kr fis" id="language-selector-flag">' in res.data, \
|
||||
"Expected Korean flag 'fi fi-kr' from session preference"
|
||||
assert b'<span class="fi fi-tw fis" id="language-selector-flag">' not in res.data, \
|
||||
"Should not show Taiwan flag when Korean is set in session"
|
||||
|
||||
# Verify Korean text on settings page as well
|
||||
res = client.get(
|
||||
url_for("settings.settings_page"),
|
||||
headers={'Accept-Language': 'zh-TW,zh;q=0.9,en;q=0.8'}, # Still zh-TW!
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
assert res.status_code == 200
|
||||
|
||||
# Check Korean translations (not Traditional Chinese or English)
|
||||
# Korean: 시간 (Hours), 분 (Minutes), 초 (Seconds)
|
||||
# Traditional Chinese: 小時, 分鐘, 秒
|
||||
assert "시간".encode() in res.data, "Expected Korean '시간' for Hours"
|
||||
assert "분".encode() in res.data, "Expected Korean '분' for Minutes"
|
||||
assert "小時".encode() not in res.data, "Should not have Traditional Chinese '小時' when Korean is set"
|
||||
assert "分鐘".encode() not in res.data, "Should not have Traditional Chinese '分鐘' when Korean is set"
|
||||
|
||||
@@ -1,101 +0,0 @@
|
||||
# Translation Guide
|
||||
|
||||
## Updating Translations
|
||||
|
||||
To maintain consistency and minimize unnecessary changes in translation files, run these commands:
|
||||
|
||||
```bash
|
||||
python setup.py extract_messages # Extract translatable strings
|
||||
python setup.py update_catalog # Update all language files
|
||||
python setup.py compile_catalog # Compile to binary .mo files
|
||||
```
|
||||
|
||||
## Configuration
|
||||
|
||||
All translation settings are configured in **`../../setup.cfg`** (single source of truth).
|
||||
|
||||
The configuration below is shown for reference - **edit `setup.cfg` to change settings**:
|
||||
|
||||
```ini
|
||||
[extract_messages]
|
||||
# Extract translatable strings from source code
|
||||
mapping_file = babel.cfg
|
||||
output_file = changedetectionio/translations/messages.pot
|
||||
input_paths = changedetectionio
|
||||
keywords = _ _l gettext
|
||||
# Options to reduce unnecessary changes in .pot files
|
||||
sort_by_file = true # Keeps entries ordered by file path
|
||||
width = 120 # Consistent line width (prevents rewrapping)
|
||||
add_location = file # Show file path only (not line numbers)
|
||||
|
||||
[update_catalog]
|
||||
# Update existing .po files with new strings from .pot
|
||||
# Note: 'locale' is omitted - Babel auto-discovers all catalogs in output_dir
|
||||
input_file = changedetectionio/translations/messages.pot
|
||||
output_dir = changedetectionio/translations
|
||||
domain = messages
|
||||
# Options for consistent formatting
|
||||
width = 120 # Consistent line width
|
||||
no_fuzzy_matching = true # Avoids incorrect automatic matches
|
||||
|
||||
[compile_catalog]
|
||||
# Compile .po files to .mo binary format
|
||||
directory = changedetectionio/translations
|
||||
domain = messages
|
||||
```
|
||||
|
||||
**Key formatting options:**
|
||||
- `sort_by_file = true` - Orders entries by file path (consistent ordering)
|
||||
- `width = 120` - Fixed line width prevents text rewrapping
|
||||
- `add_location = file` - Shows file path only, not line numbers (reduces git churn)
|
||||
- `no_fuzzy_matching = true` - Prevents incorrect automatic fuzzy matches
|
||||
|
||||
## Why Use These Commands?
|
||||
|
||||
Running pybabel commands directly without consistent options causes:
|
||||
- ❌ Entries get reordered differently each time
|
||||
- ❌ Text gets rewrapped at different widths
|
||||
- ❌ Line numbers change every edit (if not configured)
|
||||
- ❌ Large diffs that make code review difficult
|
||||
|
||||
Using `python setup.py` commands ensures:
|
||||
- ✅ Consistent ordering (by file path, not alphabetically)
|
||||
- ✅ Consistent line width (120 characters, no rewrapping)
|
||||
- ✅ File-only locations (no line number churn)
|
||||
- ✅ No fuzzy matching (prevents incorrect auto-translations)
|
||||
- ✅ Minimal diffs (only actual changes show up)
|
||||
- ✅ Easier code review and git history
|
||||
|
||||
These commands read settings from `../../setup.cfg` automatically.
|
||||
|
||||
## Supported Languages
|
||||
|
||||
- `cs` - Czech (Čeština)
|
||||
- `de` - German (Deutsch)
|
||||
- `en_GB` - English (UK)
|
||||
- `en_US` - English (US)
|
||||
- `fr` - French (Français)
|
||||
- `it` - Italian (Italiano)
|
||||
- `ko` - Korean (한국어)
|
||||
- `zh` - Chinese Simplified (中文简体)
|
||||
- `zh_Hant_TW` - Chinese Traditional (繁體中文)
|
||||
|
||||
## Adding a New Language
|
||||
|
||||
1. Initialize the new language catalog:
|
||||
```bash
|
||||
pybabel init -i changedetectionio/translations/messages.pot -d changedetectionio/translations -l NEW_LANG_CODE
|
||||
```
|
||||
2. Compile it:
|
||||
```bash
|
||||
python setup.py compile_catalog
|
||||
```
|
||||
|
||||
Babel will auto-discover the new language on subsequent translation updates.
|
||||
|
||||
## Translation Notes
|
||||
|
||||
From CLAUDE.md:
|
||||
- Always use "monitor" or "watcher" terminology (not "clock")
|
||||
- Use the most brief wording suitable
|
||||
- When finding issues in one language, check ALL languages for the same issue
|
||||
Binary file not shown.
File diff suppressed because it is too large
Load Diff
Binary file not shown.
File diff suppressed because it is too large
Load Diff
Binary file not shown.
File diff suppressed because it is too large
Load Diff
Binary file not shown.
File diff suppressed because it is too large
Load Diff
Binary file not shown.
File diff suppressed because it is too large
Load Diff
Binary file not shown.
File diff suppressed because it is too large
Load Diff
Binary file not shown.
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
Binary file not shown.
File diff suppressed because it is too large
Load Diff
Binary file not shown.
File diff suppressed because it is too large
Load Diff
@@ -132,19 +132,11 @@ async def start_single_async_worker(worker_id, update_q, notification_q, app, da
|
||||
|
||||
while not app.config.exit.is_set():
|
||||
try:
|
||||
result = await async_update_worker(worker_id, update_q, notification_q, app, datastore, executor)
|
||||
|
||||
if result == "restart":
|
||||
# Worker requested restart - immediately loop back and restart
|
||||
if not in_pytest:
|
||||
logger.debug(f"Async worker {worker_id} restarting")
|
||||
continue
|
||||
else:
|
||||
# Worker exited cleanly (shutdown)
|
||||
if not in_pytest:
|
||||
logger.info(f"Async worker {worker_id} exited cleanly")
|
||||
break
|
||||
|
||||
await async_update_worker(worker_id, update_q, notification_q, app, datastore, executor)
|
||||
# If we reach here, worker exited cleanly
|
||||
if not in_pytest:
|
||||
logger.info(f"Async worker {worker_id} exited cleanly")
|
||||
break
|
||||
except asyncio.CancelledError:
|
||||
# Task was cancelled (normal shutdown)
|
||||
if not in_pytest:
|
||||
@@ -155,7 +147,7 @@ async def start_single_async_worker(worker_id, update_q, notification_q, app, da
|
||||
if not in_pytest:
|
||||
logger.info(f"Restarting async worker {worker_id} in 5 seconds...")
|
||||
await asyncio.sleep(5)
|
||||
|
||||
|
||||
if not in_pytest:
|
||||
logger.info(f"Async worker {worker_id} shutdown complete")
|
||||
|
||||
@@ -169,11 +161,7 @@ def add_worker(update_q, notification_q, app, datastore):
|
||||
"""Add a new async worker (for dynamic scaling)"""
|
||||
global worker_threads
|
||||
|
||||
# Reuse lowest available ID to prevent unbounded growth over time
|
||||
used_ids = {w.worker_id for w in worker_threads}
|
||||
worker_id = 0
|
||||
while worker_id in used_ids:
|
||||
worker_id += 1
|
||||
worker_id = len(worker_threads)
|
||||
logger.info(f"Adding async worker {worker_id}")
|
||||
|
||||
try:
|
||||
@@ -263,7 +251,7 @@ def queue_item_async_safe(update_q, item, silent=False):
|
||||
return False
|
||||
|
||||
if not silent:
|
||||
logger.trace(f"Successfully queued item: {item_uuid}")
|
||||
logger.debug(f"Successfully queued item: {item_uuid}")
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
|
||||
@@ -91,7 +91,7 @@ jq~=1.3; python_version >= "3.8" and sys_platform == "linux"
|
||||
|
||||
# playwright is installed at Dockerfile build time because it's not available on all platforms
|
||||
|
||||
pyppeteer-ng==2.0.0rc12
|
||||
pyppeteer-ng==2.0.0rc11
|
||||
pyppeteerstealth>=0.0.4
|
||||
|
||||
# Include pytest, so if theres a support issue we can ask them to run these tests on their setup
|
||||
|
||||
28
setup.cfg
28
setup.cfg
@@ -1,28 +0,0 @@
|
||||
# Translation configuration for changedetection.io
|
||||
# See changedetectionio/translations/README.md for full documentation on updating translations
|
||||
|
||||
[extract_messages]
|
||||
# Extract translatable strings from source code
|
||||
mapping_file = babel.cfg
|
||||
output_file = changedetectionio/translations/messages.pot
|
||||
input_paths = changedetectionio
|
||||
keywords = _ _l gettext
|
||||
# Options to reduce unnecessary changes in .pot files
|
||||
sort_by_file = true
|
||||
width = 120
|
||||
add_location = file
|
||||
|
||||
[update_catalog]
|
||||
# Update existing .po files with new strings from .pot
|
||||
# Note: Omitting 'locale' makes Babel auto-discover all catalogs in output_dir
|
||||
input_file = changedetectionio/translations/messages.pot
|
||||
output_dir = changedetectionio/translations
|
||||
domain = messages
|
||||
# Options for consistent formatting
|
||||
width = 120
|
||||
no_fuzzy_matching = true
|
||||
|
||||
[compile_catalog]
|
||||
# Compile .po files to .mo binary format
|
||||
directory = changedetectionio/translations
|
||||
domain = messages
|
||||
Reference in New Issue
Block a user