Compare commits


1 Commit

Author: dependabot[bot]
SHA1: fee3a9f059
Date: 2025-12-22 00:11:47 +00:00

Update selenium requirement from ~=4.31.0 to ~=4.39.0
Updates the requirements on [selenium](https://github.com/SeleniumHQ/Selenium) to permit the latest version.
- [Release notes](https://github.com/SeleniumHQ/Selenium/releases)
- [Commits](https://github.com/SeleniumHQ/Selenium/compare/selenium-4.31.0...selenium-4.39.0)

---
updated-dependencies:
- dependency-name: selenium
  dependency-version: 4.39.0
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
51 changed files with 162 additions and 5200 deletions
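For reference, ~= is pip's compatible-release specifier (PEP 440): selenium~=4.39.0 allows >=4.39.0, <4.40.0. The requirements.txt hunk itself is not among the excerpts below, but per the commit message it amounts to this one-line change:

-selenium~=4.31.0
+selenium~=4.39.0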

View File

@@ -7,8 +7,6 @@ ENV PYTHONUNBUFFERED=1
COPY requirements.txt /requirements.txt
ARG TARGETPLATFORM
RUN \
apk add --update --no-cache --virtual=build-dependencies \
build-base \
@@ -29,19 +27,7 @@ RUN \
file \
nodejs \
poppler-utils \
python3 \
glib \
libsm \
libxext \
libxrender && \
case "$TARGETPLATFORM" in \
linux/arm/v7|linux/arm/v8) \
echo "INFO: Skipping py3-opencv on $TARGETPLATFORM (using pixelmatch fallback)" \
;; \
*) \
apk add --update --no-cache py3-opencv || echo "WARN: py3-opencv install failed, using pixelmatch fallback" \
;; \
esac && \
python3 && \
echo "**** pip3 install test of changedetection.io ****" && \
python3 -m venv /lsiopy && \
pip install -U pip wheel setuptools && \
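The removed case block above implemented an optional-dependency pattern: install py3-opencv where the build is cheap, and fall back to the pure-Python pixelmatch library on arm/v7 and arm/v8. At runtime the same idea is a guarded import; a minimal sketch under that assumption (the helper name and the rough pixel count are illustrative, not the project's actual code):

try:
    import cv2                      # fast native path where py3-opencv was installed
except ImportError:
    cv2 = None                      # arm/v7 and arm/v8 images skip it

def changed_pixels(img_a, img_b):
    # Illustrative only: approximate count of differing pixels, same-size images assumed
    if cv2 is not None:
        import numpy as np
        return int(np.count_nonzero(cv2.absdiff(np.asarray(img_a), np.asarray(img_b))))
    from pixelmatch.contrib.PIL import pixelmatch   # pure-Python fallback
    return pixelmatch(img_a, img_b, None)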

View File

@@ -34,7 +34,6 @@ ENV OPENSSL_LIB_DIR="/usr/lib/arm-linux-gnueabihf"
ENV OPENSSL_INCLUDE_DIR="/usr/include/openssl"
# Additional environment variables for cryptography Rust build
ENV CRYPTOGRAPHY_DONT_BUILD_RUST=1
RUN --mount=type=cache,id=pip,sharing=locked,target=/tmp/pip-cache \
pip install \
--prefer-binary \
@@ -44,6 +43,7 @@ RUN --mount=type=cache,id=pip,sharing=locked,target=/tmp/pip-cache \
--target=/dependencies \
-r /requirements.txt
# Playwright is an alternative to Selenium
# Excluded this package from requirements.txt to prevent arm/v6 and arm/v7 builds from failing
# https://github.com/dgtlmoon/changedetection.io/pull/1067 also musl/alpine (not supported)
@@ -55,25 +55,6 @@ RUN --mount=type=cache,id=pip,sharing=locked,target=/tmp/pip-cache \
playwright~=1.56.0 \
|| echo "WARN: Failed to install Playwright. The application can still run, but the Playwright option will be disabled."
# OpenCV is optional for fast image comparison (pixelmatch is the fallback)
# Skip on arm/v7 and arm/v8 where builds take weeks - excluded from requirements.txt
ARG TARGETPLATFORM
RUN --mount=type=cache,id=pip,sharing=locked,target=/tmp/pip-cache \
case "$TARGETPLATFORM" in \
linux/arm/v7|linux/arm/v8) \
echo "INFO: Skipping OpenCV on $TARGETPLATFORM (build takes too long), using pixelmatch fallback" \
;; \
*) \
pip install \
--prefer-binary \
--extra-index-url https://www.piwheels.org/simple \
--cache-dir=/tmp/pip-cache \
--target=/dependencies \
opencv-python-headless>=4.8.0.76 \
|| echo "WARN: OpenCV install failed, will use pixelmatch fallback" \
;; \
esac
# Final image stage
FROM python:${PYTHON_VERSION}-slim-bookworm
@@ -88,11 +69,6 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
# favicon type detection and other uses
file \
zlib1g \
# OpenCV dependencies for image processing
libglib2.0-0 \
libsm6 \
libxext6 \
libxrender-dev \
&& apt-get clean && rm -rf /var/lib/apt/lists/*

View File

@@ -6,7 +6,6 @@ __version__ = '0.51.4'
from changedetectionio.strtobool import strtobool
from json.decoder import JSONDecodeError
import logging
import os
import getopt
import platform
@@ -166,11 +165,6 @@ def main():
" WARNING, ERROR, CRITICAL")
sys.exit(2)
# Disable verbose pyppeteer logging to prevent memory leaks from large CDP messages
# Set both parent and child loggers since pyppeteer hardcodes DEBUG level
logging.getLogger('pyppeteer.connection').setLevel(logging.WARNING)
logging.getLogger('pyppeteer.connection.Connection').setLevel(logging.WARNING)
# isn't there some @thingy to attach to each route to tell it that this route needs a datastore
app_config = {'datastore_path': datastore_path}
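The two setLevel() calls above lean on the standard-library logger hierarchy: 'pyppeteer.connection.Connection' is a child of 'pyppeteer.connection', and a logger with its own explicit level ignores its parent's. A small sketch of why both must be raised (assuming, per the comment, that pyppeteer itself sets DEBUG on the child):

import logging

parent = logging.getLogger('pyppeteer.connection')
child = logging.getLogger('pyppeteer.connection.Connection')

child.setLevel(logging.DEBUG)          # what pyppeteer does internally
parent.setLevel(logging.WARNING)       # not enough: the child's own level wins
print(child.getEffectiveLevel())       # 10 (DEBUG) - still floods the log

child.setLevel(logging.WARNING)        # hence setting the child as well
print(child.getEffectiveLevel())       # 30 (WARNING)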

View File

@@ -2,13 +2,12 @@ import os
from changedetectionio.validate_url import is_safe_valid_url
from . import auth
from flask_expects_json import expects_json
from changedetectionio import queuedWatchMetaData, strtobool
from changedetectionio import worker_handler
from flask import request, make_response, send_from_directory
from flask_expects_json import expects_json
from flask_restful import abort, Resource
from loguru import logger
from flask import request, make_response, send_from_directory
from . import auth
import copy
# Import schemas from __init__.py
@@ -128,60 +127,7 @@ class Watch(Resource):
if request.json.get('url') and not is_safe_valid_url(request.json.get('url')):
return "Invalid URL", 400
# Handle processor-config-* fields separately (save to JSON, not datastore)
from changedetectionio import processors
processor_config_data = {}
regular_data = {}
for key, value in request.json.items():
if key.startswith('processor_config_'):
config_key = key.replace('processor_config_', '')
if value: # Only save non-empty values
processor_config_data[config_key] = value
else:
regular_data[key] = value
# Update watch with regular (non-processor-config) fields
watch.update(regular_data)
# Save processor config to JSON file if any config data exists
if processor_config_data:
try:
processor_name = request.json.get('processor', watch.get('processor'))
if processor_name:
# Create a processor instance to access config methods
from changedetectionio.processors import difference_detection_processor
processor_instance = difference_detection_processor(self.datastore, uuid)
# Use processor name as filename so each processor keeps its own config
config_filename = f'{processor_name}.json'
processor_instance.update_extra_watch_config(config_filename, processor_config_data)
logger.debug(f"API: Saved processor config to {config_filename}: {processor_config_data}")
# Call optional edit_hook if processor has one
try:
import importlib
edit_hook_module_name = f'changedetectionio.processors.{processor_name}.edit_hook'
try:
edit_hook = importlib.import_module(edit_hook_module_name)
logger.debug(f"API: Found edit_hook module for {processor_name}")
if hasattr(edit_hook, 'on_config_save'):
logger.info(f"API: Calling edit_hook.on_config_save for {processor_name}")
# Call hook and get updated config
updated_config = edit_hook.on_config_save(watch, processor_config_data, self.datastore)
# Save updated config back to file
processor_instance.update_extra_watch_config(config_filename, updated_config)
logger.info(f"API: Edit hook updated config: {updated_config}")
else:
logger.debug(f"API: Edit hook module found but no on_config_save function")
except ModuleNotFoundError:
logger.debug(f"API: No edit_hook module for processor {processor_name} (this is normal)")
except Exception as hook_error:
logger.error(f"API: Edit hook error (non-fatal): {hook_error}", exc_info=True)
except Exception as e:
logger.error(f"API: Failed to save processor config: {e}")
watch.update(request.json)
return "OK", 200

View File

@@ -42,13 +42,13 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore):
try:
# Use native janus async interface - no threads needed!
queued_item_data = await asyncio.wait_for(q.async_get(), timeout=1.0)
except asyncio.TimeoutError:
# No jobs available, continue loop
continue
except Exception as e:
logger.critical(f"CRITICAL: Worker {worker_id} failed to get queue item: {type(e).__name__}: {e}")
# Log queue health for debugging
try:
queue_size = q.qsize()
@@ -56,28 +56,15 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore):
logger.critical(f"CRITICAL: Worker {worker_id} queue health - size: {queue_size}, empty: {is_empty}")
except Exception as health_e:
logger.critical(f"CRITICAL: Worker {worker_id} queue health check failed: {health_e}")
await asyncio.sleep(0.1)
continue
uuid = queued_item_data.item.get('uuid')
# RACE CONDITION FIX: Check if this UUID is already being processed by another worker
from changedetectionio import worker_handler
from changedetectionio.queuedWatchMetaData import PrioritizedItem
if worker_handler.is_watch_running(uuid):
logger.trace(f"Worker {worker_id} skipping UUID {uuid} - already being processed, re-queuing for later")
# Re-queue with MUCH lower priority (higher number = processed later)
# This prevents tight loop where high-priority item keeps getting picked immediately
deferred_priority = max(1000, queued_item_data.priority * 10)
deferred_item = PrioritizedItem(priority=deferred_priority, item=queued_item_data.item)
worker_handler.queue_item_async_safe(q, deferred_item, silent=True)
await asyncio.sleep(0.1) # Brief pause to avoid tight loop
continue
fetch_start_time = round(time.time())
# Mark this UUID as being processed
from changedetectionio import worker_handler
worker_handler.set_uuid_processing(uuid, processing=True)
try:
@@ -102,8 +89,9 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore):
processor = watch.get('processor', 'text_json_diff')
# Init a new 'difference_detection_processor'
processor_module_name = f"changedetectionio.processors.{processor}.processor"
try:
processor_module = importlib.import_module(f"changedetectionio.processors.{processor}.processor")
processor_module = importlib.import_module(processor_module_name)
except ModuleNotFoundError as e:
print(f"Processor module '{processor}' not found.")
raise e
@@ -445,10 +433,6 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore):
# 3. GC can't collect the object anyway (still referenced by datastore)
# 4. It would just cause confusion
# Force garbage collection after cleanup
import gc
gc.collect()
logger.debug(f"Worker {worker_id} completed watch {uuid} in {time.time()-fetch_start_time:.2f}s")
except Exception as cleanup_error:
logger.error(f"Worker {worker_id} error during cleanup: {cleanup_error}")

View File

@@ -1,5 +1,6 @@
from flask import Blueprint, request, redirect, url_for, flash, render_template, make_response, send_from_directory
import os
import time
import re
import importlib
from loguru import logger
@@ -239,73 +240,4 @@ def construct_blueprint(datastore: ChangeDetectionStore):
redirect=redirect
)
@diff_blueprint.route("/diff/<string:uuid>/processor-asset/<string:asset_name>", methods=['GET'])
@login_optionally_required
def processor_asset(uuid, asset_name):
"""
Serve processor-specific binary assets (images, files, etc.).
This route is processor-aware: it delegates to the processor's
difference.py module, allowing different processor types to serve
custom assets without embedding them as base64 in templates.
This solves memory issues with large binary data (e.g., screenshots)
by streaming them as separate HTTP responses instead of embedding
in the HTML template.
Each processor implements processors/{type}/difference.py::get_asset()
which returns (binary_data, content_type, cache_control_header).
Example URLs:
- /diff/{uuid}/processor-asset/before
- /diff/{uuid}/processor-asset/after
- /diff/{uuid}/processor-asset/rendered_diff
"""
# More for testing, possible to return the first/only
if uuid == 'first':
uuid = list(datastore.data['watching'].keys()).pop()
try:
watch = datastore.data['watching'][uuid]
except KeyError:
flash("No history found for the specified link, bad link?", "error")
return redirect(url_for('watchlist.index'))
# Get the processor type for this watch
processor_name = watch.get('processor', 'text_json_diff')
try:
# Try to import the processor's difference module
processor_module = importlib.import_module(f'changedetectionio.processors.{processor_name}.difference')
# Call the processor's get_asset() function
if hasattr(processor_module, 'get_asset'):
result = processor_module.get_asset(
asset_name=asset_name,
watch=watch,
datastore=datastore,
request=request
)
if result is None:
from flask import abort
abort(404, description=f"Asset '{asset_name}' not found")
binary_data, content_type, cache_control = result
response = make_response(binary_data)
response.headers['Content-Type'] = content_type
if cache_control:
response.headers['Cache-Control'] = cache_control
return response
else:
logger.warning(f"Processor {processor_name} does not implement get_asset()")
from flask import abort
abort(404, description=f"Processor '{processor_name}' does not support assets")
except (ImportError, ModuleNotFoundError) as e:
logger.warning(f"Processor {processor_name} does not have a difference module: {e}")
from flask import abort
abort(404, description=f"Processor '{processor_name}' not found")
return diff_blueprint
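The docstring above fixes the asset contract: get_asset() returns a (binary_data, content_type, cache_control_header) tuple, or None to trigger a 404. A minimal conforming processors/{type}/difference.py might look like this sketch (the screenshot lookup is an assumption, not code from the repository):

def get_asset(asset_name, watch, datastore, request):
    # Return (binary_data, content_type, cache_control) or None if unknown
    if asset_name == 'before':
        path = watch.get('last_screenshot_path')   # hypothetical field
        if path:
            with open(path, 'rb') as f:
                return f.read(), 'image/jpeg', 'max-age=3600'
    return None   # the route above turns this into a 404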

View File

@@ -96,25 +96,6 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
form.datastore = datastore
form.watch = default
# Load processor-specific config from JSON file for GET requests
if request.method == 'GET' and processor_name:
try:
# Create a processor instance to access config methods
processor_instance = processors.difference_detection_processor(datastore, uuid)
# Use processor name as filename so each processor keeps its own config
config_filename = f'{processor_name}.json'
processor_config = processor_instance.get_extra_watch_config(config_filename)
if processor_config:
# Populate processor-config-* fields from JSON
for config_key, config_value in processor_config.items():
field_name = f'processor_config_{config_key}'
if hasattr(form, field_name):
getattr(form, field_name).data = config_value
logger.debug(f"Loaded processor config from {config_filename}: {field_name} = {config_value}")
except Exception as e:
logger.warning(f"Failed to load processor config: {e}")
for p in datastore.extra_browsers:
form.fetch_backend.choices.append(p)
@@ -148,60 +129,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
extra_update_obj['time_between_check'] = form.time_between_check.data
# Handle processor-config-* fields separately (save to JSON, not datastore)
processor_config_data = {}
fields_to_remove = []
for field_name, field_value in form.data.items():
if field_name.startswith('processor_config_'):
config_key = field_name.replace('processor_config_', '')
if field_value: # Only save non-empty values
processor_config_data[config_key] = field_value
fields_to_remove.append(field_name)
# Save processor config to JSON file if any config data exists
if processor_config_data:
try:
processor_name = form.data.get('processor')
# Create a processor instance to access config methods
processor_instance = processors.difference_detection_processor(datastore, uuid)
# Use processor name as filename so each processor keeps its own config
config_filename = f'{processor_name}.json'
processor_instance.update_extra_watch_config(config_filename, processor_config_data)
logger.debug(f"Saved processor config to {config_filename}: {processor_config_data}")
# Call optional edit_hook if processor has one
try:
# Try to import the edit_hook module from the processor package
import importlib
edit_hook_module_name = f'changedetectionio.processors.{processor_name}.edit_hook'
try:
edit_hook = importlib.import_module(edit_hook_module_name)
logger.debug(f"Found edit_hook module for {processor_name}")
if hasattr(edit_hook, 'on_config_save'):
logger.info(f"Calling edit_hook.on_config_save for {processor_name}")
watch_obj = datastore.data['watching'][uuid]
# Call hook and get updated config
updated_config = edit_hook.on_config_save(watch_obj, processor_config_data, datastore)
# Save updated config back to file
processor_instance.update_extra_watch_config(config_filename, updated_config)
logger.info(f"Edit hook updated config: {updated_config}")
else:
logger.debug(f"Edit hook module found but no on_config_save function")
except ModuleNotFoundError:
logger.debug(f"No edit_hook module for processor {processor_name} (this is normal)")
except Exception as hook_error:
logger.error(f"Edit hook error (non-fatal): {hook_error}", exc_info=True)
except Exception as e:
logger.error(f"Failed to save processor config: {e}")
# Remove processor-config-* fields from form.data before updating datastore
for field_name in fields_to_remove:
form.data.pop(field_name, None)
# Ignore text
form_ignore_text = form.ignore_text.data
datastore.data['watching'][uuid]['ignore_text'] = form_ignore_text
@@ -303,17 +231,12 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
# Get fetcher capabilities instead of hardcoded logic
capabilities = get_fetcher_capabilities(watch, datastore)
app_rss_token = datastore.data['settings']['application'].get('rss_access_token'),
c = [f"processor-{watch.get('processor')}"]
if worker_handler.is_watch_running(uuid):
c.append('checking-now')
template_args = {
'available_processors': processors.available_processors(),
'available_timezones': sorted(available_timezones()),
'browser_steps_config': browser_step_ui_config,
'emailprefix': os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False),
'extra_classes': ' '.join(c),
'extra_classes': 'checking-now' if worker_handler.is_watch_running(uuid) else '',
'extra_notification_token_placeholder_info': datastore.get_unique_notification_token_placeholders_available(),
'extra_processor_config': form.extra_tab_content(),
'extra_title': f" - Edit - {watch.label}",

View File

@@ -12,19 +12,10 @@ def construct_blueprint(datastore: ChangeDetectionStore):
@preview_blueprint.route("/preview/<string:uuid>", methods=['GET'])
@login_optionally_required
def preview_page(uuid):
"""
Render the preview page for a watch.
content = []
versions = []
timestamp = None
This route is processor-aware: it delegates rendering to the processor's
preview.py module, allowing different processor types to provide
custom visualizations:
- text_json_diff: Text preview with syntax highlighting
- image_ssim_diff: Image preview with proper rendering
- restock_diff: Could show latest price/stock data
Each processor implements processors/{type}/preview.py::render()
If a processor doesn't have a preview module, falls back to default text preview.
"""
# More for testing, possible to return the first/only
if uuid == 'first':
uuid = list(datastore.data['watching'].keys()).pop()
@@ -35,33 +26,6 @@ def construct_blueprint(datastore: ChangeDetectionStore):
flash("No history found for the specified link, bad link?", "error")
return redirect(url_for('watchlist.index'))
# Get the processor type for this watch
processor_name = watch.get('processor', 'text_json_diff')
try:
# Try to import the processor's preview module
import importlib
processor_module = importlib.import_module(f'changedetectionio.processors.{processor_name}.preview')
# Call the processor's render() function
if hasattr(processor_module, 'render'):
return processor_module.render(
watch=watch,
datastore=datastore,
request=request,
url_for=url_for,
render_template=render_template,
flash=flash,
redirect=redirect
)
except (ImportError, ModuleNotFoundError) as e:
logger.debug(f"Processor {processor_name} does not have a preview module, using default preview: {e}")
# Fallback: if processor doesn't have preview module, use default text preview
content = []
versions = []
timestamp = None
system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver'
extra_stylesheets = [url_for('static_content', group='styles', filename='diff.css')]
@@ -128,73 +92,4 @@ def construct_blueprint(datastore: ChangeDetectionStore):
return output
@preview_blueprint.route("/preview/<string:uuid>/processor-asset/<string:asset_name>", methods=['GET'])
@login_optionally_required
def processor_asset(uuid, asset_name):
"""
Serve processor-specific binary assets for preview (images, files, etc.).
This route is processor-aware: it delegates to the processor's
preview.py module, allowing different processor types to serve
custom assets without embedding them as base64 in templates.
This solves memory issues with large binary data by streaming them
as separate HTTP responses instead of embedding in the HTML template.
Each processor implements processors/{type}/preview.py::get_asset()
which returns (binary_data, content_type, cache_control_header).
Example URLs:
- /preview/{uuid}/processor-asset/screenshot?version=123456789
"""
from flask import make_response
# More for testing, possible to return the first/only
if uuid == 'first':
uuid = list(datastore.data['watching'].keys()).pop()
try:
watch = datastore.data['watching'][uuid]
except KeyError:
flash("No history found for the specified link, bad link?", "error")
return redirect(url_for('watchlist.index'))
# Get the processor type for this watch
processor_name = watch.get('processor', 'text_json_diff')
try:
# Try to import the processor's preview module
import importlib
processor_module = importlib.import_module(f'changedetectionio.processors.{processor_name}.preview')
# Call the processor's get_asset() function
if hasattr(processor_module, 'get_asset'):
result = processor_module.get_asset(
asset_name=asset_name,
watch=watch,
datastore=datastore,
request=request
)
if result is None:
from flask import abort
abort(404, description=f"Asset '{asset_name}' not found")
binary_data, content_type, cache_control = result
response = make_response(binary_data)
response.headers['Content-Type'] = content_type
if cache_control:
response.headers['Cache-Control'] = cache_control
return response
else:
logger.warning(f"Processor {processor_name} does not implement get_asset()")
from flask import abort
abort(404, description=f"Processor '{processor_name}' does not support assets")
except (ImportError, ModuleNotFoundError) as e:
logger.warning(f"Processor {processor_name} does not have a preview module: {e}")
from flask import abort
abort(404, description=f"Processor '{processor_name}' not found")
return preview_blueprint
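Mirroring the asset contract, the preview entry point described in the docstring above is a module-level render() that receives the Flask helpers as arguments. A minimal conforming processors/{type}/preview.py, as a sketch (the template name is invented):

def render(watch, datastore, request, url_for, render_template, flash, redirect):
    # Processor-specific preview; if this module is absent the route
    # falls back to the default text preview automatically
    return render_template('preview-custom.html',   # hypothetical template
                           watch=watch,
                           extra_title=f" - Preview - {watch.label}")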

View File

@@ -50,7 +50,7 @@
{% endif %}
<li class="tab"><a id="browsersteps-tab" href="#browser-steps">Browser Steps</a></li>
<!-- should goto extra forms? -->
{% if watch['processor'] == 'text_json_diff' or watch['processor'] == 'image_ssim_diff' %}
{% if watch['processor'] == 'text_json_diff' %}
<li class="tab"><a id="visualselector-tab" href="#visualselector">Visual Filter Selector</a></li>
<li class="tab" id="filters-and-triggers-tab"><a href="#filters-and-triggers">Filters &amp; Triggers</a></li>
<li class="tab" id="conditions-tab"><a href="#conditions">Conditions</a></li>
@@ -284,7 +284,7 @@ Math: {{ 1 + 1 }}") }}
</fieldset>
</div>
{% if watch['processor'] == 'text_json_diff' or watch['processor'] == 'image_ssim_diff' %}
{% if watch['processor'] == 'text_json_diff' %}
<div class="tab-pane-inner" id="conditions">
<script>
@@ -375,7 +375,7 @@ Math: {{ 1 + 1 }}") }}
{{ extra_form_content|safe }}
</div>
{% endif %}
{% if watch['processor'] == 'text_json_diff' or watch['processor'] == 'image_ssim_diff' %}
{% if watch['processor'] == 'text_json_diff' %}
<div class="tab-pane-inner visual-selector-ui" id="visualselector">
<img class="beta-logo" src="{{url_for('static_content', group='images', filename='beta-logo.png')}}" alt="New beta functionality">
@@ -387,22 +387,6 @@ Math: {{ 1 + 1 }}") }}
The Visual Selector tool lets you select the <i>text</i> elements that will be used for the change detection. It automatically fills-in the filters in the "CSS/JSONPath/JQ/XPath Filters" box of the <a href="#filters-and-triggers">Filters & Triggers</a> tab. Use <strong>Shift+Click</strong> to select multiple items.
</span>
{% if watch['processor'] == 'image_ssim_diff' %}
<div id="selection-mode-controls" style="margin: 10px 0; padding: 10px; background: var(--color-background-tab); border-radius: 5px;">
<label style="font-weight: 600; margin-right: 15px;">Selection Mode:</label>
<label style="margin-right: 15px;">
<input type="radio" name="selector-mode" value="element" style="margin-right: 5px;">
Select by element
</label>
<label>
<input type="radio" name="selector-mode" value="draw" checked style="margin-right: 5px;">
Draw area
</label>
{{ render_field(form.processor_config_bounding_box) }}
{{ render_field(form.processor_config_selection_mode) }}
</div>
{% endif %}
<div id="selector-header">
<a id="clear-selector" class="pure-button button-secondary button-xsmall" style="font-size: 70%">Clear selection</a>
<!-- visual selector IMG will try to load, it will either replace this or on error replace it with some handy text -->

View File

@@ -6,7 +6,6 @@ from flask_login import current_user
from flask_paginate import Pagination, get_page_parameter
from changedetectionio import forms
from changedetectionio import processors
from changedetectionio.store import ChangeDetectionStore
from changedetectionio.auth_decorator import login_optionally_required
@@ -91,9 +90,6 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
hosted_sticky=os.getenv("SALTED_PASS", False) == False,
now_time_server=round(time.time()),
pagination=pagination,
processor_badge_texts=processors.get_processor_badge_texts(),
processor_descriptions=processors.get_processor_descriptions(),
processor_badge_css=processors.get_processor_badge_css(),
queued_uuids=[q_uuid.item['uuid'] for q_uuid in update_q.queue],
search_q=request.args.get('q', '').strip(),
sort_attribute=request.args.get('sort') if request.args.get('sort') else request.cookies.get('sort'),

View File

@@ -18,9 +18,6 @@ document.addEventListener('DOMContentLoaded', function() {
background-repeat: no-repeat;
transition: background-size 0.9s ease
}
/* Auto-generated processor badge colors */
{{ processor_badge_css|safe }}
</style>
<div class="box" id="form-quick-watch-add">
@@ -176,8 +173,8 @@ document.addEventListener('DOMContentLoaded', function() {
<div class="ldjson-price-track-offer">Switch to Restock & Price watch mode? <a href="{{url_for('price_data_follower.accept', uuid=watch.uuid)}}" class="pure-button button-xsmall">Yes</a> <a href="{{url_for('price_data_follower.reject', uuid=watch.uuid)}}" class="">No</a></div>
{%- endif -%}
{%- endif -%}
{%- if watch['processor'] and watch['processor'] in processor_badge_texts -%}
<span class="processor-badge processor-badge-{{ watch['processor'] }}" title="{{ processor_descriptions.get(watch['processor'], watch['processor']) }}">{{ processor_badge_texts[watch['processor']] }}</span>
{%- if watch['processor'] == 'restock_diff' -%}
<span class="tracking-ldjson-price-data" title="Automatically following embedded price information"><img src="{{url_for('static_content', group='images', filename='price-tag-icon.svg')}}" class="status-icon price-follow-tag-icon" > Price</span>
{%- endif -%}
{%- for watch_tag_uuid, watch_tag in datastore.get_all_tags_for_watch(watch['uuid']).items() -%}
<span class="watch-tag-list">{{ watch_tag.title }}</span>

View File

@@ -21,9 +21,7 @@ SCREENSHOT_MAX_TOTAL_HEIGHT = int(os.getenv("SCREENSHOT_MAX_HEIGHT", SCREENSHOT_
# The size at which we will switch to stitching method, when below this (and
# MAX_TOTAL_HEIGHT which can be set by a user) we will use the default
# screenshot method.
# Increased from 8000 to 10000 for better performance (fewer chunks = faster)
# Most modern GPUs support 16384x16384 textures, so 1280x10000 is safe
SCREENSHOT_SIZE_STITCH_THRESHOLD = int(os.getenv("SCREENSHOT_CHUNK_HEIGHT", 10000))
SCREENSHOT_SIZE_STITCH_THRESHOLD = 8000
# available_fetchers() will scan this implementation looking for anything starting with html_
# this information is used in the form selections
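The threshold matters because tall pages are captured in viewport-high slices and stitched, so the chunk count - and the per-chunk screenshot overhead - is ceil(min(page_height, max_total_height) / step_size). A quick illustration (the heights are made-up examples):

import math

def chunk_count(page_height, max_total_height, step_size):
    return math.ceil(min(page_height, max_total_height) / step_size)

print(chunk_count(30000, 30000, 8000))    # 4 chunks at the restored 8000px threshold
print(chunk_count(30000, 30000, 10000))   # 3 chunks at the removed 10000px value
print(chunk_count(30000, 16000, 8000))    # 2 chunks: capture capped at max_total_height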

View File

@@ -51,7 +51,6 @@ class Fetcher():
favicon_blob = None
instock_data = None
instock_data_js = ""
screenshot_format = None
status_code = None
webdriver_js_execute_code = None
xpath_data = None
@@ -71,11 +70,6 @@ class Fetcher():
supports_screenshots = False # Can capture page screenshots
supports_xpath_element_data = False # Can extract xpath element positions/data for visual selector
def __init__(self, **kwargs):
if kwargs and 'screenshot_format' in kwargs:
self.screenshot_format = kwargs.get('screenshot_format')
@classmethod
def get_status_icon_data(cls):
"""Return data for status icon to display in the watch overview.

View File

@@ -9,7 +9,7 @@ from changedetectionio.content_fetchers import SCREENSHOT_MAX_HEIGHT_DEFAULT, vi
from changedetectionio.content_fetchers.base import Fetcher, manage_user_agent
from changedetectionio.content_fetchers.exceptions import PageUnloadable, Non200ErrorCodeReceived, EmptyReply, ScreenshotUnavailable
async def capture_full_page_async(page, screenshot_format='JPEG'):
async def capture_full_page_async(page):
import os
import time
from multiprocessing import Process, Pipe
@@ -26,20 +26,8 @@ async def capture_full_page_async(page, screenshot_format='JPEG'):
step_size = SCREENSHOT_SIZE_STITCH_THRESHOLD # Size that won't cause GPU to overflow
screenshot_chunks = []
y = 0
elements_locked = False
if page_height > page.viewport_size['height']:
# Lock all element dimensions BEFORE screenshot to prevent CSS media queries from resizing
# capture_full_page_async() changes viewport height which triggers @media (min-height) rules
lock_elements_js_path = os.path.join(os.path.dirname(__file__), 'res', 'lock-elements-sizing.js')
with open(lock_elements_js_path, 'r') as f:
lock_elements_js = f.read()
await page.evaluate(lock_elements_js)
elements_locked = True
logger.debug("Element dimensions locked before screenshot capture")
if page_height < step_size:
step_size = page_height # In case the page is bigger than the default viewport but smaller than the proposed step size
logger.debug(f"Setting bigger viewport to step through large page width W{page.viewport_size['width']}xH{step_size} because page_height > viewport_size")
@@ -47,66 +35,36 @@ async def capture_full_page_async(page, screenshot_format='JPEG'):
await page.set_viewport_size({'width': page.viewport_size['width'], 'height': step_size})
# Capture screenshots in chunks up to the max total height
# Use PNG for better quality (no compression artifacts), JPEG for smaller size
screenshot_type = screenshot_format.lower() if screenshot_format else 'jpeg'
# PNG should use quality 100, JPEG uses configurable quality
screenshot_quality = 100 if screenshot_type == 'png' else int(os.getenv("SCREENSHOT_QUALITY", 72))
while y < min(page_height, SCREENSHOT_MAX_TOTAL_HEIGHT):
# Only scroll if not at the top (y > 0)
if y > 0:
await page.evaluate(f"window.scrollTo(0, {y})")
# Request GC only before screenshot (not 3x per chunk)
await page.request_gc()
screenshot_kwargs = {
'type': screenshot_type,
'full_page': False
}
# Only pass quality parameter for jpeg (PNG doesn't support it in Playwright)
if screenshot_type == 'jpeg':
screenshot_kwargs['quality'] = screenshot_quality
screenshot_chunks.append(await page.screenshot(**screenshot_kwargs))
await page.evaluate(f"window.scrollTo(0, {y})")
await page.request_gc()
screenshot_chunks.append(await page.screenshot(
type="jpeg",
full_page=False,
quality=int(os.getenv("SCREENSHOT_QUALITY", 72))
))
y += step_size
await page.request_gc()
# Restore original viewport size
await page.set_viewport_size({'width': original_viewport['width'], 'height': original_viewport['height']})
# Unlock element dimensions if they were locked
if elements_locked:
unlock_elements_js_path = os.path.join(os.path.dirname(__file__), 'res', 'unlock-elements-sizing.js')
with open(unlock_elements_js_path, 'r') as f:
unlock_elements_js = f.read()
await page.evaluate(unlock_elements_js)
logger.debug("Element dimensions unlocked after screenshot capture")
# If we have multiple chunks, stitch them together
if len(screenshot_chunks) > 1:
from changedetectionio.content_fetchers.screenshot_handler import stitch_images_worker
logger.debug(f"Screenshot stitching {len(screenshot_chunks)} chunks together")
# For small number of chunks (2-3), stitch inline to avoid multiprocessing overhead
# Only use separate process for many chunks (4+) to avoid blocking the event loop
if len(screenshot_chunks) <= 3:
from changedetectionio.content_fetchers.screenshot_handler import stitch_images_inline
screenshot = stitch_images_inline(screenshot_chunks, page_height, SCREENSHOT_MAX_TOTAL_HEIGHT)
else:
# Use separate process for many chunks to avoid blocking
from changedetectionio.content_fetchers.screenshot_handler import stitch_images_worker
parent_conn, child_conn = Pipe()
p = Process(target=stitch_images_worker, args=(child_conn, screenshot_chunks, page_height, SCREENSHOT_MAX_TOTAL_HEIGHT))
p.start()
screenshot = parent_conn.recv_bytes()
p.join()
# Explicit cleanup
del p
del parent_conn, child_conn
parent_conn, child_conn = Pipe()
p = Process(target=stitch_images_worker, args=(child_conn, screenshot_chunks, page_height, SCREENSHOT_MAX_TOTAL_HEIGHT))
p.start()
screenshot = parent_conn.recv_bytes()
p.join()
logger.debug(
f"Screenshot (chunked/stitched) - Page height: {page_height} Capture height: {SCREENSHOT_MAX_TOTAL_HEIGHT} - Stitched together in {time.time() - start:.2f}s")
# Explicit cleanup
del screenshot_chunks
del p
del parent_conn, child_conn
screenshot_chunks = None
return screenshot
@@ -145,8 +103,8 @@ class fetcher(Fetcher):
'title': 'Using a Chrome browser'
}
def __init__(self, proxy_override=None, custom_browser_connection_url=None, **kwargs):
super().__init__(**kwargs)
def __init__(self, proxy_override=None, custom_browser_connection_url=None):
super().__init__()
self.browser_type = os.getenv("PLAYWRIGHT_BROWSER_TYPE", 'chromium').strip('"')
@@ -181,7 +139,7 @@ class fetcher(Fetcher):
async def screenshot_step(self, step_n=''):
super().screenshot_step(step_n=step_n)
screenshot = await capture_full_page_async(page=self.page, screenshot_format=self.screenshot_format)
screenshot = await capture_full_page_async(page=self.page)
if self.browser_steps_screenshot_path is not None:
@@ -207,7 +165,6 @@ class fetcher(Fetcher):
request_body=None,
request_headers=None,
request_method=None,
screenshot_format=None,
timeout=None,
url=None,
watch_uuid=None,
@@ -302,7 +259,7 @@ class fetcher(Fetcher):
logger.error(f"Error fetching FavIcon info {str(e)}, continuing.")
if self.status_code != 200 and not ignore_status_codes:
screenshot = await capture_full_page_async(self.page, screenshot_format=self.screenshot_format)
screenshot = await capture_full_page_async(self.page)
raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, screenshot=screenshot)
if not empty_pages_are_a_change and len((await self.page.content()).strip()) == 0:
@@ -351,7 +308,7 @@ class fetcher(Fetcher):
# acceptable screenshot quality here
try:
# The actual screenshot - this is always base64 and needs decoding! Horrible! Huge CPU usage
self.screenshot = await capture_full_page_async(page=self.page, screenshot_format=self.screenshot_format)
self.screenshot = await capture_full_page_async(page=self.page)
except Exception as e:
# It's likely the screenshot was too long/big and something crashed

View File

@@ -20,7 +20,7 @@ from changedetectionio.content_fetchers.exceptions import PageUnloadable, Non200
# Screenshots also travel via the ws:// (websocket) meaning that the binary data is base64 encoded
# which will significantly increase the IO size between the server and client, it's recommended to use the lowest
# acceptable screenshot quality here
async def capture_full_page(page, screenshot_format='JPEG'):
async def capture_full_page(page):
import os
import time
from multiprocessing import Process, Pipe
@@ -41,25 +41,11 @@ async def capture_full_page(page, screenshot_format='JPEG'):
# which will significantly increase the IO size between the server and client, it's recommended to use the lowest
# acceptable screenshot quality here
# Use PNG for better quality (no compression artifacts), JPEG for smaller size
screenshot_type = screenshot_format.lower() if screenshot_format else 'jpeg'
# PNG should use quality 100, JPEG uses configurable quality
screenshot_quality = 100 if screenshot_type == 'png' else int(os.getenv("SCREENSHOT_QUALITY", 72))
step_size = SCREENSHOT_SIZE_STITCH_THRESHOLD # Something that will not cause the GPU to overflow when taking the screenshot
screenshot_chunks = []
y = 0
elements_locked = False
if page_height > page.viewport['height']:
# Lock all element dimensions BEFORE screenshot to prevent CSS media queries from resizing
# capture_full_page() changes viewport height which triggers @media (min-height) rules
lock_elements_js_path = os.path.join(os.path.dirname(__file__), 'res', 'lock-elements-sizing.js')
with open(lock_elements_js_path, 'r') as f:
lock_elements_js = f.read()
await page.evaluate(lock_elements_js)
elements_locked = True
logger.debug("Element dimensions locked before screenshot capture")
if page_height < step_size:
step_size = page_height # In case the page is bigger than the default viewport but smaller than the proposed step size
await page.setViewport({'width': page.viewport['width'], 'height': step_size})
@@ -74,27 +60,13 @@ async def capture_full_page(page, screenshot_format='JPEG'):
y
)
screenshot_kwargs = {
'type_': screenshot_type,
'fullPage': False
}
# PNG doesn't support quality parameter in Puppeteer
if screenshot_type == 'jpeg':
screenshot_kwargs['quality'] = screenshot_quality
screenshot_chunks.append(await page.screenshot(**screenshot_kwargs))
screenshot_chunks.append(await page.screenshot(type_='jpeg',
fullPage=False,
quality=int(os.getenv("SCREENSHOT_QUALITY", 72))))
y += step_size
await page.setViewport({'width': original_viewport['width'], 'height': original_viewport['height']})
# Unlock element dimensions if they were locked
if elements_locked:
unlock_elements_js_path = os.path.join(os.path.dirname(__file__), 'res', 'unlock-elements-sizing.js')
with open(unlock_elements_js_path, 'r') as f:
unlock_elements_js = f.read()
await page.evaluate(unlock_elements_js)
logger.debug("Element dimensions unlocked after screenshot capture")
if len(screenshot_chunks) > 1:
from changedetectionio.content_fetchers.screenshot_handler import stitch_images_worker
logger.debug(f"Screenshot stitching {len(screenshot_chunks)} chunks together")
@@ -140,8 +112,8 @@ class fetcher(Fetcher):
'title': 'Using a Chrome browser'
}
def __init__(self, proxy_override=None, custom_browser_connection_url=None, **kwargs):
super().__init__(**kwargs)
def __init__(self, proxy_override=None, custom_browser_connection_url=None):
super().__init__()
if custom_browser_connection_url:
self.browser_connection_is_custom = True
@@ -195,7 +167,6 @@ class fetcher(Fetcher):
request_body,
request_headers,
request_method,
screenshot_format,
timeout,
url,
watch_uuid
@@ -240,6 +211,7 @@ class fetcher(Fetcher):
"height": int(match.group(2))
})
logger.debug(f"Puppeteer viewport size {self.page.viewport}")
try:
from pyppeteerstealth import inject_evasions_into_page
except ImportError:
@@ -344,7 +316,7 @@ class fetcher(Fetcher):
logger.error(f"Error fetching FavIcon info {str(e)}, continuing.")
if self.status_code != 200 and not ignore_status_codes:
screenshot = await capture_full_page(page=self.page, screenshot_format=self.screenshot_format)
screenshot = await capture_full_page(page=self.page)
raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, screenshot=screenshot)
@@ -371,12 +343,6 @@ class fetcher(Fetcher):
await self.page.evaluate(f"var include_filters=''")
MAX_TOTAL_HEIGHT = int(os.getenv("SCREENSHOT_MAX_HEIGHT", SCREENSHOT_MAX_HEIGHT_DEFAULT))
self.content = await self.page.content
# Now take screenshot (scrolling may trigger layout changes, but measurements are already captured)
logger.debug(f"Screenshot format {self.screenshot_format}")
self.screenshot = await capture_full_page(page=self.page, screenshot_format=self.screenshot_format)
self.xpath_data = await self.page.evaluate(XPATH_ELEMENT_JS, {
"visualselector_xpath_selectors": visualselector_xpath_selectors,
"max_height": MAX_TOTAL_HEIGHT
@@ -384,9 +350,12 @@ class fetcher(Fetcher):
if not self.xpath_data:
raise Exception(f"Content Fetcher > xPath scraper failed. Please report this URL so we can fix it :)")
self.instock_data = await self.page.evaluate(INSTOCK_DATA_JS)
self.content = await self.page.content
self.screenshot = await capture_full_page(page=self.page)
# It's good to log here in the case that the browser crashes on shutting down but we still get the data we need
logger.success(f"Fetching '{url}' complete, closing page")
await self.page.close()
@@ -406,7 +375,6 @@ class fetcher(Fetcher):
request_body=None,
request_headers=None,
request_method=None,
screenshot_format=None,
timeout=None,
url=None,
watch_uuid=None,
@@ -426,7 +394,6 @@ class fetcher(Fetcher):
request_body=request_body,
request_headers=request_headers,
request_method=request_method,
screenshot_format=None,
timeout=timeout,
url=url,
watch_uuid=watch_uuid,

View File

@@ -12,8 +12,8 @@ from changedetectionio.content_fetchers.base import Fetcher
class fetcher(Fetcher):
fetcher_description = "Basic fast Plaintext/HTTP Client"
def __init__(self, proxy_override=None, custom_browser_connection_url=None, **kwargs):
super().__init__(**kwargs)
def __init__(self, proxy_override=None, custom_browser_connection_url=None):
super().__init__()
self.proxy_override = proxy_override
# browser_connection_url is none because its always 'launched locally'
@@ -120,12 +120,6 @@ class fetcher(Fetcher):
self.raw_content = r.content
# If the content is an image, set it as screenshot for SSIM/visual comparison
content_type = r.headers.get('content-type', '').lower()
if 'image/' in content_type:
self.screenshot = r.content
logger.debug(f"Image content detected ({content_type}), set as screenshot for comparison")
async def run(self,
fetch_favicon=True,
current_include_filters=None,
@@ -135,7 +129,6 @@ class fetcher(Fetcher):
request_body=None,
request_headers=None,
request_method=None,
screenshot_format=None,
timeout=None,
url=None,
watch_uuid=None,

View File

@@ -1,93 +0,0 @@
/**
* Lock Element Dimensions for Screenshot Capture
*
* THE PROBLEM:
* When taking full-page screenshots of tall pages, Chrome/Puppeteer/Playwright need to:
* 1. Temporarily change the viewport height to a large value (e.g., 800px → 3809px)
* 2. Take screenshots in chunks while scrolling
* 3. Stitch the chunks together
*
* However, changing the viewport height triggers CSS media queries like:
* @media (min-height: 860px) { .ad { height: 250px; } }
*
* This causes elements (especially ads) to resize during screenshot capture, creating a mismatch:
* - Screenshot shows element at NEW size (after media query triggered)
* - xpath element coordinates measured at OLD size (before viewport change)
* - Visual selector overlays don't align with screenshot
*
* EXAMPLE BUG:
* - Initial viewport: 1280x800, ad height: 138px, article position: 279px ✓
* - Viewport changes to 1280x3809 for screenshot
* - Media query triggers: ad expands to 250px
* - All content below shifts down by 112px (250-138)
* - Article now at position: 391px (279+112)
* - But xpath data says 279px → 112px mismatch! ✗
*
* THE SOLUTION:
* Before changing viewport, lock ALL element dimensions with !important inline styles.
* Inline styles with !important override media query CSS, preventing layout changes.
*
* WHAT THIS SCRIPT DOES:
* 1. Iterates through every element on the page
* 2. Captures current computed dimensions (width, height)
* 3. Sets inline styles with !important to freeze those dimensions
* 4. Disables ResizeObserver API (for JS-based resizing)
* 5. When viewport changes for screenshot, media queries can't resize anything
* 6. Layout remains consistent → xpath coordinates match screenshot ✓
*
* USAGE:
* Execute this script BEFORE calling capture_full_page() / screenshot functions.
* The page must be fully loaded and settled at its initial viewport size.
* No need to restore state afterward - page is closed after screenshot.
*
* PERFORMANCE:
* - Iterates all DOM elements (can be 1000s on complex pages)
* - Typically completes in 50-200ms
* - One-time cost before screenshot, well worth it for coordinate accuracy
*
* @see https://github.com/dgtlmoon/changedetection.io/issues/XXXX
*/
(() => {
// Store original styles in a global WeakMap for later restoration
window.__elementSizingRestore = new WeakMap();
// Lock ALL element dimensions to prevent media query layout changes
document.querySelectorAll('*').forEach(el => {
const computed = window.getComputedStyle(el);
const rect = el.getBoundingClientRect();
// Save original inline style values BEFORE locking
const properties = ['height', 'min-height', 'max-height', 'width', 'min-width', 'max-width'];
const originalStyles = {};
properties.forEach(prop => {
originalStyles[prop] = {
value: el.style.getPropertyValue(prop),
priority: el.style.getPropertyPriority(prop)
};
});
window.__elementSizingRestore.set(el, originalStyles);
// Lock dimensions with !important to override media queries
if (rect.height > 0) {
el.style.setProperty('height', computed.height, 'important');
el.style.setProperty('min-height', computed.height, 'important');
el.style.setProperty('max-height', computed.height, 'important');
}
if (rect.width > 0) {
el.style.setProperty('width', computed.width, 'important');
el.style.setProperty('min-width', computed.width, 'important');
el.style.setProperty('max-width', computed.width, 'important');
}
});
// Also disable ResizeObserver for JS-based resizing
window.ResizeObserver = class {
constructor() {}
observe() {}
unobserve() {}
disconnect() {}
};
console.log('✓ Element dimensions locked to prevent media query changes during screenshot');
})();

View File

@@ -1,52 +0,0 @@
/**
* Unlock Element Dimensions After Screenshot Capture
*
* This script removes the inline !important styles that were applied by lock-elements-sizing.js
* and restores elements to their original state using the WeakMap created during locking.
*
* USAGE:
* Execute this script AFTER completing screenshot capture and restoring the viewport.
* This allows the page to return to its normal responsive behavior.
*
* WHAT THIS SCRIPT DOES:
* 1. Iterates through every element that was locked
* 2. Reads original style values from the global WeakMap
* 3. Restores original inline styles (or removes them if they weren't set originally)
* 4. Cleans up the WeakMap
*
* @see lock-elements-sizing.js for the locking mechanism
*/
(() => {
// Check if the restore map exists
if (!window.__elementSizingRestore) {
console.log('⚠ Element sizing restore map not found - elements may not have been locked');
return;
}
// Restore all locked dimension styles to their original state
document.querySelectorAll('*').forEach(el => {
const originalStyles = window.__elementSizingRestore.get(el);
if (originalStyles) {
const properties = ['height', 'min-height', 'max-height', 'width', 'min-width', 'max-width'];
properties.forEach(prop => {
const original = originalStyles[prop];
if (original.value) {
// Restore original value with original priority
el.style.setProperty(prop, original.value, original.priority || '');
} else {
// Was not set originally, so remove it
el.style.removeProperty(prop);
}
});
}
});
// Clean up the global WeakMap
delete window.__elementSizingRestore;
console.log('✓ Element dimensions unlocked - page restored to original state');
})();

View File

@@ -8,90 +8,14 @@ from loguru import logger
from changedetectionio.content_fetchers import SCREENSHOT_MAX_HEIGHT_DEFAULT, SCREENSHOT_DEFAULT_QUALITY
# Cache font to avoid loading on every stitch
_cached_font = None
def _get_caption_font():
"""Get or create cached font for caption text."""
global _cached_font
if _cached_font is None:
from PIL import ImageFont
try:
_cached_font = ImageFont.truetype("arial.ttf", 35)
except IOError:
_cached_font = ImageFont.load_default()
return _cached_font
def stitch_images_inline(chunks_bytes, original_page_height, capture_height):
"""
Stitch image chunks together inline (no multiprocessing).
Optimized for small number of chunks (2-3) to avoid process creation overhead.
Args:
chunks_bytes: List of JPEG image bytes
original_page_height: Original page height in pixels
capture_height: Maximum capture height
Returns:
bytes: Stitched JPEG image
"""
import os
import io
from PIL import Image, ImageDraw
# Load images from byte chunks
images = [Image.open(io.BytesIO(b)) for b in chunks_bytes]
total_height = sum(im.height for im in images)
max_width = max(im.width for im in images)
# Create stitched image
stitched = Image.new('RGB', (max_width, total_height))
y_offset = 0
for im in images:
stitched.paste(im, (0, y_offset))
y_offset += im.height
im.close() # Close immediately after pasting
# Draw caption only if page was trimmed
if original_page_height > capture_height:
draw = ImageDraw.Draw(stitched)
caption_text = f"WARNING: Screenshot was {original_page_height}px but trimmed to {capture_height}px because it was too long"
padding = 10
font = _get_caption_font()
bbox = draw.textbbox((0, 0), caption_text, font=font)
text_width = bbox[2] - bbox[0]
text_height = bbox[3] - bbox[1]
# Draw white background rectangle
draw.rectangle([(0, 0), (max_width, text_height + 2 * padding)], fill=(255, 255, 255))
# Draw text centered
text_x = (max_width - text_width) // 2
draw.text((text_x, padding), caption_text, font=font, fill=(255, 0, 0))
# Encode to JPEG
output = io.BytesIO()
stitched.save(output, format="JPEG", quality=int(os.getenv("SCREENSHOT_QUALITY", SCREENSHOT_DEFAULT_QUALITY)), optimize=True)
result = output.getvalue()
# Cleanup
stitched.close()
return result
def stitch_images_worker(pipe_conn, chunks_bytes, original_page_height, capture_height):
"""
Stitch image chunks together in a separate process.
Used for large number of chunks (4+) to avoid blocking the main event loop.
"""
import os
import io
from PIL import Image, ImageDraw, ImageFont
try:
# Load images from byte chunks
images = [Image.open(io.BytesIO(b)) for b in chunks_bytes]
total_height = sum(im.height for im in images)
@@ -103,17 +27,21 @@ def stitch_images_worker(pipe_conn, chunks_bytes, original_page_height, capture_
for im in images:
stitched.paste(im, (0, y_offset))
y_offset += im.height
im.close() # Close immediately after pasting
# Draw caption only if page was trimmed
# Draw caption on top (overlaid, not extending canvas)
draw = ImageDraw.Draw(stitched)
if original_page_height > capture_height:
draw = ImageDraw.Draw(stitched)
caption_text = f"WARNING: Screenshot was {original_page_height}px but trimmed to {capture_height}px because it was too long"
padding = 10
font_size = 35
font_color = (255, 0, 0)
background_color = (255, 255, 255)
# Try to load font
# Try to load a proper font
try:
font = ImageFont.truetype("arial.ttf", 35)
font = ImageFont.truetype("arial.ttf", font_size)
except IOError:
font = ImageFont.load_default()
@@ -121,16 +49,19 @@ def stitch_images_worker(pipe_conn, chunks_bytes, original_page_height, capture_
text_width = bbox[2] - bbox[0]
text_height = bbox[3] - bbox[1]
# Draw white background rectangle
draw.rectangle([(0, 0), (max_width, text_height + 2 * padding)], fill=(255, 255, 255))
# Draw white rectangle background behind text
rect_top = 0
rect_bottom = text_height + 2 * padding
draw.rectangle([(0, rect_top), (max_width, rect_bottom)], fill=background_color)
# Draw text centered
# Draw text centered horizontally, 10px padding from top of the rectangle
text_x = (max_width - text_width) // 2
draw.text((text_x, padding), caption_text, font=font, fill=(255, 0, 0))
text_y = padding
draw.text((text_x, text_y), caption_text, font=font, fill=font_color)
# Encode and send image with optimization
# Encode and send image
output = io.BytesIO()
stitched.save(output, format="JPEG", quality=int(os.getenv("SCREENSHOT_QUALITY", SCREENSHOT_DEFAULT_QUALITY)), optimize=True)
stitched.save(output, format="JPEG", quality=int(os.getenv("SCREENSHOT_QUALITY", SCREENSHOT_DEFAULT_QUALITY)))
pipe_conn.send_bytes(output.getvalue())
stitched.close()

View File

@@ -28,8 +28,8 @@ class fetcher(Fetcher):
'title': 'Using a Chrome browser'
}
def __init__(self, proxy_override=None, custom_browser_connection_url=None, **kwargs):
super().__init__(**kwargs)
def __init__(self, proxy_override=None, custom_browser_connection_url=None):
super().__init__()
from urllib.parse import urlparse
from selenium.webdriver.common.proxy import Proxy
@@ -69,7 +69,6 @@ class fetcher(Fetcher):
request_body=None,
request_headers=None,
request_method=None,
screenshot_format=None,
timeout=None,
url=None,
watch_uuid=None,
@@ -147,21 +146,7 @@ class fetcher(Fetcher):
time.sleep(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay)
self.content = driver.page_source
self.headers = {}
# Selenium always captures as PNG, convert to JPEG if needed
screenshot_png = driver.get_screenshot_as_png()
# Convert to JPEG if requested (for smaller file size)
if self.screenshot_format and self.screenshot_format.upper() == 'JPEG':
from PIL import Image
import io
img = Image.open(io.BytesIO(screenshot_png))
jpeg_buffer = io.BytesIO()
img.save(jpeg_buffer, format='JPEG', quality=int(os.getenv("SCREENSHOT_QUALITY", 72)))
self.screenshot = jpeg_buffer.getvalue()
img.close()
else:
self.screenshot = screenshot_png
self.screenshot = driver.get_screenshot_as_png()
except Exception as e:
driver.quit()
raise e

View File

@@ -66,10 +66,6 @@ CORS(app)
# Super handy for compressing large BrowserSteps responses and others
FlaskCompress(app)
app.config['COMPRESS_MIN_SIZE'] = 4096
app.config['COMPRESS_MIMETYPES'] = ['text/html', 'text/css', 'text/javascript', 'application/json', 'application/javascript', 'image/svg+xml']
app.config['TEMPLATES_AUTO_RELOAD'] = False
# Stop browser caching of assets
app.config['SEND_FILE_MAX_AGE_DEFAULT'] = 0
@@ -82,7 +78,8 @@ if os.getenv('FLASK_SERVER_NAME'):
#app.config["EXPLAIN_TEMPLATE_LOADING"] = True
# Disables caching of the templates
app.config['TEMPLATES_AUTO_RELOAD'] = True
app.jinja_env.add_extension('jinja2.ext.loopcontrols')
# Configure Jinja2 to search for templates in plugin directories

View File

@@ -6,15 +6,12 @@ from wtforms.widgets.core import TimeInput
from changedetectionio.blueprint.rss import RSS_FORMAT_TYPES, RSS_TEMPLATE_TYPE_OPTIONS, RSS_TEMPLATE_HTML_DEFAULT
from changedetectionio.conditions.form import ConditionFormRow
from changedetectionio.notification_service import NotificationContextData
from changedetectionio.processors.image_ssim_diff import SCREENSHOT_COMPARISON_THRESHOLD_OPTIONS, \
SCREENSHOT_COMPARISON_THRESHOLD_OPTIONS_DEFAULT
from changedetectionio.strtobool import strtobool
from wtforms import (
BooleanField,
Form,
Field,
FloatField,
IntegerField,
RadioField,
SelectField,
@@ -997,24 +994,6 @@ class globalSettingsApplicationForm(commonSettingsForm):
global_ignore_text = StringListField('Ignore Text', [ValidateListRegex()])
global_subtractive_selectors = StringListField('Remove elements', [ValidateCSSJSONXPATHInput(allow_json=False)])
ignore_whitespace = BooleanField('Ignore whitespace')
# Screenshot comparison settings
min_change_percentage = FloatField(
'Screenshot: Minimum Change Percentage',
validators=[
validators.Optional(),
validators.NumberRange(min=0.0, max=100.0, message='Must be between 0 and 100')
],
default=0.1,
render_kw={"placeholder": "0.1", "style": "width: 8em;"}
)
comparison_threshold = SelectField(
'Screenshot: Pixel Difference Sensitivity',
choices=SCREENSHOT_COMPARISON_THRESHOLD_OPTIONS,
default=SCREENSHOT_COMPARISON_THRESHOLD_OPTIONS_DEFAULT
)
password = SaltyPasswordField()
pager_size = IntegerField('Pager size',
render_kw={"style": "width: 5em;"},

View File

@@ -46,7 +46,6 @@ class model(dict):
'global_subtractive_selectors': [],
'ignore_whitespace': True,
'ignore_status_codes': False, #@todo implement, as ternary.
'ssim_threshold': '0.96', # Default SSIM threshold for screenshot comparison
'notification_body': default_notification_body,
'notification_format': default_notification_format,
'notification_title': default_notification_title,

View File

@@ -13,136 +13,6 @@ from .. import jinja2_custom as safe_jinja
from ..diff import ADDED_PLACEMARKER_OPEN
from ..html_tools import TRANSLATE_WHITESPACE_TABLE
def _brotli_compress_worker(conn, filepath, mode=None):
"""
Worker function to compress data with brotli in a separate process.
This isolates memory - when process exits, OS reclaims all memory.
Args:
conn: multiprocessing.Pipe connection to receive data
filepath: destination file path
mode: brotli compression mode (e.g., brotli.MODE_TEXT)
"""
import brotli
try:
# Receive data from parent process via pipe (avoids pickle overhead)
contents = conn.recv()
if mode is not None:
compressed_data = brotli.compress(contents, mode=mode)
else:
compressed_data = brotli.compress(contents)
with open(filepath, 'wb') as f:
f.write(compressed_data)
# Send success status back
conn.send(True)
# No need for explicit cleanup - process exit frees all memory
except Exception as e:
logger.error(f"Brotli compression worker failed: {e}")
conn.send(False)
finally:
conn.close()
def _brotli_subprocess_save(contents, filepath, mode=None, timeout=30, fallback_uncompressed=False):
"""
Save compressed data using subprocess to isolate memory.
Uses Pipe to avoid pickle overhead for large data.
Args:
contents: data to compress (str or bytes)
filepath: destination file path
mode: brotli compression mode (e.g., brotli.MODE_TEXT)
timeout: subprocess timeout in seconds
fallback_uncompressed: if True, save uncompressed on failure; if False, raise exception
Returns:
str: actual filepath saved (may differ from input if fallback used)
Raises:
Exception: if compression fails and fallback_uncompressed is False
"""
import brotli
from multiprocessing import Process, Pipe
import sys
# Ensure contents are bytes
if isinstance(contents, str):
contents = contents.encode('utf-8')
# Create pipe for communication
parent_conn, child_conn = Pipe()
# Run compression in subprocess
# On Windows, spawn method is default and safe; on Unix, fork is used
proc = Process(target=_brotli_compress_worker, args=(child_conn, filepath, mode))
# Windows-safe: Set daemon=False explicitly to avoid issues with process cleanup
proc.daemon = False
proc.start()
try:
# Send data to subprocess via pipe (avoids pickle)
parent_conn.send(contents)
# Wait for result with timeout
if parent_conn.poll(timeout):
success = parent_conn.recv()
else:
success = False
logger.warning(f"Brotli compression subprocess timed out after {timeout}s")
# Graceful termination with platform-aware cleanup
try:
proc.terminate()
except Exception as term_error:
logger.debug(f"Process termination issue (may be normal on Windows): {term_error}")
parent_conn.close()
proc.join(timeout=5)
# Force kill if still alive after graceful termination
if proc.is_alive():
try:
if sys.platform == 'win32':
# Windows: use kill() which is more forceful
proc.kill()
else:
# Unix: terminate() already sent SIGTERM, now try SIGKILL
proc.kill()
proc.join(timeout=2)
except Exception as kill_error:
logger.warning(f"Failed to kill brotli compression process: {kill_error}")
# Check if file was created successfully
if success and os.path.exists(filepath):
return filepath
except Exception as e:
logger.error(f"Brotli compression error: {e}")
try:
parent_conn.close()
except:
pass
try:
proc.terminate()
proc.join(timeout=2)
except:
pass
# Compression failed
if fallback_uncompressed:
logger.warning(f"Brotli compression failed for {filepath}, saving uncompressed")
fallback_path = filepath.replace('.br', '')
with open(fallback_path, 'wb') as f:
f.write(contents)
return fallback_path
else:
raise Exception(f"Brotli compression subprocess failed for {filepath}")
FAVICON_RESAVE_THRESHOLD_SECONDS=86400
@@ -226,17 +96,8 @@ class model(watch_base):
def clear_watch(self):
import pathlib
# Get list of processor config files to preserve
from changedetectionio.processors import find_processors
processor_names = [name for cls, name in find_processors()]
processor_config_files = {f"{name}.json" for name in processor_names}
# JSON Data, Screenshots, Textfiles (history index and snapshots), HTML in the future etc
# But preserve processor config files (they're configuration, not history data)
for item in pathlib.Path(str(self.watch_data_dir)).rglob("*.*"):
# Skip processor config files
if item.name in processor_config_files:
continue
os.unlink(item)
# Force the attr to recalculate
@@ -334,15 +195,12 @@ class model(watch_base):
# The index history could contain a relative path, so we need to make the fullpath
# so that python can read it
# Cross-platform: check for any path separator (works on Windows and Unix)
if os.sep not in v and '/' not in v and '\\' not in v:
# Relative filename only, no path separators
if not '/' in v and not '\\' in v:
v = os.path.join(self.watch_data_dir, v)
else:
# It's possible that they moved the datadir on older versions
# So the snapshot exists but is in a different path
# Cross-platform: use os.path.basename instead of split('/')
snapshot_fname = os.path.basename(v)
snapshot_fname = v.split('/')[-1]
proposed_new_path = os.path.join(self.watch_data_dir, snapshot_fname)
if not os.path.exists(v) and os.path.exists(proposed_new_path):
v = proposed_new_path
@@ -430,54 +288,29 @@ class model(watch_base):
if not filepath:
filepath = self.history[timestamp]
# Check if binary file (image, PDF, etc.)
# Binary files are NEVER saved with .br compression, only text files are
binary_extensions = ('.png', '.jpg', '.jpeg', '.gif', '.webp', '.pdf', '.bin', '.jfif')
is_binary = any(filepath.endswith(ext) for ext in binary_extensions)
# See if a brotli versions exists and switch to that
if not filepath.endswith('.br') and os.path.isfile(f"{filepath}.br"):
filepath = f"{filepath}.br"
# Only look for .br versions for text files
if not is_binary:
# See if a brotli version exists and switch to that (text files only)
if not filepath.endswith('.br') and os.path.isfile(f"{filepath}.br"):
filepath = f"{filepath}.br"
# OR in the backup case that the .br does not exist, but the plain one does
if filepath.endswith('.br') and not os.path.isfile(filepath):
if os.path.isfile(filepath.replace('.br', '')):
filepath = filepath.replace('.br', '')
# OR in the backup case that the .br does not exist, but the plain one does
if filepath.endswith('.br') and not os.path.isfile(filepath):
if os.path.isfile(filepath.replace('.br', '')):
filepath = filepath.replace('.br', '')
# Handle .br compressed text files
if filepath.endswith('.br'):
# Brotli doesn't have a file header to detect it, so we rely on the filename
# https://www.rfc-editor.org/rfc/rfc7932
# Note: .br should ONLY exist for text files, never binary
with open(filepath, 'rb') as f:
return brotli.decompress(f.read()).decode('utf-8')
return(brotli.decompress(f.read()).decode('utf-8'))
# Binary file - return raw bytes
if is_binary:
with open(filepath, 'rb') as f:
return f.read()
# Text file - decode to string
with open(filepath, 'r', encoding='utf-8', errors='ignore') as f:
return f.read()
def _write_atomic(self, dest, data):
"""Write data atomically to dest using a temp file"""
if not os.path.exists(dest):
import tempfile
with tempfile.NamedTemporaryFile('wb', delete=False, dir=self.watch_data_dir) as tmp:
tmp.write(data)
tmp.flush()
os.fsync(tmp.fileno())
tmp_path = tmp.name
os.replace(tmp_path, dest)
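# Note: os.replace() is atomic on both POSIX and Windows when the temp file and
# destination sit on the same filesystem - which is why the temp file is created
# in self.watch_data_dir rather than in the system temp directory.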
# Save some text file to the appropriate path and bump the history
# Save some text file to the appropriate path and bump the history
# result_obj from fetch_site_status.run()
def save_history_text(self, contents, timestamp, snapshot_id):
import brotli
import tempfile
logger.trace(f"{self.get('uuid')} - Updating history.txt with timestamp {timestamp}")
self.ensure_data_dir_exists()
@@ -485,54 +318,31 @@ class model(watch_base):
threshold = int(os.getenv('SNAPSHOT_BROTLI_COMPRESSION_THRESHOLD', 1024))
skip_brotli = strtobool(os.getenv('DISABLE_BROTLI_TEXT_SNAPSHOT', 'False'))
# Binary data - detect file type and save without compression
if isinstance(contents, bytes):
try:
import puremagic
detections = puremagic.magic_string(contents[:2048])
ext = detections[0].extension if detections else 'bin'
# Strip leading dot if present (puremagic returns extensions like '.jfif')
ext = ext.lstrip('.')
if detections:
logger.trace(f"Detected file type: {detections[0].mime_type} -> extension: {ext}")
except Exception as e:
logger.warning(f"puremagic detection failed: {e}, using 'bin' extension")
ext = 'bin'
snapshot_fname = f"{snapshot_id}.{ext}"
dest = os.path.join(self.watch_data_dir, snapshot_fname)
self._write_atomic(dest, contents)
logger.trace(f"Saved binary snapshot as {snapshot_fname} ({len(contents)} bytes)")
# Text data - use brotli compression if enabled and above threshold
# Decide on snapshot filename and destination path
if not skip_brotli and len(contents) > threshold:
snapshot_fname = f"{snapshot_id}.txt.br"
encoded_data = brotli.compress(contents.encode('utf-8'), mode=brotli.MODE_TEXT)
else:
if not skip_brotli and len(contents) > threshold:
# Compressed text
import brotli
snapshot_fname = f"{snapshot_id}.txt.br"
dest = os.path.join(self.watch_data_dir, snapshot_fname)
snapshot_fname = f"{snapshot_id}.txt"
encoded_data = contents.encode('utf-8')
if not os.path.exists(dest):
try:
actual_dest = _brotli_subprocess_save(contents, dest, mode=brotli.MODE_TEXT, fallback_uncompressed=True)
if actual_dest != dest:
snapshot_fname = os.path.basename(actual_dest)
except Exception as e:
logger.error(f"{self.get('uuid')} - Brotli compression failed: {e}")
# Fallback to uncompressed
snapshot_fname = f"{snapshot_id}.txt"
dest = os.path.join(self.watch_data_dir, snapshot_fname)
self._write_atomic(dest, contents.encode('utf-8'))
else:
# Plain text
snapshot_fname = f"{snapshot_id}.txt"
dest = os.path.join(self.watch_data_dir, snapshot_fname)
self._write_atomic(dest, contents.encode('utf-8'))
dest = os.path.join(self.watch_data_dir, snapshot_fname)
# Write snapshot file atomically if it doesn't exist
if not os.path.exists(dest):
with tempfile.NamedTemporaryFile('wb', delete=False, dir=self.watch_data_dir) as tmp:
tmp.write(encoded_data)
tmp.flush()
os.fsync(tmp.fileno())
tmp_path = tmp.name
os.rename(tmp_path, dest)
# Append to history.txt atomically
index_fname = os.path.join(self.watch_data_dir, "history.txt")
index_line = f"{timestamp},{snapshot_fname}\n"
# Let's try to force a flush here since it's usually a very small file
# If this still fails in the future, try reading it all into memory first and re-writing it
with open(index_fname, 'a', encoding='utf-8') as f:
f.write(index_line)
f.flush()
@@ -940,13 +750,25 @@ class model(watch_base):
def save_last_text_fetched_before_filters(self, contents):
import brotli
filepath = os.path.join(self.watch_data_dir, 'last-fetched.br')
_brotli_subprocess_save(contents, filepath, mode=brotli.MODE_TEXT, fallback_uncompressed=False)
with open(filepath, 'wb') as f:
f.write(brotli.compress(contents, mode=brotli.MODE_TEXT))
def save_last_fetched_html(self, timestamp, contents):
import brotli
self.ensure_data_dir_exists()
snapshot_fname = f"{timestamp}.html.br"
filepath = os.path.join(self.watch_data_dir, snapshot_fname)
_brotli_subprocess_save(contents, filepath, mode=None, fallback_uncompressed=True)
with open(filepath, 'wb') as f:
contents = contents.encode('utf-8') if isinstance(contents, str) else contents
try:
f.write(brotli.compress(contents))
except Exception as e:
logger.warning(f"{self.get('uuid')} - Unable to compress snapshot, saving as raw data to {filepath}")
logger.warning(e)
f.write(contents)
self._prune_last_fetched_html_snapshots()
def get_fetched_html(self, timestamp):

View File

@@ -425,29 +425,12 @@ def save_plugin_settings(datastore_path, plugin_id, settings):
def get_plugin_template_paths():
"""Get list of plugin template directories for Jinja2 loader.
Scans both external pluggy plugins and built-in processor plugins.
Returns:
list: List of absolute paths to plugin template directories
"""
template_paths = []
# Scan built-in processor plugins
from changedetectionio.processors import find_processors
processor_list = find_processors()
for processor_module, processor_name in processor_list:
# Each processor is a module, check if it has a templates directory
if hasattr(processor_module, '__file__'):
processor_file = processor_module.__file__
if processor_file:
# Get the processor directory (e.g., processors/image_ssim_diff/)
processor_dir = os.path.dirname(os.path.abspath(processor_file))
templates_dir = os.path.join(processor_dir, 'templates')
if os.path.isdir(templates_dir):
template_paths.append(templates_dir)
logger.debug(f"Added processor template path: {templates_dir}")
# Get all registered external pluggy plugins
# Get all registered plugins
for plugin_name, plugin_obj in plugin_manager.list_name_plugin():
# Check if plugin has a templates directory
if hasattr(plugin_obj, '__file__'):

View File

@@ -2,7 +2,6 @@ from abc import abstractmethod
from changedetectionio.content_fetchers.base import Fetcher
from changedetectionio.strtobool import strtobool
from copy import deepcopy
from functools import lru_cache
from loguru import logger
import hashlib
import importlib
@@ -11,9 +10,6 @@ import os
import pkgutil
import re
SCREENSHOT_FORMAT_JPEG = 'JPEG'
SCREENSHOT_FORMAT_PNG = 'PNG'
class difference_detection_processor():
browser_steps = None
@@ -23,9 +19,9 @@ class difference_detection_processor():
watch = None
xpath_data = None
preferred_proxy = None
screenshot_format = SCREENSHOT_FORMAT_JPEG
def __init__(self, datastore, watch_uuid):
def __init__(self, *args, datastore, watch_uuid, **kwargs):
super().__init__(*args, **kwargs)
self.datastore = datastore
self.watch_uuid = watch_uuid
self.watch = deepcopy(self.datastore.data['watching'].get(watch_uuid))
@@ -101,8 +97,7 @@ class difference_detection_processor():
# Now call the fetcher (playwright/requests/etc) with arguments that only a fetcher would need.
# When browser_connection_url is None, the method should default to working out the best defaults (OS env vars etc.)
self.fetcher = fetcher_obj(proxy_override=proxy_url,
custom_browser_connection_url=custom_browser_connection_url,
screenshot_format=self.screenshot_format
custom_browser_connection_url=custom_browser_connection_url
)
if self.watch.has_browser_steps:
@@ -164,7 +159,6 @@ class difference_detection_processor():
request_body=request_body,
request_headers=request_headers,
request_method=request_method,
screenshot_format = self.screenshot_format,
timeout=timeout,
url=url,
watch_uuid=self.watch_uuid,
@@ -175,84 +169,6 @@ class difference_detection_processor():
# After init, call run_changedetection() which will do the actual change-detection
def get_extra_watch_config(self, filename):
"""
Read processor-specific JSON config file from watch data directory.
Args:
filename: Name of JSON file (e.g., "visual_ssim_score.json")
Returns:
dict: Parsed JSON data, or empty dict if file doesn't exist
"""
import json
import os
watch = self.datastore.data['watching'].get(self.watch_uuid)
watch_data_dir = watch.watch_data_dir
if not watch_data_dir:
return {}
filepath = os.path.join(watch_data_dir, filename)
if not os.path.isfile(filepath):
return {}
try:
with open(filepath, 'r', encoding='utf-8') as f:
return json.load(f)
except (json.JSONDecodeError, IOError) as e:
logger.warning(f"Failed to read extra watch config {filename}: {e}")
return {}
def update_extra_watch_config(self, filename, data, merge=True):
"""
Write processor-specific JSON config file to watch data directory.
Args:
filename: Name of JSON file (e.g., "visual_ssim_score.json")
data: Dictionary to serialize as JSON
merge: If True, merge with existing data; if False, overwrite completely
"""
import json
import os
watch = self.datastore.data['watching'].get(self.watch_uuid)
watch_data_dir = watch.watch_data_dir
if not watch_data_dir:
logger.warning(f"Cannot save extra watch config {filename}: no watch_data_dir")
return
# Ensure directory exists
watch.ensure_data_dir_exists()
filepath = os.path.join(watch_data_dir, filename)
try:
# If merge is enabled, read existing data first
existing_data = {}
if merge and os.path.isfile(filepath):
try:
with open(filepath, 'r', encoding='utf-8') as f:
existing_data = json.load(f)
except (json.JSONDecodeError, IOError) as e:
logger.warning(f"Failed to read existing config for merge: {e}")
# Merge new data with existing
if merge:
existing_data.update(data)
data_to_save = existing_data
else:
data_to_save = data
# Write the data
with open(filepath, 'w', encoding='utf-8') as f:
json.dump(data_to_save, f, indent=2)
except IOError as e:
logger.error(f"Failed to write extra watch config {filename}: {e}")
@abstractmethod
def run_changedetection(self, watch):
update_obj = {'last_notification_error': False, 'last_error': False}
@@ -292,12 +208,8 @@ def find_processors():
# Iterate through all classes in the module
for name, obj in inspect.getmembers(module, inspect.isclass):
# Only register classes that are actually defined in this module (not imported)
if (issubclass(obj, difference_detection_processor) and
obj is not difference_detection_processor and
obj.__module__ == module.__name__):
if issubclass(obj, difference_detection_processor) and obj is not difference_detection_processor:
processors.append((module, sub_package))
break # Only need one processor per module
except (ModuleNotFoundError, ImportError) as e:
logger.warning(f"Failed to import module {module_name}: {e} (find_processors())")
@@ -332,206 +244,17 @@ def get_custom_watch_obj_for_processor(processor_name):
return watch_class
def find_processor_module(processor_name):
"""
Find the processor module by name.
Args:
processor_name: Processor machine name (e.g., 'image_ssim_diff')
Returns:
module: The processor's parent module, or None if not found
"""
processor_classes = find_processors()
processor_tuple = next((tpl for tpl in processor_classes if tpl[1] == processor_name), None)
if processor_tuple:
# Return the parent module (the package containing processor.py)
return get_parent_module(processor_tuple[0])
return None
def available_processors():
"""
Get a list of processors by name and description for the UI elements.
Can be filtered via ALLOWED_PROCESSORS environment variable (comma-separated list).
Get a list of processors by name and description for the UI elements
:return: A list :)
"""
processor_classes = find_processors()
# Check if ALLOWED_PROCESSORS env var is set
allowed_processors_env = os.getenv('ALLOWED_PROCESSORS', '').strip()
allowed_processors = None
if allowed_processors_env:
# Parse comma-separated list and strip whitespace
allowed_processors = [p.strip() for p in allowed_processors_env.split(',') if p.strip()]
logger.info(f"ALLOWED_PROCESSORS set, filtering to: {allowed_processors}")
available = []
for module, sub_package_name in processor_classes:
# Filter by allowed processors if set
if allowed_processors and sub_package_name not in allowed_processors:
logger.debug(f"Skipping processor '{sub_package_name}' (not in ALLOWED_PROCESSORS)")
continue
for package, processor_class in processor_classes:
available.append((processor_class, package.name))
# Try to get the 'name' attribute from the processor module first
if hasattr(module, 'name'):
description = module.name
else:
# Fall back to processor_description from parent module's __init__.py
parent_module = get_parent_module(module)
if parent_module and hasattr(parent_module, 'processor_description'):
description = parent_module.processor_description
else:
# Final fallback to a readable name
description = sub_package_name.replace('_', ' ').title()
# Get weight for sorting (lower weight = higher in list)
weight = 0 # Default weight for processors without explicit weight
# Check processor module itself first
if hasattr(module, 'processor_weight'):
weight = module.processor_weight
else:
# Fall back to parent module (package __init__.py)
parent_module = get_parent_module(module)
if parent_module and hasattr(parent_module, 'processor_weight'):
weight = parent_module.processor_weight
available.append((sub_package_name, description, weight))
# Sort by weight (lower weight = appears first)
available.sort(key=lambda x: x[2])
# Return as tuples without weight (for backwards compatibility)
return [(name, desc) for name, desc, weight in available]
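# Illustrative (hedged): exporting ALLOWED_PROCESSORS="text_json_diff" restricts the
# UI list to that single processor; when unset, every discovered processor is
# returned, sorted by its processor_weight (lower weight appears first).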
@lru_cache(maxsize=1)
def get_processor_badge_texts():
"""
Get a dictionary mapping processor names to their list_badge_text values.
Cached to avoid repeated lookups.
:return: A dict mapping processor name to badge text (e.g., {'text_json_diff': 'Text', 'restock_diff': 'Restock'})
"""
processor_classes = find_processors()
badge_texts = {}
for module, sub_package_name in processor_classes:
# Try to get the 'list_badge_text' attribute from the processor module
if hasattr(module, 'list_badge_text'):
badge_texts[sub_package_name] = module.list_badge_text
else:
# Fall back to parent module's __init__.py
parent_module = get_parent_module(module)
if parent_module and hasattr(parent_module, 'list_badge_text'):
badge_texts[sub_package_name] = parent_module.list_badge_text
return badge_texts
@lru_cache(maxsize=1)
def get_processor_descriptions():
"""
Get a dictionary mapping processor names to their description/name values.
Cached to avoid repeated lookups.
:return: A dict mapping processor name to description (e.g., {'text_json_diff': 'Webpage Text/HTML, JSON and PDF changes'})
"""
processor_classes = find_processors()
descriptions = {}
for module, sub_package_name in processor_classes:
# Try to get the 'name' or 'description' attribute from the processor module first
if hasattr(module, 'name'):
descriptions[sub_package_name] = module.name
elif hasattr(module, 'description'):
descriptions[sub_package_name] = module.description
else:
# Fall back to parent module's __init__.py
parent_module = get_parent_module(module)
if parent_module and hasattr(parent_module, 'processor_description'):
descriptions[sub_package_name] = parent_module.processor_description
elif parent_module and hasattr(parent_module, 'name'):
descriptions[sub_package_name] = parent_module.name
else:
# Final fallback to a readable name
descriptions[sub_package_name] = sub_package_name.replace('_', ' ').title()
return descriptions
def generate_processor_badge_colors(processor_name):
"""
Generate consistent colors for a processor badge based on its name.
Uses a hash of the processor name to generate pleasing, accessible colors
for both light and dark modes.
:param processor_name: The processor name (e.g., 'text_json_diff')
:return: A dict with 'light' and 'dark' color schemes, each containing 'bg' and 'color'
"""
import hashlib
# Generate a consistent hash from the processor name
hash_obj = hashlib.md5(processor_name.encode('utf-8'))
hash_int = int(hash_obj.hexdigest()[:8], 16)
# Generate hue from hash (0-360)
hue = hash_int % 360
# Light mode: pastel background with darker text
light_saturation = 60 + (hash_int % 25) # 60-85%
light_lightness = 85 + (hash_int % 10) # 85-95% - very light
text_lightness = 25 + (hash_int % 15) # 25-40% - dark
# Dark mode: solid, vibrant colors with white text
dark_saturation = 55 + (hash_int % 20) # 55-75%
dark_lightness = 45 + (hash_int % 15) # 45-60%
return {
'light': {
'bg': f'hsl({hue}, {light_saturation}%, {light_lightness}%)',
'color': f'hsl({hue}, 50%, {text_lightness}%)'
},
'dark': {
'bg': f'hsl({hue}, {dark_saturation}%, {dark_lightness}%)',
'color': '#fff'
}
}
@lru_cache(maxsize=1)
def get_processor_badge_css():
"""
Generate CSS for all processor badges with auto-generated colors.
This creates CSS rules for both light and dark modes for each processor.
:return: A string containing CSS rules for all processor badges
"""
processor_classes = find_processors()
css_rules = []
for module, sub_package_name in processor_classes:
colors = generate_processor_badge_colors(sub_package_name)
# Light mode rule
css_rules.append(
f".processor-badge-{sub_package_name} {{\n"
f" background-color: {colors['light']['bg']};\n"
f" color: {colors['light']['color']};\n"
f"}}"
)
# Dark mode rule
css_rules.append(
f"html[data-darkmode=\"true\"] .processor-badge-{sub_package_name} {{\n"
f" background-color: {colors['dark']['bg']};\n"
f" color: {colors['dark']['color']};\n"
f"}}"
)
return '\n\n'.join(css_rules)
return available

View File

@@ -1,210 +0,0 @@
# Fast Screenshot Comparison Processor
Visual/screenshot change detection using ultra-fast image comparison algorithms.
## Overview
This processor uses **OpenCV** by default for screenshot comparison, delivering comparisons **50-100x faster** than the previous SSIM implementation while still detecting meaningful visual changes.
## Current Features
- **Ultra-fast OpenCV comparison**: cv2.absdiff with Gaussian blur for noise reduction
- **MD5 pre-check**: Fast identical image detection before expensive comparison
- **Configurable sensitivity**: Threshold-based change detection
- **Three-panel diff view**: Previous | Current | Difference (with red highlights)
- **Direct image support**: Works with browser screenshots AND direct image URLs
- **Visual selector support**: Compare specific page regions using CSS/XPath selectors
- **Download images**: Download any of the three comparison images directly from the diff view
## Performance
- **OpenCV (default)**: 50-100x faster than SSIM
- **Large screenshots**: Automatic downscaling for diff visualization (configurable via `MAX_DIFF_HEIGHT`/`MAX_DIFF_WIDTH`)
- **Memory efficient**: Explicit cleanup of large objects for long-running processes
- **JPEG diff images**: Smaller file sizes, faster rendering
## How It Works
1. **Fetch**: Screenshot captured via browser OR direct image URL fetched
2. **MD5 Check**: Quick hash comparison - if identical, skip comparison
3. **Region Selection** (optional): Crop to specific page region if visual selector is configured
4. **OpenCV Comparison**: Fast pixel-level difference detection with Gaussian blur
5. **Change Detection**: Percentage of changed pixels above threshold = change detected
6. **Visualization**: Generate diff image with red-highlighted changed regions
## Architecture
### Default Method: OpenCV
The processor uses OpenCV's `cv2.absdiff()` for ultra-fast pixel-level comparison:
```python
# Convert to grayscale
gray_from = cv2.cvtColor(image_from, cv2.COLOR_RGB2GRAY)
gray_to = cv2.cvtColor(image_to, cv2.COLOR_RGB2GRAY)
# Apply Gaussian blur (reduces noise, controlled by OPENCV_BLUR_SIGMA env var)
gray_from = cv2.GaussianBlur(gray_from, (0, 0), sigmaX=0.8)
gray_to = cv2.GaussianBlur(gray_to, (0, 0), sigmaX=0.8)
# Calculate absolute difference
diff = cv2.absdiff(gray_from, gray_to)
# Apply threshold (default: 30)
_, thresh = cv2.threshold(diff, threshold, 255, cv2.THRESH_BINARY)
# Count changed pixels above the threshold
changed_pixels = cv2.countNonZero(thresh)
total_pixels = thresh.size
change_percentage = (changed_pixels / total_pixels) * 100
```
### Optional: Pixelmatch
For users who need better anti-aliasing detection (especially for text-heavy screenshots), **pixelmatch** can be optionally installed:
```bash
pip install "pybind11-pixelmatch>=0.1.3"
```
**Note**: Pixelmatch uses a C++17 implementation via pybind11 and may have build issues on some platforms (particularly Alpine/musl systems with symbolic link security restrictions). The application will automatically fall back to OpenCV if pixelmatch is not available.
To use pixelmatch instead of OpenCV, set the environment variable:
```bash
COMPARISON_METHOD=pixelmatch
```
#### When to use pixelmatch:
- Screenshots with lots of text and anti-aliasing
- Need to ignore minor font rendering differences between browser versions
- 10-20x faster than SSIM (but slower than OpenCV)
#### When to stick with OpenCV (default):
- General webpage monitoring
- Maximum performance (50-100x faster than SSIM)
- Simple pixel-level change detection
- Avoid build dependencies (Alpine/musl systems)
## Configuration
### Environment Variables
```bash
# Comparison method (opencv or pixelmatch)
COMPARISON_METHOD=opencv # Default
# OpenCV threshold (0-255, lower = more sensitive)
COMPARISON_THRESHOLD_OPENCV=30 # Default
# Pixelmatch threshold (0-100, mapped to 0-1 scale)
COMPARISON_THRESHOLD_PIXELMATCH=10 # Default
# Gaussian blur sigma for OpenCV (0 = no blur, higher = more blur)
OPENCV_BLUR_SIGMA=0.8 # Default
# Minimum change percentage to trigger detection
OPENCV_MIN_CHANGE_PERCENT=0.1 # Default (0.1%)
PIXELMATCH_MIN_CHANGE_PERCENT=0.1 # Default
# Diff visualization image size limits (pixels)
MAX_DIFF_HEIGHT=8000 # Default
MAX_DIFF_WIDTH=900 # Default
```
### Per-Watch Configuration
- **Comparison Threshold**: Can be configured per-watch in the edit form
- Low sensitivity (75) - Only major changes
- Medium sensitivity (30) - Moderate changes (default)
- High sensitivity (20) - Small changes
- Very high sensitivity (0) - Any visible change
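The effective threshold is resolved per-watch first, then from the global application setting, then a hard default. A minimal sketch of that fallback, mirroring the diff-rendering code removed elsewhere in this change (function name assumed):

```python
def resolve_threshold(watch, datastore, default=30.0):
    # Per-watch value wins; an empty string means "use global default"
    value = watch.get('comparison_threshold') or \
        datastore.data['settings']['application'].get('comparison_threshold', default)
    try:
        return float(value)
    except (ValueError, TypeError):
        return default
```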
### Visual Selector (Region Comparison)
Use the "Include filters" field with CSS selectors or XPath to compare only specific page regions:
```
.content-area
//div[@id='main']
```
The processor will automatically crop both screenshots to the bounding box of the first matched element.
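A minimal sketch of that cropping step, assuming the element's bounding box has already been extracted from the page's xpath data (function name and clamping are illustrative):

```python
import cv2
import numpy as np

def crop_to_region(img_bytes: bytes, x: int, y: int, width: int, height: int) -> np.ndarray:
    # Decode the screenshot and clamp the box to the image bounds
    img = cv2.imdecode(np.frombuffer(img_bytes, dtype=np.uint8), cv2.IMREAD_COLOR)
    h, w = img.shape[:2]
    x1, y1 = max(x, 0), max(y, 0)
    x2, y2 = min(x + width, w), min(y + height, h)
    return img[y1:y2, x1:x2]
```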
## Dependencies
### Required
- `opencv-python-headless>=4.8.0.76` - Fast image comparison
- `Pillow (PIL)` - Image loading and manipulation
- `numpy` - Array operations
### Optional
- `pybind11-pixelmatch>=0.1.3` - Alternative comparison method with anti-aliasing detection
## Change Detection Interpretation
- **0%** = Identical images (or below minimum change threshold)
- **0.1-1%** = Minor differences (anti-aliasing, slight rendering differences)
- **1-5%** = Noticeable changes (text updates, small content changes)
- **5-20%** = Significant changes (layout shifts, content additions)
- **>20%** = Major differences (page redesign, large content changes)
## Technical Notes
### Memory Management
```python
# Explicit cleanup for long-running processes
img.close() # Close PIL Images
buffer.close() # Close BytesIO buffers
del large_array # Mark numpy arrays for GC
```
### Diff Image Generation
- Format: JPEG (quality=85, optimized)
- Highlight: Red overlay (50% blend with original)
- Auto-downscaling: Large screenshots downscaled for faster rendering
- Base64 embedded: For direct template rendering
### OpenCV Blur Parameters
The Gaussian blur reduces sensitivity to:
- Font rendering differences
- Anti-aliasing variations
- JPEG compression artifacts
- Minor pixel shifts (1-2 pixels)
Increase `OPENCV_BLUR_SIGMA` to make comparison more tolerant of these differences.
## Comparison: OpenCV vs Pixelmatch vs SSIM
| Feature | OpenCV | Pixelmatch | SSIM (old) |
|---------|--------|------------|------------|
| **Speed** | 50-100x faster | 10-20x faster | Baseline |
| **Anti-aliasing** | Via blur | Built-in detection | Built-in |
| **Text sensitivity** | High | Medium (AA-aware) | Medium |
| **Dependencies** | opencv-python-headless | pybind11-pixelmatch + C++ compiler | scikit-image |
| **Alpine/musl support** | ✅ Yes | ⚠️ Build issues | ✅ Yes |
| **Memory usage** | Low | Low | High |
| **Best for** | General use, max speed | Text-heavy screenshots | Deprecated |
## Migration from SSIM
If you're upgrading from the old SSIM-based processor:
1. **Thresholds are different**: SSIM used 0-1 scale (higher = more similar), OpenCV uses 0-255 pixel difference (lower = more similar)
2. **Default threshold**: Start with 30 for OpenCV, adjust based on your needs
3. **Performance**: Expect dramatically faster comparisons, especially for large screenshots
4. **Accuracy**: OpenCV is more sensitive to pixel-level changes; increase `OPENCV_BLUR_SIGMA` if you're getting false positives
## Future Enhancements
Potential features for future consideration:
- **Change region detection**: Highlight specific areas that changed with bounding boxes
- **Perceptual hashing**: Pre-screening filter for even faster checks
- **Ignore regions**: Exclude specific page areas (ads, timestamps) from comparison
- **Text extraction**: OCR-based text comparison for semantic changes
- **Adaptive thresholds**: Different sensitivity for different page regions
## Resources
- [OpenCV Documentation](https://docs.opencv.org/)
- [pybind11-pixelmatch GitHub](https://github.com/whtsky/pybind11-pixelmatch)
- [Pixelmatch (original JS library)](https://github.com/mapbox/pixelmatch)

View File

@@ -1,28 +0,0 @@
"""
Visual/screenshot change detection using fast image comparison algorithms.
This processor compares screenshots using OpenCV (cv2.absdiff),
which is 10-100x faster than SSIM while still detecting meaningful visual changes.
"""
import os
processor_description = "Visual/Screenshot change detection (Fast)"
processor_name = "image_ssim_diff"
processor_weight = 2 # Lower weight = appears at top, heavier weight = appears lower (bottom)
# Subprocess timeout settings
# Maximum time to wait for subprocess operations (seconds)
POLL_TIMEOUT_ABSOLUTE = int(os.getenv('OPENCV_SUBPROCESS_TIMEOUT', '20'))
# Template tracking filename
CROPPED_IMAGE_TEMPLATE_FILENAME = 'cropped_image_template.png'
SCREENSHOT_COMPARISON_THRESHOLD_OPTIONS = [
('75', 'Low sensitivity (only major changes)'),
('30', 'Medium sensitivity (moderate changes)'),
('20', 'High sensitivity (small changes)'),
('0', 'Very high sensitivity (any change)')
]
SCREENSHOT_COMPARISON_THRESHOLD_OPTIONS_DEFAULT=0.999

View File

@@ -1,431 +0,0 @@
"""
Screenshot diff visualization for fast image comparison processor.
All image operations now use ImageDiffHandler abstraction for clean separation
of concerns and easy backend swapping (LibVIPS, OpenCV, PIL, etc.).
"""
import os
import json
import time
from loguru import logger
from changedetectionio.processors.image_ssim_diff import SCREENSHOT_COMPARISON_THRESHOLD_OPTIONS_DEFAULT
# All image operations now use OpenCV via isolated_opencv subprocess handler
# No direct handler imports needed - subprocess isolation handles everything
# Maximum dimensions for diff visualization (can be overridden via environment variable)
# Large screenshots don't need full resolution for visual inspection
# Lower these defaults to reduce memory usage - full height is rarely needed for diff viewing
MAX_DIFF_HEIGHT = int(os.getenv('MAX_DIFF_HEIGHT', '8000'))
MAX_DIFF_WIDTH = int(os.getenv('MAX_DIFF_WIDTH', '900'))
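# e.g. MAX_DIFF_HEIGHT=2000 MAX_DIFF_WIDTH=600 trades diff resolution for lower memory use (values illustrative)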
def get_asset(asset_name, watch, datastore, request):
"""
Get processor-specific binary assets for streaming.
Uses ImageDiffHandler for all image operations - no more multiprocessing needed
as LibVIPS handles threading/memory internally.
Supported assets:
- 'before': The previous/from screenshot
- 'after': The current/to screenshot
- 'rendered_diff': The generated diff visualization with red highlights
Args:
asset_name: Name of the asset to retrieve ('before', 'after', 'rendered_diff')
watch: Watch object
datastore: Datastore object
request: Flask request (for from_version/to_version query params)
Returns:
tuple: (binary_data, content_type, cache_control_header) or None if not found
"""
# Get version parameters from query string
versions = list(watch.history.keys())
if len(versions) < 2:
return None
from_version = request.args.get('from_version', versions[-2] if len(versions) >= 2 else versions[0])
to_version = request.args.get('to_version', versions[-1])
# Validate versions exist
if from_version not in versions:
from_version = versions[-2] if len(versions) >= 2 else versions[0]
if to_version not in versions:
to_version = versions[-1]
try:
if asset_name == 'before':
# Return the 'from' screenshot with bounding box if configured
img_bytes = watch.get_history_snapshot(timestamp=from_version)
img_bytes = _draw_bounding_box_if_configured(img_bytes, watch, datastore)
mime_type = _detect_mime_type(img_bytes)
return (img_bytes, mime_type, 'public, max-age=3600')
elif asset_name == 'after':
# Return the 'to' screenshot with bounding box if configured
img_bytes = watch.get_history_snapshot(timestamp=to_version)
img_bytes = _draw_bounding_box_if_configured(img_bytes, watch, datastore)
mime_type = _detect_mime_type(img_bytes)
return (img_bytes, mime_type, 'public, max-age=3600')
elif asset_name == 'rendered_diff':
# Generate diff in isolated subprocess to prevent memory leaks
# Subprocess provides complete memory isolation
from .image_handler import isolated_opencv as process_screenshot_handler
img_bytes_from = watch.get_history_snapshot(timestamp=from_version)
img_bytes_to = watch.get_history_snapshot(timestamp=to_version)
# Get pixel difference threshold sensitivity
pixel_difference_threshold_sensitivity = watch.get('comparison_threshold')
if not pixel_difference_threshold_sensitivity or pixel_difference_threshold_sensitivity == '':
pixel_difference_threshold_sensitivity = datastore.data['settings']['application'].get('comparison_threshold', SCREENSHOT_COMPARISON_THRESHOLD_OPTIONS_DEFAULT)
try:
pixel_difference_threshold_sensitivity = float(pixel_difference_threshold_sensitivity)
except (ValueError, TypeError):
pixel_difference_threshold_sensitivity = 30.0
# Get blur sigma
blur_sigma = float(os.getenv("OPENCV_BLUR_SIGMA", "0.8"))
# Generate diff in isolated subprocess (async-safe)
import asyncio
import threading
# Async-safe wrapper: runs coroutine in new thread with its own event loop
def run_async_in_thread():
return asyncio.run(
process_screenshot_handler.generate_diff_isolated(
img_bytes_from,
img_bytes_to,
pixel_difference_threshold=int(pixel_difference_threshold_sensitivity),
blur_sigma=blur_sigma,
max_width=MAX_DIFF_WIDTH,
max_height=MAX_DIFF_HEIGHT
)
)
# Run in thread to avoid blocking event loop if called from async context
result_container = [None]
exception_container = [None]
def thread_target():
try:
result_container[0] = run_async_in_thread()
except Exception as e:
exception_container[0] = e
thread = threading.Thread(target=thread_target)
thread.start()
thread.join(timeout=60)
if exception_container[0]:
raise exception_container[0]
diff_image_bytes = result_container[0]
if diff_image_bytes:
# Note: Bounding box drawing on diff not yet implemented
return (diff_image_bytes, 'image/jpeg', 'public, max-age=300')
else:
logger.error("Failed to generate diff in subprocess")
return None
else:
# Unknown asset
return None
except Exception as e:
logger.error(f"Failed to get asset '{asset_name}': {e}")
import traceback
logger.error(traceback.format_exc())
return None
def _detect_mime_type(img_bytes):
"""
Detect MIME type using puremagic (same as Watch.py).
Args:
img_bytes: Image bytes
Returns:
str: MIME type (e.g., 'image/png', 'image/jpeg')
"""
try:
import puremagic
detections = puremagic.magic_string(img_bytes[:2048])
if detections:
mime_type = detections[0].mime_type
logger.trace(f"Detected MIME type: {mime_type}")
return mime_type
else:
logger.trace("No MIME type detected, using 'image/png' fallback")
return 'image/png'
except Exception as e:
logger.warning(f"puremagic detection failed: {e}, using 'image/png' fallback")
return 'image/png'
def _draw_bounding_box_if_configured(img_bytes, watch, datastore):
"""
Draw blue bounding box on image if configured in processor settings.
Uses isolated subprocess to prevent memory leaks from large images.
Supports two modes:
- "Select by element": Use include_filter to find xpath element bbox
- "Draw area": Use manually drawn bounding_box from config
Args:
img_bytes: Image bytes (PNG)
watch: Watch object
datastore: Datastore object
Returns:
Image bytes (possibly with bounding box drawn)
"""
try:
# Get processor configuration
from changedetectionio import processors
processor_instance = processors.difference_detection_processor(datastore, watch.get('uuid'))
processor_name = watch.get('processor', 'default')
config_filename = f'{processor_name}.json'
processor_config = processor_instance.get_extra_watch_config(config_filename)
if not processor_config:
return img_bytes
selection_mode = processor_config.get('selection_mode', 'draw')
x, y, width, height = None, None, None, None
# Mode 1: Select by element (use include_filter + xpath_data)
if selection_mode == 'element':
include_filters = watch.get('include_filters', [])
if include_filters and len(include_filters) > 0:
first_filter = include_filters[0].strip()
# Get xpath_data from watch history
history_keys = list(watch.history.keys())
if history_keys:
latest_snapshot = watch.get_history_snapshot(timestamp=history_keys[-1])
xpath_data_path = watch.get_xpath_data_filepath(timestamp=history_keys[-1])
try:
import gzip
with gzip.open(xpath_data_path, 'rt') as f:
xpath_data = json.load(f)
# Find matching element
for element in xpath_data.get('size_pos', []):
if element.get('xpath') == first_filter and element.get('highlight_as_custom_filter'):
x = element.get('left', 0)
y = element.get('top', 0)
width = element.get('width', 0)
height = element.get('height', 0)
logger.debug(f"Found element bbox for filter '{first_filter}': x={x}, y={y}, w={width}, h={height}")
break
except Exception as e:
logger.warning(f"Failed to load xpath_data for element selection: {e}")
# Mode 2: Draw area (use manually configured bbox)
else:
bounding_box = processor_config.get('bounding_box')
if bounding_box:
# Parse bounding box: "x,y,width,height"
parts = [int(p.strip()) for p in bounding_box.split(',')]
if len(parts) == 4:
x, y, width, height = parts
else:
logger.warning(f"Invalid bounding box format: {bounding_box}")
# If no bbox found, return original image
if x is None or y is None or width is None or height is None:
return img_bytes
# Use isolated subprocess to prevent memory leaks from large images
from .image_handler import isolated_opencv
import asyncio
import threading
# Async-safe wrapper: runs coroutine in new thread with its own event loop
# This prevents blocking when called from async context (update worker)
def run_async_in_thread():
return asyncio.run(
isolated_opencv.draw_bounding_box_isolated(
img_bytes, x, y, width, height,
color=(255, 0, 0), # Blue in BGR format
thickness=3
)
)
# Always run in thread to avoid blocking event loop if called from async context
result_container = [None]
exception_container = [None]
def thread_target():
try:
result_container[0] = run_async_in_thread()
except Exception as e:
exception_container[0] = e
thread = threading.Thread(target=thread_target)
thread.start()
thread.join(timeout=15)
if exception_container[0]:
raise exception_container[0]
result = result_container[0]
# Return result or original if subprocess failed
return result if result else img_bytes
except Exception as e:
logger.warning(f"Failed to draw bounding box: {e}")
import traceback
logger.debug(traceback.format_exc())
return img_bytes
def render(watch, datastore, request, url_for, render_template, flash, redirect):
"""
Render the screenshot comparison diff page.
Uses ImageDiffHandler for all image operations.
Args:
watch: Watch object
datastore: Datastore object
request: Flask request
url_for: Flask url_for function
render_template: Flask render_template function
flash: Flask flash function
redirect: Flask redirect function
Returns:
Rendered template or redirect
"""
# Get version parameters (from_version, to_version)
versions = list(watch.history.keys())
if len(versions) < 2:
flash("Not enough history to compare. Need at least 2 snapshots.", "error")
return redirect(url_for('watchlist.index'))
# Default: compare latest two versions
from_version = request.args.get('from_version', versions[-2] if len(versions) >= 2 else versions[0])
to_version = request.args.get('to_version', versions[-1])
# Validate versions exist
if from_version not in versions:
from_version = versions[-2] if len(versions) >= 2 else versions[0]
if to_version not in versions:
to_version = versions[-1]
# Get pixel difference threshold sensitivity (per-watch > global > env default)
pixel_difference_threshold_sensitivity = watch.get('comparison_threshold')
if not pixel_difference_threshold_sensitivity or pixel_difference_threshold_sensitivity == '':
pixel_difference_threshold_sensitivity = datastore.data['settings']['application'].get('comparison_threshold', SCREENSHOT_COMPARISON_THRESHOLD_OPTIONS_DEFAULT)
# Convert to appropriate type
try:
pixel_difference_threshold_sensitivity = float(pixel_difference_threshold_sensitivity)
except (ValueError, TypeError):
logger.warning(f"Invalid pixel_difference_threshold_sensitivity value '{pixel_difference_threshold_sensitivity}', using default")
pixel_difference_threshold_sensitivity = 30.0
# Get blur sigma
blur_sigma = float(os.getenv("OPENCV_BLUR_SIGMA", "0.8"))
# Load screenshots from history
try:
img_bytes_from = watch.get_history_snapshot(timestamp=from_version)
img_bytes_to = watch.get_history_snapshot(timestamp=to_version)
except Exception as e:
logger.error(f"Failed to load screenshots: {e}")
flash(f"Failed to load screenshots: {e}", "error")
return redirect(url_for('watchlist.index'))
# Calculate change percentage using isolated subprocess to prevent memory leaks (async-safe)
now = time.time()
try:
from .image_handler import isolated_opencv as process_screenshot_handler
import asyncio
import threading
# Async-safe wrapper: runs coroutine in new thread with its own event loop
def run_async_in_thread():
return asyncio.run(
process_screenshot_handler.calculate_change_percentage_isolated(
img_bytes_from,
img_bytes_to,
pixel_difference_threshold=int(pixel_difference_threshold_sensitivity),
blur_sigma=blur_sigma,
max_width=MAX_DIFF_WIDTH,
max_height=MAX_DIFF_HEIGHT
)
)
# Run in thread to avoid blocking event loop if called from async context
result_container = [None]
exception_container = [None]
def thread_target():
try:
result_container[0] = run_async_in_thread()
except Exception as e:
exception_container[0] = e
thread = threading.Thread(target=thread_target)
thread.start()
thread.join(timeout=60)
if exception_container[0]:
raise exception_container[0]
change_percentage = result_container[0]
method_display = f"{process_screenshot_handler.IMPLEMENTATION_NAME} (pixel_diff_threshold: {pixel_difference_threshold_sensitivity:.0f})"
logger.debug(f"Done change percentage calculation in {time.time() - now:.2f}s")
except Exception as e:
logger.error(f"Failed to calculate change percentage: {e}")
import traceback
logger.error(traceback.format_exc())
flash(f"Failed to calculate diff: {e}", "error")
return redirect(url_for('watchlist.index'))
# Load historical data if available (for charts/visualization)
comparison_data = {}
comparison_config_path = os.path.join(watch.watch_data_dir, "visual_comparison_data.json")
if os.path.isfile(comparison_config_path):
try:
with open(comparison_config_path, 'r') as f:
comparison_data = json.load(f)
except Exception as e:
logger.warning(f"Failed to load comparison history data: {e}")
# Render custom template
# Template path is namespaced to avoid conflicts with other processors
# Images are now served via separate /processor-asset/ endpoints instead of base64
return render_template(
'image_ssim_diff/diff.html',
watch=watch,
uuid=watch.get('uuid'),
change_percentage=change_percentage,
comparison_data=comparison_data, # Full history for charts/visualization
threshold=pixel_difference_threshold_sensitivity,
comparison_method=method_display,
versions=versions,
from_version=from_version,
to_version=to_version,
percentage_different=change_percentage
)

View File

@@ -1,151 +0,0 @@
"""
Optional hook called when processor settings are saved in edit page.
This hook analyzes the selected region to determine if template matching
should be enabled for tracking content movement.
Template matching is controlled via ENABLE_TEMPLATE_TRACKING env var (default: False).
"""
import io
import os
from loguru import logger
from changedetectionio.strtobool import strtobool
from . import CROPPED_IMAGE_TEMPLATE_FILENAME
# Template matching controlled via environment variable (default: disabled)
# Set ENABLE_TEMPLATE_TRACKING=True to enable
TEMPLATE_MATCHING_ENABLED = strtobool(os.getenv('ENABLE_TEMPLATE_TRACKING', 'False'))
IMPORT_ERROR = "Template matching disabled (set ENABLE_TEMPLATE_TRACKING=True to enable)"
def on_config_save(watch, processor_config, datastore):
"""
Called after processor config is saved in edit page.
Analyzes the bounding box region to determine if it has enough
visual features (texture/edges) to enable template matching for
tracking content movement when page layout shifts.
Args:
watch: Watch object
processor_config: Dict of processor-specific config
datastore: Datastore object
Returns:
dict: Updated processor_config with auto_track_region setting
"""
# Check if template matching is globally enabled via ENV var
if not TEMPLATE_MATCHING_ENABLED:
logger.debug("Template tracking disabled via ENABLE_TEMPLATE_TRACKING env var")
processor_config['auto_track_region'] = False
return processor_config
bounding_box = processor_config.get('bounding_box')
if not bounding_box:
# No bounding box, disable tracking
processor_config['auto_track_region'] = False
logger.debug("No bounding box set, disabled auto-tracking")
return processor_config
try:
# Get the latest screenshot from watch history
history_keys = list(watch.history.keys())
if len(history_keys) == 0:
logger.warning("No screenshot history available yet, cannot analyze for tracking")
processor_config['auto_track_region'] = False
return processor_config
# Get latest screenshot
latest_timestamp = history_keys[-1]
screenshot_bytes = watch.get_history_snapshot(timestamp=latest_timestamp)
if not screenshot_bytes:
logger.warning("Could not load screenshot for analysis")
processor_config['auto_track_region'] = False
return processor_config
# Parse bounding box
parts = [int(p.strip()) for p in bounding_box.split(',')]
if len(parts) != 4:
logger.warning("Invalid bounding box format")
processor_config['auto_track_region'] = False
return processor_config
x, y, width, height = parts
# Analyze the region for features/texture
has_enough_features = analyze_region_features(screenshot_bytes, x, y, width, height)
if has_enough_features:
logger.info(f"Region has sufficient features for tracking - enabling auto_track_region")
processor_config['auto_track_region'] = True
# Save the template as cropped.jpg in watch data directory
save_template_to_file(watch, screenshot_bytes, x, y, width, height)
else:
logger.info(f"Region lacks distinctive features - disabling auto_track_region")
processor_config['auto_track_region'] = False
# Remove old template file if exists
template_path = os.path.join(watch.watch_data_dir, CROPPED_IMAGE_TEMPLATE_FILENAME)
if os.path.exists(template_path):
os.remove(template_path)
logger.debug(f"Removed old template file: {template_path}")
return processor_config
except Exception as e:
logger.error(f"Error analyzing region for tracking: {e}")
processor_config['auto_track_region'] = False
return processor_config
def analyze_region_features(screenshot_bytes, x, y, width, height):
"""
Analyze if a region has enough visual features for template matching.
Uses OpenCV to detect corners/edges. If the region has distinctive
features, template matching can reliably track it when it moves.
Args:
screenshot_bytes: Full screenshot as bytes
x, y, width, height: Bounding box coordinates
Returns:
bool: True if region has enough features, False otherwise
"""
# Template matching disabled - would need OpenCV implementation for region analysis
if not TEMPLATE_MATCHING_ENABLED:
logger.warning(f"Cannot analyze region features: {IMPORT_ERROR}")
return False
# Note: Original implementation used LibVIPS handler to crop region, then OpenCV
# for feature detection (goodFeaturesToTrack, Canny edge detection, variance).
# If re-implementing, use OpenCV directly for both cropping and analysis.
# Feature detection would use: cv2.goodFeaturesToTrack, cv2.Canny, np.var
return False
def save_template_to_file(watch, screenshot_bytes, x, y, width, height):
"""
Extract the template region and save as cropped_image_template.png in watch data directory.
This is a convenience wrapper around handler.save_template() that handles
watch directory setup and path construction.
Args:
watch: Watch object
screenshot_bytes: Full screenshot as bytes
x, y, width, height: Bounding box coordinates
"""
# Template matching disabled - would need OpenCV implementation for template saving
if not TEMPLATE_MATCHING_ENABLED:
logger.warning(f"Cannot save template: {IMPORT_ERROR}")
return
# Note: Original implementation used LibVIPS handler to crop and save region.
# If re-implementing, use OpenCV (cv2.imdecode, crop with array slicing, cv2.imwrite).
return
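# A hedged sketch of the OpenCV re-implementation the notes above describe
# (corner-count and variance thresholds are illustrative, not values from the repo):
def _region_has_features_sketch(screenshot_bytes, x, y, width, height, min_corners=10):
    import cv2
    import numpy as np
    img = cv2.imdecode(np.frombuffer(screenshot_bytes, dtype=np.uint8), cv2.IMREAD_GRAYSCALE)
    region = img[y:y + height, x:x + width]
    # Distinctive regions yield many trackable corners
    corners = cv2.goodFeaturesToTrack(region, maxCorners=50, qualityLevel=0.01, minDistance=5)
    # Variance as a cheap texture measure (flat regions have low variance)
    return corners is not None and len(corners) >= min_corners and np.var(region) > 100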

View File

@@ -1,117 +0,0 @@
"""
Configuration forms for fast screenshot comparison processor.
"""
from wtforms import SelectField, StringField, FloatField, validators, ValidationError
from changedetectionio.forms import processor_text_json_diff_form
import re
from changedetectionio.processors.image_ssim_diff import SCREENSHOT_COMPARISON_THRESHOLD_OPTIONS
def validate_bounding_box(form, field):
"""Validate bounding box format: x,y,width,height with integers."""
if not field.data:
return # Optional field
if len(field.data) > 100:
raise ValidationError('Bounding box value is too long')
# Should be comma-separated integers
if not re.match(r'^\d+,\d+,\d+,\d+$', field.data):
raise ValidationError('Bounding box must be in format: x,y,width,height (integers only)')
# Validate values are reasonable (not negative, not ridiculously large)
parts = [int(p) for p in field.data.split(',')]
for part in parts:
if part < 0:
raise ValidationError('Bounding box values must be non-negative')
if part > 10000: # Reasonable max screen dimension
raise ValidationError('Bounding box values are too large')
def validate_selection_mode(form, field):
"""Validate selection mode value."""
if not field.data:
return # Optional field
if field.data not in ['element', 'draw']:
raise ValidationError('Selection mode must be either "element" or "draw"')
class processor_settings_form(processor_text_json_diff_form):
"""Form for fast image comparison processor settings."""
min_change_percentage = FloatField(
'Minimum Change Percentage',
validators=[
validators.Optional(),
validators.NumberRange(min=0.0, max=100.0, message='Must be between 0 and 100')
],
render_kw={"placeholder": "Use global default (0.1)"}
)
comparison_threshold = SelectField(
'Pixel Difference Sensitivity',
choices=[
('', 'Use global default')
] + SCREENSHOT_COMPARISON_THRESHOLD_OPTIONS,
validators=[validators.Optional()],
default=''
)
# Processor-specific config fields (stored in separate JSON file)
processor_config_bounding_box = StringField(
'Bounding Box',
validators=[
validators.Optional(),
validators.Length(max=100, message='Bounding box value is too long'),
validate_bounding_box
],
render_kw={"style": "display: none;", "id": "bounding_box"}
)
processor_config_selection_mode = StringField(
'Selection Mode',
validators=[
validators.Optional(),
validators.Length(max=20, message='Selection mode value is too long'),
validate_selection_mode
],
render_kw={"style": "display: none;", "id": "selection_mode"}
)
def extra_tab_content(self):
"""Tab label for processor-specific settings."""
return 'Screenshot Comparison'
def extra_form_content(self):
"""Render processor-specific form fields."""
return '''
{% from '_helpers.html' import render_field %}
<fieldset>
<legend>Screenshot Comparison Settings</legend>
<div class="pure-control-group">
{{ render_field(form.min_change_percentage) }}
<span class="pure-form-message-inline">
<strong>What percentage of pixels must change to trigger a detection?</strong><br>
For example, <strong>0.1%</strong> means if 0.1% or more of the pixels change, it counts as a change.<br>
Lower values = more sensitive (detect smaller changes).<br>
Higher values = less sensitive (only detect larger changes).<br>
Leave blank to use global default (0.1%).
</span>
</div>
<div class="pure-control-group">
{{ render_field(form.comparison_threshold) }}
<span class="pure-form-message-inline">
<strong>How different must an individual pixel be to count as "changed"?</strong><br>
<strong>Low sensitivity (75)</strong> = Only count pixels that changed significantly (0-255 scale).<br>
<strong>High sensitivity (20)</strong> = Count pixels with small changes as different.<br>
<strong>Very high (0)</strong> = Any pixel change counts.<br>
Select "Use global default" to inherit the system-wide setting.
</span>
</div>
</fieldset>
'''

View File

@@ -1,242 +0,0 @@
"""
Abstract base class for image processing operations.
All image operations for the image_ssim_diff processor must be implemented
through this interface to allow different backends (libvips, OpenCV, PIL, etc.).
"""
from abc import ABC, abstractmethod
from typing import Tuple, Optional, Any
class ImageDiffHandler(ABC):
"""
Abstract base class for image processing operations.
Implementations must handle all image operations needed for screenshot
comparison including loading, cropping, resizing, diffing, and overlays.
"""
@abstractmethod
def load_from_bytes(self, img_bytes: bytes) -> Any:
"""
Load image from bytes.
Args:
img_bytes: Image data as bytes (PNG, JPEG, etc.)
Returns:
Handler-specific image object
"""
pass
@abstractmethod
def save_to_bytes(self, img: Any, format: str = 'png', quality: int = 85) -> bytes:
"""
Save image to bytes.
Args:
img: Handler-specific image object
format: Output format ('png' or 'jpeg')
quality: Quality for JPEG (1-100)
Returns:
Image data as bytes
"""
pass
@abstractmethod
def crop(self, img: Any, left: int, top: int, right: int, bottom: int) -> Any:
"""
Crop image to specified region.
Args:
img: Handler-specific image object
left: Left coordinate
top: Top coordinate
right: Right coordinate
bottom: Bottom coordinate
Returns:
Cropped image object
"""
pass
@abstractmethod
def resize(self, img: Any, max_width: int, max_height: int) -> Any:
"""
Resize image maintaining aspect ratio.
Args:
img: Handler-specific image object
max_width: Maximum width in pixels
max_height: Maximum height in pixels
Returns:
Resized image object
"""
pass
@abstractmethod
def get_dimensions(self, img: Any) -> Tuple[int, int]:
"""
Get image dimensions.
Args:
img: Handler-specific image object
Returns:
Tuple of (width, height)
"""
pass
@abstractmethod
def to_grayscale(self, img: Any) -> Any:
"""
Convert image to grayscale.
Args:
img: Handler-specific image object
Returns:
Grayscale image object
"""
pass
@abstractmethod
def gaussian_blur(self, img: Any, sigma: float) -> Any:
"""
Apply Gaussian blur to image.
Args:
img: Handler-specific image object
sigma: Blur sigma value (0 = no blur)
Returns:
Blurred image object
"""
pass
@abstractmethod
def absolute_difference(self, img1: Any, img2: Any) -> Any:
"""
Calculate absolute difference between two images.
Args:
img1: First image (handler-specific object)
img2: Second image (handler-specific object)
Returns:
Difference image object
"""
pass
@abstractmethod
def threshold(self, img: Any, threshold_value: int) -> Tuple[float, Any]:
"""
Apply threshold to image and calculate change percentage.
Args:
img: Handler-specific image object (typically grayscale difference)
threshold_value: Threshold value (0-255)
Returns:
Tuple of (change_percentage, binary_mask)
- change_percentage: Percentage of pixels above threshold (0-100)
- binary_mask: Handler-specific binary mask object
"""
pass
@abstractmethod
def apply_red_overlay(self, img: Any, mask: Any) -> bytes:
"""
Apply red overlay to image where mask is True.
Args:
img: Handler-specific image object (color)
mask: Handler-specific binary mask object
Returns:
JPEG bytes with red overlay applied
"""
pass
@abstractmethod
def close(self, img: Any) -> None:
"""
Clean up image resources if needed.
Args:
img: Handler-specific image object
"""
pass
@abstractmethod
def find_template(
self,
img: Any,
template_img: Any,
original_bbox: Tuple[int, int, int, int],
search_tolerance: float = 0.2
) -> Optional[Tuple[int, int, int, int]]:
"""
Find template in image using template matching.
Args:
img: Handler-specific image object to search in
template_img: Handler-specific template image object to find
original_bbox: Original bounding box (left, top, right, bottom)
search_tolerance: How far to search (0.2 = ±20% of region size)
Returns:
New bounding box (left, top, right, bottom) or None if not found
"""
pass
@abstractmethod
def save_template(
self,
img: Any,
bbox: Tuple[int, int, int, int],
output_path: str
) -> bool:
"""
Save a cropped region as a template file.
Args:
img: Handler-specific image object
bbox: Bounding box to crop (left, top, right, bottom)
output_path: Where to save the template PNG
Returns:
True if successful, False otherwise
"""
pass
@abstractmethod
def draw_bounding_box(
self,
img_bytes: bytes,
x: int,
y: int,
width: int,
height: int,
color: Tuple[int, int, int] = (255, 0, 0),
thickness: int = 3
) -> bytes:
"""
Draw a bounding box rectangle on image.
Args:
img_bytes: Image data as bytes
x: Left coordinate
y: Top coordinate
width: Box width
height: Box height
color: BGR color tuple (default: blue)
thickness: Line thickness in pixels
Returns:
Image bytes with bounding box drawn
"""
pass
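# A minimal sketch of a hypothetical Pillow-backed handler implementing two of
# the methods above (illustrative only; the backends shipped with this processor
# are the LibVIPS and OpenCV handlers):
#
#   import io
#   from PIL import Image
#
#   class PillowImageDiffHandler(ImageDiffHandler):
#       def load_from_bytes(self, img_bytes: bytes) -> Image.Image:
#           return Image.open(io.BytesIO(img_bytes))
#
#       def get_dimensions(self, img: Image.Image) -> Tuple[int, int]:
#           return img.size  # Pillow reports (width, height)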

View File

@@ -1,353 +0,0 @@
"""
Subprocess-isolated image operations for memory leak prevention.
LibVIPS accumulates C-level memory in long-running processes that cannot be
reclaimed by Python's GC or libvips cache management. Using subprocess isolation
ensures complete memory cleanup when the process exits.
This module wraps LibvipsImageDiffHandler operations in multiprocessing for
complete memory isolation without code duplication.
Research: https://github.com/libvips/pyvips/issues/234
"""
import multiprocessing
# CRITICAL: Use 'spawn' instead of 'fork' to avoid inheriting parent's
# LibVIPS threading state which can cause hangs in gaussblur operations
# https://docs.python.org/3/library/multiprocessing.html#contexts-and-start-methods
try:
multiprocessing.set_start_method('spawn', force=False)
except RuntimeError:
# Start method already set elsewhere; with force=False the first choice wins (which may still be 'fork')
pass
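# A per-call alternative (sketch; the OpenCV module in this processor uses this
# pattern) is to request a spawn context explicitly instead of mutating the
# global start method:
#
#   ctx = multiprocessing.get_context('spawn')
#   parent_conn, child_conn = ctx.Pipe()
#   p = ctx.Process(target=worker_fn, args=(child_conn,))  # worker_fn is hypothetical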
def _worker_generate_diff(conn, img_bytes_from, img_bytes_to, threshold, blur_sigma, max_width, max_height):
"""
Worker: Generate diff visualization using LibvipsImageDiffHandler in isolated subprocess.
This runs in a separate process for complete memory isolation.
Uses print() instead of loguru to avoid forking issues.
"""
try:
# Import handler inside worker
from .libvips_handler import LibvipsImageDiffHandler
print(f"[Worker] Initializing handler", flush=True)
handler = LibvipsImageDiffHandler()
# Load images using handler
img_from = handler.load_from_bytes(img_bytes_from)
img_to = handler.load_from_bytes(img_bytes_to)
# Ensure same size
w1, h1 = handler.get_dimensions(img_from)
w2, h2 = handler.get_dimensions(img_to)
if (w1, h1) != (w2, h2):
img_from = handler.resize(img_from, w2, h2)
# Downscale for faster diff visualization
img_from = handler.resize(img_from, max_width, max_height)
img_to = handler.resize(img_to, max_width, max_height)
# Convert to grayscale
gray_from = handler.to_grayscale(img_from)
gray_to = handler.to_grayscale(img_to)
# Optional blur - DISABLED due to LibVIPS threading issues in fork
# gray_from = handler.gaussian_blur(gray_from, blur_sigma)
# gray_to = handler.gaussian_blur(gray_to, blur_sigma)
# Calculate difference
diff = handler.absolute_difference(gray_from, gray_to)
# Threshold to get mask
_, diff_mask = handler.threshold(diff, int(threshold))
# Generate diff image with red overlay
diff_image_bytes = handler.apply_red_overlay(img_to, diff_mask)
print(f"[Worker] Generated diff ({len(diff_image_bytes)} bytes)", flush=True)
conn.send(diff_image_bytes)
except Exception as e:
print(f"[Worker] Error: {e}", flush=True)
import traceback
traceback.print_exc()
conn.send(None)
finally:
conn.close()
def generate_diff_isolated(img_bytes_from, img_bytes_to, threshold, blur_sigma, max_width, max_height):
"""
Generate diff visualization in isolated subprocess for memory leak prevention.
Args:
img_bytes_from: Previous screenshot bytes
img_bytes_to: Current screenshot bytes
threshold: Pixel difference threshold
blur_sigma: Gaussian blur sigma
max_width: Maximum width for diff
max_height: Maximum height for diff
Returns:
bytes: JPEG diff image or None on failure
"""
parent_conn, child_conn = multiprocessing.Pipe()
p = multiprocessing.Process(
target=_worker_generate_diff,
args=(child_conn, img_bytes_from, img_bytes_to, threshold, blur_sigma, max_width, max_height)
)
p.start()
result = None
try:
# Wait for result (30 second timeout)
if parent_conn.poll(30):
result = parent_conn.recv()
except Exception as e:
print(f"[Parent] Error receiving result: {e}", flush=True)
finally:
# Always close pipe first
try:
parent_conn.close()
except Exception:
pass
# Try graceful shutdown
p.join(timeout=5)
if p.is_alive():
print("[Parent] Process didn't exit gracefully, terminating", flush=True)
p.terminate()
p.join(timeout=3)
# Force kill if still alive
if p.is_alive():
print("[Parent] Process didn't terminate, killing", flush=True)
p.kill()
p.join(timeout=1)
return result
def calculate_change_percentage_isolated(img_bytes_from, img_bytes_to, threshold, blur_sigma, max_width, max_height):
"""
Calculate change percentage in isolated subprocess using handler.
Returns:
float: Change percentage
"""
parent_conn, child_conn = multiprocessing.Pipe()
def _worker_calculate(conn):
try:
# Import handler inside worker
from .libvips_handler import LibvipsImageDiffHandler
handler = LibvipsImageDiffHandler()
# Load images
img_from = handler.load_from_bytes(img_bytes_from)
img_to = handler.load_from_bytes(img_bytes_to)
# Ensure same size
w1, h1 = handler.get_dimensions(img_from)
w2, h2 = handler.get_dimensions(img_to)
if (w1, h1) != (w2, h2):
img_from = handler.resize(img_from, w2, h2)
# Downscale
img_from = handler.resize(img_from, max_width, max_height)
img_to = handler.resize(img_to, max_width, max_height)
# Convert to grayscale
gray_from = handler.to_grayscale(img_from)
gray_to = handler.to_grayscale(img_to)
# Optional blur
gray_from = handler.gaussian_blur(gray_from, blur_sigma)
gray_to = handler.gaussian_blur(gray_to, blur_sigma)
# Calculate difference
diff = handler.absolute_difference(gray_from, gray_to)
# Threshold and get percentage
change_percentage, _ = handler.threshold(diff, int(threshold))
conn.send(float(change_percentage))
except Exception as e:
print(f"[Worker] Calculate error: {e}", flush=True)
conn.send(0.0)
finally:
conn.close()
p = multiprocessing.Process(target=_worker_calculate, args=(child_conn,))
p.start()
result = 0.0
try:
if parent_conn.poll(30):
result = parent_conn.recv()
except Exception as e:
print(f"[Parent] Calculate error receiving result: {e}", flush=True)
finally:
# Always close pipe first
try:
parent_conn.close()
except Exception:
pass
# Try graceful shutdown
p.join(timeout=5)
if p.is_alive():
print("[Parent] Calculate process didn't exit gracefully, terminating", flush=True)
p.terminate()
p.join(timeout=3)
# Force kill if still alive
if p.is_alive():
print("[Parent] Calculate process didn't terminate, killing", flush=True)
p.kill()
p.join(timeout=1)
return result
def compare_images_isolated(img_bytes_from, img_bytes_to, threshold, blur_sigma, min_change_percentage, crop_region=None):
"""
Compare images in isolated subprocess for change detection.
Args:
img_bytes_from: Previous screenshot bytes
img_bytes_to: Current screenshot bytes
threshold: Pixel difference threshold
blur_sigma: Gaussian blur sigma
min_change_percentage: Minimum percentage to trigger change detection
crop_region: Optional tuple (left, top, right, bottom) for cropping both images
Returns:
tuple: (changed_detected, change_percentage)
"""
print(f"[Parent] Starting compare_images_isolated subprocess", flush=True)
parent_conn, child_conn = multiprocessing.Pipe()
def _worker_compare(conn):
try:
print(f"[Worker] Compare worker starting", flush=True)
# Import handler inside worker
from .libvips_handler import LibvipsImageDiffHandler
print(f"[Worker] Initializing handler", flush=True)
handler = LibvipsImageDiffHandler()
# Load images
print(f"[Worker] Loading images (from={len(img_bytes_from)} bytes, to={len(img_bytes_to)} bytes)", flush=True)
img_from = handler.load_from_bytes(img_bytes_from)
img_to = handler.load_from_bytes(img_bytes_to)
print(f"[Worker] Images loaded", flush=True)
# Crop if region specified
if crop_region:
print(f"[Worker] Cropping to region {crop_region}", flush=True)
left, top, right, bottom = crop_region
img_from = handler.crop(img_from, left, top, right, bottom)
img_to = handler.crop(img_to, left, top, right, bottom)
print(f"[Worker] Cropping completed", flush=True)
# Ensure same size
w1, h1 = handler.get_dimensions(img_from)
w2, h2 = handler.get_dimensions(img_to)
print(f"[Worker] Image dimensions: from={w1}x{h1}, to={w2}x{h2}", flush=True)
if (w1, h1) != (w2, h2):
print(f"[Worker] Resizing to match dimensions", flush=True)
img_from = handler.resize(img_from, w2, h2)
# Convert to grayscale
print(f"[Worker] Converting to grayscale", flush=True)
gray_from = handler.to_grayscale(img_from)
gray_to = handler.to_grayscale(img_to)
# Optional blur
# NOTE: gaussblur can hang in forked subprocesses due to LibVIPS threading
# Skip blur as a workaround - sigma=0.8 is subtle and comparison works without it
if blur_sigma > 0:
print(f"[Worker] Skipping blur (sigma={blur_sigma}) due to LibVIPS threading issues in fork", flush=True)
# gray_from = handler.gaussian_blur(gray_from, blur_sigma)
# gray_to = handler.gaussian_blur(gray_to, blur_sigma)
# Calculate difference
print(f"[Worker] Calculating difference", flush=True)
diff = handler.absolute_difference(gray_from, gray_to)
# Threshold and get percentage
print(f"[Worker] Applying threshold ({threshold})", flush=True)
change_percentage, _ = handler.threshold(diff, int(threshold))
# Determine if change detected
changed_detected = change_percentage > min_change_percentage
print(f"[Worker] Comparison complete: changed={changed_detected}, percentage={change_percentage:.2f}%", flush=True)
conn.send((changed_detected, float(change_percentage)))
except Exception as e:
print(f"[Worker] Compare error: {e}", flush=True)
import traceback
traceback.print_exc()
conn.send((False, 0.0))
finally:
conn.close()
p = multiprocessing.Process(target=_worker_compare, args=(child_conn,))
print(f"[Parent] Starting subprocess (pid will be assigned)", flush=True)
p.start()
print(f"[Parent] Subprocess started (pid={p.pid}), waiting for result (30s timeout)", flush=True)
result = (False, 0.0)
try:
if parent_conn.poll(30):
print(f"[Parent] Result available, receiving", flush=True)
result = parent_conn.recv()
print(f"[Parent] Result received: {result}", flush=True)
else:
print(f"[Parent] Timeout waiting for result after 30s", flush=True)
except Exception as e:
print(f"[Parent] Compare error receiving result: {e}", flush=True)
finally:
# Always close pipe first
try:
parent_conn.close()
except Exception:
pass
# Try graceful shutdown
import time
print(f"[Parent] Waiting for subprocess to exit (5s timeout)", flush=True)
join_start = time.time()
p.join(timeout=5)
join_elapsed = time.time() - join_start
print(f"[Parent] First join took {join_elapsed:.2f}s", flush=True)
if p.is_alive():
print("[Parent] Compare process didn't exit gracefully, terminating", flush=True)
term_start = time.time()
p.terminate()
p.join(timeout=3)
term_elapsed = time.time() - term_start
print(f"[Parent] Terminate+join took {term_elapsed:.2f}s", flush=True)
# Force kill if still alive
if p.is_alive():
print("[Parent] Compare process didn't terminate, killing", flush=True)
kill_start = time.time()
p.kill()
p.join(timeout=1)
kill_elapsed = time.time() - kill_start
print(f"[Parent] Kill+join took {kill_elapsed:.2f}s", flush=True)
print(f"[Parent] Subprocess cleanup complete, returning result", flush=True)
return result
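# Usage sketch (hypothetical call site):
#   changed, pct = compare_images_isolated(prev_png_bytes, cur_png_bytes,
#                                          threshold=75, blur_sigma=0.8,
#                                          min_change_percentage=0.1)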

View File

@@ -1,627 +0,0 @@
"""
OpenCV-based subprocess isolation for image comparison.
OpenCV is much more stable than LibVIPS in multiprocessing contexts:
no threading issues, no fork problems, and the worker functions are picklable.
"""
import multiprocessing
import numpy as np
from .. import POLL_TIMEOUT_ABSOLUTE
# Public implementation name for logging
IMPLEMENTATION_NAME = "OpenCV"
def _worker_compare(conn, img_bytes_from, img_bytes_to, pixel_difference_threshold, blur_sigma, crop_region):
"""
Worker function for image comparison (must be top-level for pickling with spawn).
Args:
conn: Pipe connection for sending results
img_bytes_from: Previous screenshot bytes
img_bytes_to: Current screenshot bytes
pixel_difference_threshold: Pixel-level sensitivity (0-255) - how different must a pixel be to count as changed
blur_sigma: Gaussian blur sigma
crop_region: Optional (left, top, right, bottom) crop coordinates
"""
import time
try:
import cv2
# CRITICAL: Disable OpenCV threading to prevent thread explosion
# With multiprocessing, each subprocess would otherwise spawn threads equal to CPU cores
# This causes excessive thread counts and memory overhead
# Research: https://medium.com/@rachittayal7/a-note-on-opencv-threads-performance-in-prod-d10180716fba
cv2.setNumThreads(1)
print(f"[{time.time():.3f}] [Worker] Compare worker starting (threads=1 for memory optimization)", flush=True)
# Decode images from bytes
print(f"[{time.time():.3f}] [Worker] Loading images (from={len(img_bytes_from)} bytes, to={len(img_bytes_to)} bytes)", flush=True)
img_from = cv2.imdecode(np.frombuffer(img_bytes_from, np.uint8), cv2.IMREAD_COLOR)
img_to = cv2.imdecode(np.frombuffer(img_bytes_to, np.uint8), cv2.IMREAD_COLOR)
# Check if decoding succeeded
if img_from is None:
raise ValueError("Failed to decode 'from' image - may be corrupt or unsupported format")
if img_to is None:
raise ValueError("Failed to decode 'to' image - may be corrupt or unsupported format")
print(f"[{time.time():.3f}] [Worker] Images loaded: from={img_from.shape}, to={img_to.shape}", flush=True)
# Crop if region specified
if crop_region:
print(f"[{time.time():.3f}] [Worker] Cropping to region {crop_region}", flush=True)
left, top, right, bottom = crop_region
img_from = img_from[top:bottom, left:right]
img_to = img_to[top:bottom, left:right]
print(f"[{time.time():.3f}] [Worker] Cropped: from={img_from.shape}, to={img_to.shape}", flush=True)
# Resize if dimensions don't match
if img_from.shape != img_to.shape:
print(f"[{time.time():.3f}] [Worker] Resizing to match dimensions", flush=True)
img_from = cv2.resize(img_from, (img_to.shape[1], img_to.shape[0]))
# Convert to grayscale
print(f"[{time.time():.3f}] [Worker] Converting to grayscale", flush=True)
gray_from = cv2.cvtColor(img_from, cv2.COLOR_BGR2GRAY)
gray_to = cv2.cvtColor(img_to, cv2.COLOR_BGR2GRAY)
# Optional Gaussian blur
if blur_sigma > 0:
print(f"[{time.time():.3f}] [Worker] Applying Gaussian blur (sigma={blur_sigma})", flush=True)
# OpenCV uses kernel size, convert sigma to kernel size: size = 2 * round(3*sigma) + 1
ksize = int(2 * round(3 * blur_sigma)) + 1
if ksize % 2 == 0: # Must be odd
ksize += 1
gray_from = cv2.GaussianBlur(gray_from, (ksize, ksize), blur_sigma)
gray_to = cv2.GaussianBlur(gray_to, (ksize, ksize), blur_sigma)
print(f"[{time.time():.3f}] [Worker] Blur applied (kernel={ksize}x{ksize})", flush=True)
# Calculate absolute difference
print(f"[{time.time():.3f}] [Worker] Calculating absolute difference", flush=True)
diff = cv2.absdiff(gray_from, gray_to)
# Apply threshold
print(f"[{time.time():.3f}] [Worker] Applying pixel difference threshold ({pixel_difference_threshold})", flush=True)
_, thresholded = cv2.threshold(diff, int(pixel_difference_threshold), 255, cv2.THRESH_BINARY)
# Calculate change percentage
total_pixels = thresholded.size
changed_pixels = np.count_nonzero(thresholded)
change_percentage = (changed_pixels / total_pixels) * 100.0
print(f"[{time.time():.3f}] [Worker] Comparison complete: percentage={change_percentage:.2f}%", flush=True)
# Return only the score - let the caller decide if it's a "change"
conn.send(float(change_percentage))
except Exception as e:
print(f"[{time.time():.3f}] [Worker] Error: {e}", flush=True)
import traceback
traceback.print_exc()
# Send error info as dict so parent can re-raise
conn.send({'error': str(e), 'traceback': traceback.format_exc()})
finally:
conn.close()
async def compare_images_isolated(img_bytes_from, img_bytes_to, pixel_difference_threshold, blur_sigma, crop_region=None):
"""
Compare images in isolated subprocess using OpenCV (async-safe).
Args:
img_bytes_from: Previous screenshot bytes
img_bytes_to: Current screenshot bytes
pixel_difference_threshold: Pixel-level sensitivity (0-255) - how different must a pixel be to count as changed
blur_sigma: Gaussian blur sigma
crop_region: Optional (left, top, right, bottom) crop coordinates
Returns:
float: Change percentage (0-100)
"""
import time
import asyncio
print(f"[{time.time():.3f}] [Parent] Starting OpenCV comparison subprocess", flush=True)
# Use spawn method for clean process (no fork issues)
ctx = multiprocessing.get_context('spawn')
parent_conn, child_conn = ctx.Pipe()
p = ctx.Process(
target=_worker_compare,
args=(child_conn, img_bytes_from, img_bytes_to, pixel_difference_threshold, blur_sigma, crop_region)
)
print(f"[{time.time():.3f}] [Parent] Starting subprocess", flush=True)
p.start()
print(f"[{time.time():.3f}] [Parent] Subprocess started (pid={p.pid}), waiting for result ({POLL_TIMEOUT_ABSOLUTE}s timeout)", flush=True)
result = 0.0
try:
# Async-friendly polling: check in small intervals without blocking event loop
deadline = time.time() + POLL_TIMEOUT_ABSOLUTE
while time.time() < deadline:
# Run poll() in thread to avoid blocking event loop
has_data = await asyncio.to_thread(parent_conn.poll, 0.1)
if has_data:
print(f"[{time.time():.3f}] [Parent] Result available, receiving", flush=True)
result = await asyncio.to_thread(parent_conn.recv)
# Check if result is an error dict
if isinstance(result, dict) and 'error' in result:
raise RuntimeError(f"Image comparison failed: {result['error']}")
print(f"[{time.time():.3f}] [Parent] Result received: {result:.2f}%", flush=True)
break
await asyncio.sleep(0) # Yield control to event loop
else:
from loguru import logger
logger.critical(f"[OpenCV subprocess] Timeout waiting for compare_images result after {POLL_TIMEOUT_ABSOLUTE}s (subprocess may be hung)")
print(f"[{time.time():.3f}] [Parent] Timeout waiting for result after {POLL_TIMEOUT_ABSOLUTE}s", flush=True)
raise TimeoutError(f"Image comparison subprocess timeout after {POLL_TIMEOUT_ABSOLUTE}s")
except Exception as e:
print(f"[{time.time():.3f}] [Parent] Error receiving result: {e}", flush=True)
raise
finally:
# Always close pipe first
try:
parent_conn.close()
except Exception:
pass
# Try graceful shutdown (async-safe)
print(f"[{time.time():.3f}] [Parent] Waiting for subprocess to exit (5s timeout)", flush=True)
join_start = time.time()
await asyncio.to_thread(p.join, 5)
join_elapsed = time.time() - join_start
print(f"[{time.time():.3f}] [Parent] First join took {join_elapsed:.2f}s", flush=True)
if p.is_alive():
print(f"[{time.time():.3f}] [Parent] Process didn't exit gracefully, terminating", flush=True)
term_start = time.time()
p.terminate()
await asyncio.to_thread(p.join, 3)
term_elapsed = time.time() - term_start
print(f"[{time.time():.3f}] [Parent] Terminate+join took {term_elapsed:.2f}s", flush=True)
# Force kill if still alive
if p.is_alive():
print(f"[{time.time():.3f}] [Parent] Process didn't terminate, killing", flush=True)
kill_start = time.time()
p.kill()
await asyncio.to_thread(p.join, 1)
kill_elapsed = time.time() - kill_start
print(f"[{time.time():.3f}] [Parent] Kill+join took {kill_elapsed:.2f}s", flush=True)
print(f"[{time.time():.3f}] [Parent] Subprocess cleanup complete, returning result", flush=True)
return result
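# Usage sketch (hypothetical call site; must be awaited on an event loop):
#   pct = await compare_images_isolated(prev_png_bytes, cur_png_bytes,
#                                       pixel_difference_threshold=20, blur_sigma=0.8)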
def _worker_generate_diff(conn, img_bytes_from, img_bytes_to, pixel_difference_threshold, blur_sigma, max_width, max_height):
"""
Worker function for generating visual diff with red overlay.
"""
import time
try:
import cv2
cv2.setNumThreads(1)
print(f"[{time.time():.3f}] [Worker] Generate diff worker starting", flush=True)
# Decode images
img_from = cv2.imdecode(np.frombuffer(img_bytes_from, np.uint8), cv2.IMREAD_COLOR)
img_to = cv2.imdecode(np.frombuffer(img_bytes_to, np.uint8), cv2.IMREAD_COLOR)
# Resize if needed to match dimensions
if img_from.shape != img_to.shape:
img_from = cv2.resize(img_from, (img_to.shape[1], img_to.shape[0]))
# Downscale to max dimensions for faster processing
h, w = img_to.shape[:2]
if w > max_width or h > max_height:
scale = min(max_width / w, max_height / h)
new_w = int(w * scale)
new_h = int(h * scale)
img_from = cv2.resize(img_from, (new_w, new_h))
img_to = cv2.resize(img_to, (new_w, new_h))
# Convert to grayscale
gray_from = cv2.cvtColor(img_from, cv2.COLOR_BGR2GRAY)
gray_to = cv2.cvtColor(img_to, cv2.COLOR_BGR2GRAY)
# Optional blur
if blur_sigma > 0:
ksize = int(2 * round(3 * blur_sigma)) + 1
if ksize % 2 == 0:
ksize += 1
gray_from = cv2.GaussianBlur(gray_from, (ksize, ksize), blur_sigma)
gray_to = cv2.GaussianBlur(gray_to, (ksize, ksize), blur_sigma)
# Calculate difference
diff = cv2.absdiff(gray_from, gray_to)
# Apply threshold to get mask
_, mask = cv2.threshold(diff, int(pixel_difference_threshold), 255, cv2.THRESH_BINARY)
# Create red overlay on original 'to' image
# Where mask is 255 (changed), blend 50% red
overlay = img_to.copy()
overlay[:, :, 2] = np.where(mask > 0,
np.clip(overlay[:, :, 2] * 0.5 + 127, 0, 255).astype(np.uint8),
overlay[:, :, 2])
overlay[:, :, 0:2] = np.where(mask[:, :, np.newaxis] > 0,
(overlay[:, :, 0:2] * 0.5).astype(np.uint8),
overlay[:, :, 0:2])
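# e.g. a changed mid-grey pixel BGR (128, 128, 128) becomes roughly (64, 64, 191):
# blue and green are halved while red is pushed toward 255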
# Encode as JPEG
_, encoded = cv2.imencode('.jpg', overlay, [cv2.IMWRITE_JPEG_QUALITY, 85])
diff_bytes = encoded.tobytes()
print(f"[{time.time():.3f}] [Worker] Generated diff ({len(diff_bytes)} bytes)", flush=True)
conn.send(diff_bytes)
except Exception as e:
print(f"[{time.time():.3f}] [Worker] Generate diff error: {e}", flush=True)
import traceback
traceback.print_exc()
# Send error info as dict so parent can re-raise
conn.send({'error': str(e), 'traceback': traceback.format_exc()})
finally:
conn.close()
async def generate_diff_isolated(img_bytes_from, img_bytes_to, pixel_difference_threshold, blur_sigma, max_width, max_height):
"""
Generate visual diff with red overlay in isolated subprocess (async-safe).
Returns:
bytes: JPEG diff image or None on failure
"""
import time
import asyncio
print(f"[{time.time():.3f}] [Parent] Starting generate_diff subprocess", flush=True)
ctx = multiprocessing.get_context('spawn')
parent_conn, child_conn = ctx.Pipe()
p = ctx.Process(
target=_worker_generate_diff,
args=(child_conn, img_bytes_from, img_bytes_to, pixel_difference_threshold, blur_sigma, max_width, max_height)
)
print(f"[{time.time():.3f}] [Parent] Starting subprocess", flush=True)
p.start()
print(f"[{time.time():.3f}] [Parent] Subprocess started (pid={p.pid}), waiting for result ({POLL_TIMEOUT_ABSOLUTE}s timeout)", flush=True)
result = None
try:
# Async-friendly polling: check in small intervals without blocking event loop
deadline = time.time() + POLL_TIMEOUT_ABSOLUTE
while time.time() < deadline:
# Run poll() in thread to avoid blocking event loop
has_data = await asyncio.to_thread(parent_conn.poll, 0.1)
if has_data:
print(f"[{time.time():.3f}] [Parent] Result available, receiving", flush=True)
result = await asyncio.to_thread(parent_conn.recv)
# Check if result is an error dict
if isinstance(result, dict) and 'error' in result:
raise RuntimeError(f"Generate diff failed: {result['error']}")
print(f"[{time.time():.3f}] [Parent] Result received ({len(result) if result else 0} bytes)", flush=True)
break
await asyncio.sleep(0) # Yield control to event loop
else:
from loguru import logger
logger.critical(f"[OpenCV subprocess] Timeout waiting for generate_diff result after {POLL_TIMEOUT_ABSOLUTE}s (subprocess may be hung)")
print(f"[{time.time():.3f}] [Parent] Timeout waiting for result after {POLL_TIMEOUT_ABSOLUTE}s", flush=True)
raise TimeoutError(f"Generate diff subprocess timeout after {POLL_TIMEOUT_ABSOLUTE}s")
except Exception as e:
print(f"[{time.time():.3f}] [Parent] Error receiving diff: {e}", flush=True)
raise
finally:
# Always close pipe first
try:
parent_conn.close()
except Exception:
pass
# Try graceful shutdown (async-safe)
print(f"[{time.time():.3f}] [Parent] Waiting for subprocess to exit (5s timeout)", flush=True)
join_start = time.time()
await asyncio.to_thread(p.join, 5)
join_elapsed = time.time() - join_start
print(f"[{time.time():.3f}] [Parent] First join took {join_elapsed:.2f}s", flush=True)
if p.is_alive():
print(f"[{time.time():.3f}] [Parent] Process didn't exit gracefully, terminating", flush=True)
term_start = time.time()
p.terminate()
await asyncio.to_thread(p.join, 3)
term_elapsed = time.time() - term_start
print(f"[{time.time():.3f}] [Parent] Terminate+join took {term_elapsed:.2f}s", flush=True)
if p.is_alive():
print(f"[{time.time():.3f}] [Parent] Process didn't terminate, killing", flush=True)
kill_start = time.time()
p.kill()
await asyncio.to_thread(p.join, 1)
kill_elapsed = time.time() - kill_start
print(f"[{time.time():.3f}] [Parent] Kill+join took {kill_elapsed:.2f}s", flush=True)
print(f"[{time.time():.3f}] [Parent] Subprocess cleanup complete, returning result", flush=True)
return result
def _worker_draw_bounding_box(conn, img_bytes, x, y, width, height, color, thickness):
"""
Worker function for drawing bounding box on image.
"""
import time
try:
import cv2
cv2.setNumThreads(1)
print(f"[{time.time():.3f}] [Worker] Draw bounding box worker starting", flush=True)
# Decode image
img = cv2.imdecode(np.frombuffer(img_bytes, np.uint8), cv2.IMREAD_COLOR)
if img is None:
print(f"[{time.time():.3f}] [Worker] Failed to decode image", flush=True)
conn.send(None)
return
# Draw rectangle (BGR format)
cv2.rectangle(img, (x, y), (x + width, y + height), color, thickness)
# Encode back to PNG
_, encoded = cv2.imencode('.png', img)
result_bytes = encoded.tobytes()
print(f"[{time.time():.3f}] [Worker] Bounding box drawn ({len(result_bytes)} bytes)", flush=True)
conn.send(result_bytes)
except Exception as e:
print(f"[{time.time():.3f}] [Worker] Draw bounding box error: {e}", flush=True)
import traceback
traceback.print_exc()
# Send error info as dict so parent can re-raise
conn.send({'error': str(e), 'traceback': traceback.format_exc()})
finally:
conn.close()
async def draw_bounding_box_isolated(img_bytes, x, y, width, height, color=(255, 0, 0), thickness=3):
"""
Draw bounding box on image in isolated subprocess (async-safe).
Args:
img_bytes: Image data as bytes
x: Left coordinate
y: Top coordinate
width: Box width
height: Box height
color: BGR color tuple (default: blue)
thickness: Line thickness in pixels
Returns:
bytes: PNG image with bounding box or None on failure
"""
import time
import asyncio
print(f"[{time.time():.3f}] [Parent] Starting draw_bounding_box subprocess", flush=True)
ctx = multiprocessing.get_context('spawn')
parent_conn, child_conn = ctx.Pipe()
p = ctx.Process(
target=_worker_draw_bounding_box,
args=(child_conn, img_bytes, x, y, width, height, color, thickness)
)
print(f"[{time.time():.3f}] [Parent] Starting subprocess", flush=True)
p.start()
print(f"[{time.time():.3f}] [Parent] Subprocess started (pid={p.pid}), waiting for result ({POLL_TIMEOUT_ABSOLUTE}s timeout)", flush=True)
result = None
try:
# Async-friendly polling: check in small intervals without blocking event loop
deadline = time.time() + POLL_TIMEOUT_ABSOLUTE
while time.time() < deadline:
# Run poll() in thread to avoid blocking event loop
has_data = await asyncio.to_thread(parent_conn.poll, 0.1)
if has_data:
print(f"[{time.time():.3f}] [Parent] Result available, receiving", flush=True)
# Run recv() in thread too
result = await asyncio.to_thread(parent_conn.recv)
# Check if result is an error dict
if isinstance(result, dict) and 'error' in result:
raise RuntimeError(f"Draw bounding box failed: {result['error']}")
print(f"[{time.time():.3f}] [Parent] Result received ({len(result) if result else 0} bytes)", flush=True)
break
# Yield control to event loop
await asyncio.sleep(0)
else:
from loguru import logger
logger.critical(f"[OpenCV subprocess] Timeout waiting for draw_bounding_box result after {POLL_TIMEOUT_ABSOLUTE}s (subprocess may be hung)")
print(f"[{time.time():.3f}] [Parent] Timeout waiting for result after {POLL_TIMEOUT_ABSOLUTE}s", flush=True)
raise TimeoutError(f"Draw bounding box subprocess timeout after {POLL_TIMEOUT_ABSOLUTE}s")
except Exception as e:
print(f"[{time.time():.3f}] [Parent] Error receiving result: {e}", flush=True)
raise
finally:
# Always close pipe first
try:
parent_conn.close()
except Exception:
pass
# Try graceful shutdown (run join in thread to avoid blocking)
print(f"[{time.time():.3f}] [Parent] Waiting for subprocess to exit (3s timeout)", flush=True)
join_start = time.time()
await asyncio.to_thread(p.join, 3)
join_elapsed = time.time() - join_start
print(f"[{time.time():.3f}] [Parent] First join took {join_elapsed:.2f}s", flush=True)
if p.is_alive():
print(f"[{time.time():.3f}] [Parent] Process didn't exit gracefully, terminating", flush=True)
term_start = time.time()
p.terminate()
await asyncio.to_thread(p.join, 2)
term_elapsed = time.time() - term_start
print(f"[{time.time():.3f}] [Parent] Terminate+join took {term_elapsed:.2f}s", flush=True)
if p.is_alive():
print(f"[{time.time():.3f}] [Parent] Process didn't terminate, killing", flush=True)
kill_start = time.time()
p.kill()
await asyncio.to_thread(p.join, 1)
kill_elapsed = time.time() - kill_start
print(f"[{time.time():.3f}] [Parent] Kill+join took {kill_elapsed:.2f}s", flush=True)
print(f"[{time.time():.3f}] [Parent] Subprocess cleanup complete, returning result", flush=True)
return result
def _worker_calculate_percentage(conn, img_bytes_from, img_bytes_to, pixel_difference_threshold, blur_sigma, max_width, max_height):
"""
Worker function for calculating change percentage.
"""
import time
try:
import cv2
cv2.setNumThreads(1)
# Decode images
img_from = cv2.imdecode(np.frombuffer(img_bytes_from, np.uint8), cv2.IMREAD_COLOR)
img_to = cv2.imdecode(np.frombuffer(img_bytes_to, np.uint8), cv2.IMREAD_COLOR)
# Resize if needed
if img_from.shape != img_to.shape:
img_from = cv2.resize(img_from, (img_to.shape[1], img_to.shape[0]))
# Downscale to max dimensions
h, w = img_to.shape[:2]
if w > max_width or h > max_height:
scale = min(max_width / w, max_height / h)
new_w = int(w * scale)
new_h = int(h * scale)
img_from = cv2.resize(img_from, (new_w, new_h))
img_to = cv2.resize(img_to, (new_w, new_h))
# Convert to grayscale
gray_from = cv2.cvtColor(img_from, cv2.COLOR_BGR2GRAY)
gray_to = cv2.cvtColor(img_to, cv2.COLOR_BGR2GRAY)
# Optional blur
if blur_sigma > 0:
ksize = int(2 * round(3 * blur_sigma)) + 1
if ksize % 2 == 0:
ksize += 1
gray_from = cv2.GaussianBlur(gray_from, (ksize, ksize), blur_sigma)
gray_to = cv2.GaussianBlur(gray_to, (ksize, ksize), blur_sigma)
# Calculate difference
diff = cv2.absdiff(gray_from, gray_to)
# Apply threshold
_, thresholded = cv2.threshold(diff, int(pixel_difference_threshold), 255, cv2.THRESH_BINARY)
# Calculate percentage
total_pixels = thresholded.size
changed_pixels = np.count_nonzero(thresholded)
change_percentage = (changed_pixels / total_pixels) * 100.0
conn.send(float(change_percentage))
except Exception as e:
print(f"[{time.time():.3f}] [Worker] Calculate percentage error: {e}", flush=True)
import traceback
traceback.print_exc()
# Send error info as dict so parent can re-raise
conn.send({'error': str(e), 'traceback': traceback.format_exc()})
finally:
conn.close()
async def calculate_change_percentage_isolated(img_bytes_from, img_bytes_to, pixel_difference_threshold, blur_sigma, max_width, max_height):
"""
Calculate change percentage in isolated subprocess (async-safe).
Returns:
float: Change percentage
"""
import time
import asyncio
print(f"[{time.time():.3f}] [Parent] Starting calculate_percentage subprocess", flush=True)
ctx = multiprocessing.get_context('spawn')
parent_conn, child_conn = ctx.Pipe()
p = ctx.Process(
target=_worker_calculate_percentage,
args=(child_conn, img_bytes_from, img_bytes_to, pixel_difference_threshold, blur_sigma, max_width, max_height)
)
print(f"[{time.time():.3f}] [Parent] Starting subprocess", flush=True)
p.start()
print(f"[{time.time():.3f}] [Parent] Subprocess started (pid={p.pid}), waiting for result ({POLL_TIMEOUT_ABSOLUTE}s timeout)", flush=True)
result = 0.0
try:
# Async-friendly polling: check in small intervals without blocking event loop
deadline = time.time() + POLL_TIMEOUT_ABSOLUTE
while time.time() < deadline:
# Run poll() in thread to avoid blocking event loop
has_data = await asyncio.to_thread(parent_conn.poll, 0.1)
if has_data:
print(f"[{time.time():.3f}] [Parent] Result available, receiving", flush=True)
result = await asyncio.to_thread(parent_conn.recv)
# Check if result is an error dict
if isinstance(result, dict) and 'error' in result:
raise RuntimeError(f"Calculate change percentage failed: {result['error']}")
print(f"[{time.time():.3f}] [Parent] Result received: {result:.2f}%", flush=True)
break
await asyncio.sleep(0) # Yield control to event loop
else:
from loguru import logger
logger.critical(f"[OpenCV subprocess] Timeout waiting for calculate_change_percentage result after {POLL_TIMEOUT_ABSOLUTE}s (subprocess may be hung)")
print(f"[{time.time():.3f}] [Parent] Timeout waiting for result after {POLL_TIMEOUT_ABSOLUTE}s", flush=True)
raise TimeoutError(f"Calculate change percentage subprocess timeout after {POLL_TIMEOUT_ABSOLUTE}s")
except Exception as e:
print(f"[{time.time():.3f}] [Parent] Error receiving percentage: {e}", flush=True)
raise
finally:
# Always close pipe first
try:
parent_conn.close()
except Exception:
pass
# Try graceful shutdown (async-safe)
print(f"[{time.time():.3f}] [Parent] Waiting for subprocess to exit (5s timeout)", flush=True)
join_start = time.time()
await asyncio.to_thread(p.join, 5)
join_elapsed = time.time() - join_start
print(f"[{time.time():.3f}] [Parent] First join took {join_elapsed:.2f}s", flush=True)
if p.is_alive():
print(f"[{time.time():.3f}] [Parent] Process didn't exit gracefully, terminating", flush=True)
term_start = time.time()
p.terminate()
await asyncio.to_thread(p.join, 3)
term_elapsed = time.time() - term_start
print(f"[{time.time():.3f}] [Parent] Terminate+join took {term_elapsed:.2f}s", flush=True)
if p.is_alive():
print(f"[{time.time():.3f}] [Parent] Process didn't terminate, killing", flush=True)
kill_start = time.time()
p.kill()
await asyncio.to_thread(p.join, 1)
kill_elapsed = time.time() - kill_start
print(f"[{time.time():.3f}] [Parent] Kill+join took {kill_elapsed:.2f}s", flush=True)
print(f"[{time.time():.3f}] [Parent] Subprocess cleanup complete, returning result", flush=True)
return result

View File

@@ -1,340 +0,0 @@
"""
LibVIPS implementation of ImageDiffHandler.
Uses pyvips for high-performance image processing with streaming architecture
and low memory footprint. Ideal for large screenshots (8000px+).
"""
from __future__ import annotations
import os
from typing import Optional, Tuple, Any, TYPE_CHECKING
from loguru import logger
if TYPE_CHECKING:
import pyvips
try:
import pyvips
PYVIPS_AVAILABLE = True
except ImportError:
PYVIPS_AVAILABLE = False
logger.warning("pyvips not available - install with: pip install pyvips")
from . import ImageDiffHandler
class LibvipsImageDiffHandler(ImageDiffHandler):
"""
LibVIPS implementation using streaming architecture.
Benefits:
- 3x faster than ImageMagick
- 5x less memory than PIL
- Automatic multi-threading
- Streaming - processes images in chunks
"""
def __init__(self):
if not PYVIPS_AVAILABLE:
raise ImportError("pyvips is not installed. Install with: pip install pyvips")
def load_from_bytes(self, img_bytes: bytes) -> pyvips.Image:
"""Load image from bytes using libvips streaming."""
return pyvips.Image.new_from_buffer(img_bytes, '')
def save_to_bytes(self, img: pyvips.Image, format: str = 'png', quality: int = 85) -> bytes:
"""
Save image to bytes using temp file.
Note: Uses temp file instead of write_to_buffer() to avoid C memory leak.
See: https://github.com/libvips/pyvips/issues/234
"""
import tempfile
format = format.lower()
try:
if format == 'png':
suffix = '.png'
write_args = {'compression': 6}
elif format in ['jpg', 'jpeg']:
suffix = '.jpg'
write_args = {'Q': quality}
else:
raise ValueError(f"Unsupported format: {format}")
# Use temp file to avoid write_to_buffer() memory leak
with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
temp_path = tmp.name
# Write to file
img.write_to_file(temp_path, **write_args)
# Read bytes and clean up
with open(temp_path, 'rb') as f:
image_bytes = f.read()
os.unlink(temp_path)
return image_bytes
except Exception as e:
logger.error(f"Failed to save via temp file: {e}")
# Fallback to write_to_buffer if temp file fails
if format == 'png':
return img.write_to_buffer('.png', compression=6)
else:
return img.write_to_buffer('.jpg', Q=quality)
def crop(self, img: pyvips.Image, left: int, top: int, right: int, bottom: int) -> pyvips.Image:
"""Crop image using libvips."""
width = right - left
height = bottom - top
return img.crop(left, top, width, height)
def resize(self, img: pyvips.Image, max_width: int, max_height: int) -> pyvips.Image:
"""
Resize image maintaining aspect ratio.
Uses thumbnail_image for efficient downscaling with streaming.
"""
width, height = img.width, img.height
if width <= max_width and height <= max_height:
return img
# Calculate scaling to fit within max dimensions
width_ratio = max_width / width if width > max_width else 1.0
height_ratio = max_height / height if height > max_height else 1.0
ratio = min(width_ratio, height_ratio)
new_width = int(width * ratio)
new_height = int(height * ratio)
logger.debug(f"Resizing image: {width}x{height} -> {new_width}x{new_height}")
# thumbnail_image is faster than resize for downscaling
return img.thumbnail_image(new_width, height=new_height)
def get_dimensions(self, img: pyvips.Image) -> Tuple[int, int]:
"""Get image dimensions."""
return (img.width, img.height)
def to_grayscale(self, img: pyvips.Image) -> pyvips.Image:
"""Convert to grayscale using 'b-w' colorspace."""
return img.colourspace('b-w')
def gaussian_blur(self, img: pyvips.Image, sigma: float) -> pyvips.Image:
"""Apply Gaussian blur."""
if sigma > 0:
return img.gaussblur(sigma)
return img
def absolute_difference(self, img1: pyvips.Image, img2: pyvips.Image) -> pyvips.Image:
"""
Calculate absolute difference using operator overloading.
LibVIPS supports arithmetic operations between images.
"""
return (img1 - img2).abs()
def threshold(self, img: pyvips.Image, threshold_value: int) -> Tuple[float, pyvips.Image]:
"""
Apply threshold and calculate change percentage.
Uses ifthenelse for efficient thresholding.
"""
# Create binary mask: pixels above threshold = 255, others = 0
mask = (img > threshold_value).ifthenelse(255, 0)
# Calculate percentage by averaging mask values
# avg() returns mean pixel value (0-255)
# Divide by 255 to get proportion, multiply by 100 for percentage
mean_value = mask.avg()
change_percentage = (mean_value / 255.0) * 100.0
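# e.g. if 5% of the mask pixels are 255, avg() returns 12.75 and
# change_percentage = (12.75 / 255) * 100 = 5.0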
return float(change_percentage), mask
def apply_red_overlay(self, img: pyvips.Image, mask: pyvips.Image) -> bytes:
"""
Apply red overlay where mask is True (50% blend).
Args:
img: Color image (will be converted to RGB if needed)
mask: Binary mask (255 where changed, 0 elsewhere)
Returns:
JPEG bytes with red overlay
"""
import tempfile
# Ensure RGB colorspace
if img.bands == 1:
img = img.colourspace('srgb')
# Normalize mask to 0-1 range for blending
mask_normalized = mask / 255.0
# Split into R, G, B channels
channels = img.bandsplit()
r, g, b = channels[0], channels[1], channels[2]
# Apply red overlay (50% blend):
# Where mask is 1: blend 50% original with 50% red (255)
# Where mask is 0: keep original
r = r * (1 - mask_normalized * 0.5) + 127.5 * mask_normalized
g = g * (1 - mask_normalized * 0.5)
b = b * (1 - mask_normalized * 0.5)
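# e.g. where the mask is fully set, r becomes 0.5*r + 127.5 (pushed toward red)
# while g and b are halved; where the mask is 0, all channels pass through unchanged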
# Recombine channels
result = r.bandjoin([g, b])
# CRITICAL: Use temp file instead of write_to_buffer()
# write_to_buffer() leaks C memory that isn't returned to OS
# See: https://github.com/libvips/pyvips/issues/234
try:
with tempfile.NamedTemporaryFile(suffix='.jpg', delete=False) as tmp:
temp_path = tmp.name
# Write to file (doesn't leak like write_to_buffer)
result.write_to_file(temp_path, Q=85)
# Read bytes and clean up
with open(temp_path, 'rb') as f:
image_bytes = f.read()
os.unlink(temp_path)
return image_bytes
except Exception as e:
logger.error(f"Failed to write image via temp file: {e}")
# Fallback to write_to_buffer if temp file fails
return result.write_to_buffer('.jpg', Q=85)
def close(self, img: pyvips.Image) -> None:
"""
LibVIPS uses automatic reference counting.
No explicit cleanup needed - memory freed when references drop to zero.
"""
pass
def find_template(
self,
img: pyvips.Image,
template_img: pyvips.Image,
original_bbox: Tuple[int, int, int, int],
search_tolerance: float = 0.2
) -> Optional[Tuple[int, int, int, int]]:
"""
Find template in image using OpenCV template matching.
Note: This temporarily converts to numpy for OpenCV operations since
libvips doesn't have template matching built-in.
"""
import cv2
import numpy as np
try:
left, top, right, bottom = original_bbox
width = right - left
height = bottom - top
# Calculate search region
margin_x = int(width * search_tolerance)
margin_y = int(height * search_tolerance)
search_left = max(0, left - margin_x)
search_top = max(0, top - margin_y)
search_right = min(img.width, right + margin_x)
search_bottom = min(img.height, bottom + margin_y)
# Crop search region
search_region = self.crop(img, search_left, search_top, search_right, search_bottom)
# Convert to numpy arrays for OpenCV
search_array = np.ndarray(
buffer=search_region.write_to_memory(),
dtype=np.uint8,
shape=[search_region.height, search_region.width, search_region.bands]
)
template_array = np.ndarray(
buffer=template_img.write_to_memory(),
dtype=np.uint8,
shape=[template_img.height, template_img.width, template_img.bands]
)
# Convert to grayscale
if len(search_array.shape) == 3:
search_gray = cv2.cvtColor(search_array, cv2.COLOR_RGB2GRAY)
else:
search_gray = search_array
if len(template_array.shape) == 3:
template_gray = cv2.cvtColor(template_array, cv2.COLOR_RGB2GRAY)
else:
template_gray = template_array
logger.debug(f"Searching for template in region: ({search_left}, {search_top}) to ({search_right}, {search_bottom})")
# Perform template matching
result = cv2.matchTemplate(search_gray, template_gray, cv2.TM_CCOEFF_NORMED)
min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(result)
logger.debug(f"Template matching confidence: {max_val:.2%}")
# Check if match is good enough (80% confidence threshold)
if max_val >= 0.8:
# Calculate new bounding box in original image coordinates
match_x = search_left + max_loc[0]
match_y = search_top + max_loc[1]
new_bbox = (match_x, match_y, match_x + width, match_y + height)
# Calculate movement distance
move_x = abs(match_x - left)
move_y = abs(match_y - top)
logger.info(f"Template found at ({match_x}, {match_y}), "
f"moved {move_x}px horizontally, {move_y}px vertically, "
f"confidence: {max_val:.2%}")
return new_bbox
else:
logger.warning(f"Template match confidence too low: {max_val:.2%} (need 80%)")
return None
except Exception as e:
logger.error(f"Template matching error: {e}")
return None
def save_template(
self,
img: pyvips.Image,
bbox: Tuple[int, int, int, int],
output_path: str
) -> bool:
"""
Save a cropped region as a template file.
"""
try:
left, top, right, bottom = bbox
width = right - left
height = bottom - top
# Ensure output directory exists
os.makedirs(os.path.dirname(output_path), exist_ok=True)
# Crop template region
template = self.crop(img, left, top, right, bottom)
# Save as PNG
template.write_to_file(output_path, compression=6)
logger.info(f"Saved template: {output_path} ({width}x{height}px)")
return True
except Exception as e:
logger.error(f"Failed to save template: {e}")
return False

View File

@@ -1,109 +0,0 @@
"""
Preview rendering for SSIM screenshot processor.
Renders images properly in the browser instead of showing raw bytes.
"""
from loguru import logger
def get_asset(asset_name, watch, datastore, request):
"""
Get processor-specific binary assets for preview streaming.
This function supports serving images as separate HTTP responses instead
of embedding them as base64 in the HTML template, solving memory issues
with large screenshots.
Supported assets:
- 'screenshot': The screenshot for the specified version
Args:
asset_name: Name of the asset to retrieve ('screenshot')
watch: Watch object
datastore: Datastore object
request: Flask request (for version query param)
Returns:
tuple: (binary_data, content_type, cache_control_header) or None if not found
"""
if asset_name != 'screenshot':
return None
versions = list(watch.history.keys())
if len(versions) == 0:
return None
# Get the version from query string (default: latest)
preferred_version = request.args.get('version')
timestamp = versions[-1]
if preferred_version and preferred_version in versions:
timestamp = preferred_version
try:
screenshot_bytes = watch.get_history_snapshot(timestamp=timestamp)
# Verify we got bytes (should always be bytes for image files)
if not isinstance(screenshot_bytes, bytes):
logger.error(f"Expected bytes but got {type(screenshot_bytes)} for screenshot at {timestamp}")
return None
# Detect image format using puremagic (same as Watch.py)
try:
import puremagic
detections = puremagic.magic_string(screenshot_bytes[:2048])
if detections:
mime_type = detections[0].mime_type
logger.trace(f"Detected MIME type: {mime_type}")
else:
mime_type = 'image/png' # Default fallback
except Exception as e:
logger.warning(f"puremagic detection failed: {e}, using 'image/png' fallback")
mime_type = 'image/png'
return (screenshot_bytes, mime_type, 'public, max-age=10')
except Exception as e:
logger.error(f"Failed to load screenshot for preview asset: {e}")
return None
def render(watch, datastore, request, url_for, render_template, flash, redirect):
"""
Render the preview page for screenshot watches.
Args:
watch: Watch object
datastore: Datastore object
request: Flask request
url_for: Flask url_for function
render_template: Flask render_template function
flash: Flask flash function
redirect: Flask redirect function
Returns:
Rendered template or redirect
"""
versions = list(watch.history.keys())
if len(versions) == 0:
flash("Preview unavailable - No snapshots captured yet", "error")
return redirect(url_for('watchlist.index'))
# Get the version to display (default: latest)
preferred_version = request.args.get('version')
timestamp = versions[-1]
if preferred_version and preferred_version in versions:
timestamp = preferred_version
# Render custom template for image preview
# Screenshot is now served via separate /processor-asset/ endpoint instead of base64
# This significantly reduces memory usage by not embedding large images in HTML
return render_template(
'image_ssim_diff/preview.html',
watch=watch,
uuid=watch.get('uuid'),
versions=versions,
timestamp=timestamp,
current_diff_url=watch['url']
)

View File

@@ -1,244 +0,0 @@
"""
Core fast screenshot comparison processor.
Uses OpenCV with subprocess isolation for high-performance, low-memory
image processing. All operations run in isolated subprocesses for complete
memory cleanup and stability.
"""
import hashlib
import os
import time
from loguru import logger
from changedetectionio.processors import difference_detection_processor, SCREENSHOT_FORMAT_PNG
from changedetectionio.processors.exceptions import ProcessorException
from . import SCREENSHOT_COMPARISON_THRESHOLD_OPTIONS_DEFAULT
# All image operations now use OpenCV via isolated_opencv subprocess handler
# Template matching temporarily disabled pending OpenCV implementation
name = 'Visual / Image screenshot change detection'
description = 'Compares screenshots using a fast OpenCV algorithm, 10-100x faster than SSIM'
processor_weight = 2
list_badge_text = "Visual"
class perform_site_check(difference_detection_processor):
"""Fast screenshot comparison processor using OpenCV."""
# Override to use PNG format for better image comparison (JPEG compression creates noise)
screenshot_format = SCREENSHOT_FORMAT_PNG
def run_changedetection(self, watch):
"""
Perform screenshot comparison using OpenCV subprocess handler.
Returns:
tuple: (changed_detected, update_obj, screenshot_bytes)
"""
now = time.time()
# Get the current screenshot
if not self.fetcher.screenshot:
raise ProcessorException(
message="No screenshot available. Ensure the watch is configured to use a real browser.",
url=watch.get('url')
)
self.screenshot = self.fetcher.screenshot
self.xpath_data = self.fetcher.xpath_data
# Quick MD5 check - skip expensive comparison if images are identical
from changedetectionio.content_fetchers.exceptions import checksumFromPreviousCheckWasTheSame
current_md5 = hashlib.md5(self.screenshot).hexdigest()
previous_md5 = watch.get('previous_md5')
if previous_md5 and current_md5 == previous_md5:
logger.debug(f"UUID: {watch.get('uuid')} - Screenshot MD5 unchanged ({current_md5}), skipping comparison")
raise checksumFromPreviousCheckWasTheSame()
else:
logger.debug(f"UUID: {watch.get('uuid')} - Screenshot MD5 changed")
# Get pixel difference threshold sensitivity (per-watch > global)
# This controls how different a pixel must be (0-255 scale) to count as "changed"
pixel_difference_threshold_sensitivity = watch.get('comparison_threshold')
if not pixel_difference_threshold_sensitivity:
pixel_difference_threshold_sensitivity = self.datastore.data['settings']['application'].get('comparison_threshold', SCREENSHOT_COMPARISON_THRESHOLD_OPTIONS_DEFAULT)
try:
pixel_difference_threshold_sensitivity = int(pixel_difference_threshold_sensitivity)
except (ValueError, TypeError):
logger.warning(f"Invalid pixel_difference_threshold_sensitivity value '{pixel_difference_threshold_sensitivity}', using default")
pixel_difference_threshold_sensitivity = SCREENSHOT_COMPARISON_THRESHOLD_OPTIONS_DEFAULT
# Check if bounding box is set (for drawn area mode)
# Read from processor-specific config JSON file (named after processor)
crop_region = None
# Automatically use the processor name from watch config as filename
processor_name = watch.get('processor', 'default')
config_filename = f'{processor_name}.json'
processor_config = self.get_extra_watch_config(config_filename) or {}
bounding_box = processor_config.get('bounding_box')
# Template matching for tracking content movement
template_matching_enabled = processor_config.get('auto_track_region', False)  # TODO: currently unused, disabled pending OpenCV template matching
if bounding_box:
try:
# Parse bounding box: "x,y,width,height"
parts = [int(p.strip()) for p in bounding_box.split(',')]
if len(parts) == 4:
x, y, width, height = parts
# Crop uses (left, top, right, bottom)
crop_region = (max(0, x), max(0, y), x + width, y + height)
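# e.g. bounding_box "100,50,300,200" -> crop_region (100, 50, 400, 250)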
logger.info(f"UUID: {watch.get('uuid')} - Bounding box enabled: cropping to region {crop_region} (x={x}, y={y}, w={width}, h={height})")
else:
logger.warning(f"UUID: {watch.get('uuid')} - Invalid bounding box format: {bounding_box} (expected 4 values)")
except Exception as e:
logger.warning(f"UUID: {watch.get('uuid')} - Failed to parse bounding box '{bounding_box}': {e}")
# If no bounding box, check if visual selector (include_filters) is set for region-based comparison
if not crop_region:
include_filters = watch.get('include_filters', [])
if include_filters and len(include_filters) > 0:
# Get the first filter to use for cropping
first_filter = include_filters[0].strip()
if first_filter and self.xpath_data:
try:
import json
# xpath_data is JSON string from browser
xpath_data_obj = json.loads(self.xpath_data) if isinstance(self.xpath_data, str) else self.xpath_data
# Find the bounding box for the first filter
for element in xpath_data_obj.get('size_pos', []):
# Match the filter with the element's xpath
if element.get('xpath') == first_filter and element.get('highlight_as_custom_filter'):
# Found the element - extract crop coordinates
left = element.get('left', 0)
top = element.get('top', 0)
width = element.get('width', 0)
height = element.get('height', 0)
# Crop uses (left, top, right, bottom)
crop_region = (max(0, left), max(0, top), left + width, top + height)
logger.info(f"UUID: {watch.get('uuid')} - Visual selector enabled: cropping to region {crop_region} for filter: {first_filter}")
break
except Exception as e:
logger.warning(f"UUID: {watch.get('uuid')} - Failed to parse xpath_data for visual selector: {e}")
# Store original crop region for template matching
original_crop_region = crop_region
# Check if this is the first check (no previous history)
history_keys = list(watch.history.keys())
if len(history_keys) == 0:
# First check - save baseline, no comparison
logger.info(f"UUID: {watch.get('uuid')} - First check for watch {watch.get('uuid')} - saving baseline screenshot")
# All image work happens in isolated subprocesses, so no explicit cleanup is needed here
update_obj = {
'previous_md5': hashlib.md5(self.screenshot).hexdigest(),
'last_error': False
}
logger.trace(f"Processed in {time.time() - now:.3f}s")
return False, update_obj, self.screenshot
# Get previous screenshot bytes from history
previous_timestamp = history_keys[-1]
previous_screenshot_bytes = watch.get_history_snapshot(timestamp=previous_timestamp)
# Screenshots are stored as PNG, so this should be bytes
if isinstance(previous_screenshot_bytes, str):
# If it's a string (shouldn't be for screenshots, but handle it)
previous_screenshot_bytes = previous_screenshot_bytes.encode('utf-8')
# Template matching is temporarily disabled pending OpenCV implementation
# crop_region calculated above will be used as-is
# Perform comparison in isolated subprocess to prevent memory leaks
try:
from .image_handler import isolated_opencv as process_screenshot_handler
# Get blur sigma
blur_sigma = float(os.getenv("OPENCV_BLUR_SIGMA", "0.8"))
# Get minimum change percentage (per-watch > global > env var default)
# This controls what percentage of pixels must change to trigger a detection
min_change_percentage = watch.get('min_change_percentage')
if min_change_percentage is None or min_change_percentage == '':
min_change_percentage = self.datastore.data['settings']['application'].get('min_change_percentage', float(os.getenv("OPENCV_MIN_CHANGE_PERCENT", "0.1")))
try:
min_change_percentage = float(min_change_percentage)
except (ValueError, TypeError):
logger.warning(f"Invalid min_change_percentage value '{min_change_percentage}', using default 0.1")
min_change_percentage = 0.1
logger.debug(f"UUID: {watch.get('uuid')} - Starting isolated subprocess comparison (crop_region={crop_region})")
# Compare using isolated subprocess with OpenCV (async-safe to avoid blocking event loop)
# Pass raw bytes and crop region - subprocess handles all image operations
import asyncio
import threading
# Async-safe wrapper: runs coroutine in new thread with its own event loop
# This prevents blocking the async update worker's event loop
def run_async_in_thread():
return asyncio.run(
process_screenshot_handler.compare_images_isolated(
previous_screenshot_bytes,
self.screenshot,
pixel_difference_threshold=pixel_difference_threshold_sensitivity,
blur_sigma=blur_sigma,
crop_region=crop_region # Pass crop region for isolated cropping
)
)
# Run in thread to avoid blocking event loop when called from async update worker
result_container = [None]
exception_container = [None]
def thread_target():
try:
result_container[0] = run_async_in_thread()
except Exception as e:
exception_container[0] = e
thread = threading.Thread(target=thread_target)
thread.start()
thread.join(timeout=60)
# A thread still alive after join() means the comparison timed out rather than failed
if thread.is_alive():
raise RuntimeError("Image comparison timed out after 60 seconds")
if exception_container[0]:
raise exception_container[0]
# Subprocess returns only the change score - we decide if it's a "change"
change_score = result_container[0]
if change_score is None:
raise RuntimeError("Image comparison subprocess returned no result")
changed_detected = change_score > min_change_percentage
logger.debug(f"UUID: {watch.get('uuid')} - Isolated subprocess comparison completed: score={change_score:.2f}%, min_change_threshold={min_change_percentage}%")
logger.info(f"{process_screenshot_handler.IMPLEMENTATION_NAME}: {change_score:.2f}% pixels changed, pixel_diff_threshold_sensitivity: {pixel_difference_threshold_sensitivity:.0f}")
except Exception as e:
logger.error(f"UUID: {watch.get('uuid')} - Failed to compare screenshots: {e}")
logger.trace(f"UUID: {watch.get('uuid')} - Processed in {time.time() - now:.3f}s")
raise ProcessorException(
message=f"UUID: {watch.get('uuid')} - Screenshot comparison failed: {e}",
url=watch.get('url')
)
# Return results
update_obj = {
'previous_md5': hashlib.md5(self.screenshot).hexdigest(),
'last_error': False
}
if changed_detected:
logger.info(f"UUID: {watch.get('uuid')} - Change detected using {process_screenshot_handler.IMPLEMENTATION_NAME}! Score: {change_score:.2f}%")
else:
logger.debug(f"UUID: {watch.get('uuid')} - No significant change using {process_screenshot_handler.IMPLEMENTATION_NAME}. Score: {change_score:.2f}%")
logger.trace(f"UUID: {watch.get('uuid')} - Processed in {time.time() - now:.3f}s")
return changed_detected, update_obj, self.screenshot
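The isolated_opencv handler imported above is not shown in this changeset. Going by the Dockerfile and requirements.txt notes elsewhere in this diff (OpenCV is optional, pixelmatch is the fallback), a module like it could select its backend at import time. The sketch below is an assumption for orientation only; the function name and signature are copied from the call site, everything else is hypothetical:

# Hypothetical sketch of an image_handler backend selection; the real module
# is not part of this diff and may differ. Assumes both screenshots are PNGs
# of identical dimensions (the real version also isolates this work in a
# subprocess to contain memory usage).
try:
    import cv2  # opencv-python-headless, optional on arm/v7 and arm/v8 builds
    import numpy as np
    IMPLEMENTATION_NAME = "OpenCV"
except ImportError:
    cv2 = None
    IMPLEMENTATION_NAME = "pixelmatch"

async def compare_images_isolated(before_bytes, after_bytes,
                                   pixel_difference_threshold=30,
                                   blur_sigma=0.8, crop_region=None):
    """Return the percentage of pixels that differ between two PNG byte strings."""
    if cv2 is None:
        raise RuntimeError("pixelmatch fallback not sketched here")
    before = cv2.imdecode(np.frombuffer(before_bytes, np.uint8), cv2.IMREAD_GRAYSCALE)
    after = cv2.imdecode(np.frombuffer(after_bytes, np.uint8), cv2.IMREAD_GRAYSCALE)
    if crop_region:
        left, top, right, bottom = crop_region
        before = before[top:bottom, left:right]
        after = after[top:bottom, left:right]
    if blur_sigma > 0:
        # A light blur suppresses antialiasing noise before differencing
        before = cv2.GaussianBlur(before, (0, 0), blur_sigma)
        after = cv2.GaussianBlur(after, (0, 0), blur_sigma)
    diff = cv2.absdiff(before, after)
    changed_pixels = (diff > pixel_difference_threshold).sum()
    return 100.0 * changed_pixels / diff.size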

View File

@@ -1,235 +0,0 @@
{% extends 'base.html' %}
{% from '_helpers.html' import render_field, render_checkbox_field, render_button %}
{% block content %}
<link rel="stylesheet" href="{{url_for('static_content', group='styles', filename='diff-image.css')}}?v={{ get_css_version() }}">
<script src="{{url_for('static_content', group='js', filename='diff-overview.js')}}" defer></script>
<div id="settings">
<form class="pure-form " action="{{ url_for('ui.ui_diff.diff_history_page', uuid=uuid) }}" method="GET" id="diff-form">
<fieldset class="diff-fieldset">
{% if versions|length >= 1 %}
<span style="white-space: nowrap;">
<label id="change-from" for="diff-from-version" class="from-to-label">From</label>
<select id="diff-from-version" name="from_version" class="needs-localtime">
{%- for version in versions|reverse -%}
<option value="{{ version }}" {% if version== from_version %} selected="" {% endif %}>
{{ version }}
</option>
{%- endfor -%}
</select>
</span>
<span style="white-space: nowrap;">
<label id="change-to" for="diff-to-version" class="from-to-label">To</label>
<select id="diff-to-version" name="to_version" class="needs-localtime">
{%- for version in versions|reverse -%}
<option value="{{ version }}" {% if version== to_version %} selected="" {% endif %}>
{{ version }}
</option>
{%- endfor -%}
</select>
</span>
{% endif %}
</fieldset>
<fieldset id="diff-style">
<span>
<strong>Change Detection:</strong> {{ "%.2f"|format(change_percentage) }}% of pixels changed
{% if change_percentage > 0.1 %}
<span class="change-detected">⚠ Change Detected</span>
{% else %}
<span class="no-change">✓ No Significant Change</span>
{% endif %}
</span>
</fieldset>
{%- if versions|length >= 2 -%}
<div id="keyboard-nav">
<strong>Keyboard: </strong>
<a href="" class="pure-button pure-button-primary" id="btn-previous"> &larr; Previous</a>
&nbsp; <a class="pure-button pure-button-primary" id="btn-next" href=""> &rarr; Next</a>
</div>
{%- endif -%}
</form>
</div>
<div id="screenshot-comparison">
<!-- Two-panel layout: Interactive slider + Static diff -->
<div class="comparison-grid">
<!-- Panel 1: Interactive Comparison Slider (Previous ↔ Current) -->
<div class="screenshot-panel">
<h3>Interactive Comparison</h3>
<div class="comparison-description">
Drag slider to compare Previous ({{ from_version|format_timestamp_timeago }})
vs Current ({{ to_version|format_timestamp_timeago }})
</div>
<div style="text-align: center; margin-bottom: 0.5em; display: flex; justify-content: center; gap: 1em;">
<a href="#" onclick="downloadImage('img-before', '{{ from_version }}'); return false;" class="download-link" title="Download previous snapshot">
<svg width="14" height="14" viewBox="0 0 16 16" fill="currentColor" style="display: inline-block;">
<path d="M8 12L3 7h3V1h4v6h3z"/>
<path d="M1 14h14v2H1z"/>
</svg>
Previous
</a>
<a href="#" onclick="downloadImage('img-after', '{{ to_version }}'); return false;" class="download-link" title="Download current snapshot">
<svg width="14" height="14" viewBox="0 0 16 16" fill="currentColor" style="display: inline-block;">
<path d="M8 12L3 7h3V1h4v6h3z"/>
<path d="M1 14h14v2H1z"/>
</svg>
Current
</a>
</div>
<div class="image-comparison" id="comparison-container">
<!-- Before image wrapper (Previous snapshot) -->
<div class="comparison-image-wrapper">
<img id="img-before" src="{{ url_for('ui.ui_diff.processor_asset', uuid=uuid, asset_name='before', from_version=from_version, to_version=to_version) }}" alt="Previous screenshot">
</div>
<!-- After image wrapper (Current snapshot) -->
<div class="comparison-image-wrapper comparison-after">
<img id="img-after" src="{{ url_for('ui.ui_diff.processor_asset', uuid=uuid, asset_name='after', from_version=from_version, to_version=to_version) }}" alt="Current screenshot">
</div>
<!-- Labels -->
<div class="comparison-labels">
<span class="comparison-label">Previous</span>
<span class="comparison-label">Current</span>
</div>
<!-- Draggable slider -->
<div class="comparison-slider" id="comparison-slider">
<div class="comparison-handle"></div>
</div>
</div>
</div>
<!-- Panel 2: Difference Visualization (Static) -->
<div class="screenshot-panel diff">
<h3>Difference Visualization</h3>
<div class="diff-section-header">
<span>Red = Changed Pixels</span>
</div>
<div style="text-align: center; margin-bottom: 0.5em;">
<a href="#" onclick="downloadImage('diff-image', '{{ to_version }}_diff'); return false;" class="download-link" title="Download difference image">
<svg width="14" height="14" viewBox="0 0 16 16" fill="currentColor" style="display: inline-block;">
<path d="M8 12L3 7h3V1h4v6h3z"/>
<path d="M1 14h14v2H1z"/>
</svg>
Download
</a>
</div>
<img id="diff-image" src="{{ url_for('ui.ui_diff.processor_asset', uuid=uuid, asset_name='rendered_diff', from_version=from_version, to_version=to_version) }}" alt="Difference visualization with red highlights">
</div>
</div>
{% if comparison_data and comparison_data.get('history') and comparison_data.history|length > 1 %}
<div class="comparison-history-section">
<h3>Comparison History</h3>
<p>Recent comparison results (last {{ comparison_data.history|length }} checks)</p>
<div style="overflow-x: auto;">
<table class="pure-table pure-table-striped" style="width: 100%;">
<thead>
<tr>
<th>Timestamp</th>
<th>Change %</th>
<th>Method</th>
<th>Changed?</th>
</tr>
</thead>
<tbody>
{% for entry in comparison_data.history|reverse %}
<tr>
<td>{{ entry.timestamp|format_timestamp_timeago }}</td>
<td>{{ "%.2f"|format(entry.change_percentage) }}%</td>
<td>{{ entry.method }}</td>
<td>
{% if entry.changed %}
<span class="history-changed-yes">Yes</span>
{% else %}
<span class="history-changed-no">No</span>
{% endif %}
</td>
</tr>
{% endfor %}
</tbody>
</table>
</div>
</div>
{% endif %}
</div>
<script>
function downloadImage(imageId, filename) {
// Get the image element
const img = document.getElementById(imageId);
const src = img.src;
// Helper: trigger a browser download for a Blob
function saveBlob(blob) {
// Determine file extension from MIME type
const extension = blob.type.includes('jpeg') ? '.jpeg' : '.png';
const url = URL.createObjectURL(blob);
const a = document.createElement('a');
a.href = url;
a.download = filename + extension;
document.body.appendChild(a);
a.click();
// Cleanup
setTimeout(() => {
document.body.removeChild(a);
URL.revokeObjectURL(url);
}, 100);
}
if (src.startsWith('data:')) {
// Convert base64 data URI to blob
const byteString = atob(src.split(',')[1]);
const mimeString = src.split(',')[0].split(':')[1].split(';')[0];
const ab = new ArrayBuffer(byteString.length);
const ia = new Uint8Array(ab);
for (let i = 0; i < byteString.length; i++) {
ia[i] = byteString.charCodeAt(i);
}
saveBlob(new Blob([ab], { type: mimeString }));
} else {
// The processor_asset endpoints above serve regular URLs, not data: URIs - fetch the bytes
fetch(src).then(response => response.blob()).then(saveBlob);
}
}
/**
* Synchronize comparison slider width with diff image width
* This ensures both panels display images at the same max-width
*/
function syncComparisonWidth() {
const diffImage = document.getElementById('diff-image');
const comparisonContainer = document.getElementById('comparison-container');
if (!diffImage || !comparisonContainer) return;
// Wait for diff image to load to get its actual rendered width
if (diffImage.complete) {
applyWidth();
} else {
diffImage.addEventListener('load', applyWidth);
}
function applyWidth() {
const diffImageWidth = diffImage.offsetWidth;
if (diffImageWidth > 0) {
comparisonContainer.style.maxWidth = diffImageWidth + 'px';
comparisonContainer.style.margin = '0 auto';
}
}
}
// Run on page load
if (document.readyState === 'loading') {
document.addEventListener('DOMContentLoaded', syncComparisonWidth);
} else {
syncComparisonWidth();
}
// Re-sync on window resize
window.addEventListener('resize', syncComparisonWidth);
</script>
<script src="{{ url_for('static_content', group='js', filename='comparison-slider.js') }}" defer></script>
{% endblock %}

View File

@@ -1,35 +0,0 @@
{% extends 'base.html' %}
{% block content %}
<script src="{{ url_for('static_content', group='js', filename='preview.js') }}" defer></script>
{% if versions|length >= 2 %}
<div id="diff-form" style="text-align: center;">
<form class="pure-form " action="" method="GET">
<fieldset>
<label for="preview-version">Select timestamp</label> <select id="preview-version"
name="version"
class="needs-localtime">
{% for version in versions|reverse %}
<option value="{{ version }}" {% if version == timestamp %} selected="" {% endif %}>
{{ version }}
</option>
{% endfor %}
</select>
<button type="submit" class="pure-button pure-button-primary">Go</button>
</fieldset>
</form>
<br>
<strong>Keyboard: </strong><a href="" class="pure-button pure-button-primary" id="btn-previous">
&larr; Previous</a> &nbsp; <a class="pure-button pure-button-primary" id="btn-next" href="">
&rarr; Next</a>
</div>
{% endif %}
<div id="screenshot-container" style="text-align: center; border: 1px solid #ddd; padding: 2em; background: #fafafa; border-radius: 4px;">
<h3 style="margin-top: 0;">Screenshot from {{ timestamp|format_timestamp_timeago }}</h3>
<img src="{{ url_for('ui.ui_preview.processor_asset', uuid=uuid, asset_name='screenshot', version=timestamp) }}"
alt="Screenshot preview"
style="max-width: 100%; height: auto; border: 1px solid #ccc; box-shadow: 0 2px 8px rgba(0,0,0,0.1); border-radius: 2px;">
</div>
{% endblock %}

View File

@@ -1,22 +0,0 @@
"""
DEPRECATED: All multiprocessing functions have been removed.
The image_ssim_diff processor now uses LibVIPS via ImageDiffHandler abstraction,
which provides superior performance and memory efficiency through streaming
architecture and automatic threading.
All image operations are now handled by:
- imagehandler.py: Abstract base class defining the interface
- libvips_handler.py: LibVIPS implementation with streaming and threading
Historical note: This file previously contained multiprocessing workers for:
- Template matching (find_region_with_template_matching_isolated)
- Template regeneration (regenerate_template_isolated)
- Image cropping (crop_image_isolated, crop_pil_image_isolated)
These have been replaced by handler methods which are:
- Faster (no subprocess overhead)
- More memory efficient (LibVIPS streaming)
- Cleaner (no multiprocessing deadlocks)
- Better tested (no logger/forking issues)
"""

View File

@@ -9,8 +9,6 @@ import time
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
name = 'Re-stock & Price detection for pages with a SINGLE product'
description = 'Detects if the product goes back to in-stock'
processor_weight = 1
list_badge_text = "Restock"
class UnableToExtractRestockData(Exception):
def __init__(self, status_code):

View File

@@ -19,8 +19,6 @@ urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
name = 'Webpage Text/HTML, JSON and PDF changes'
description = 'Detects all text changes where possible'
processor_weight = -100
list_badge_text = "Text"
JSON_FILTER_PREFIXES = ['json:', 'jq:', 'jqraw:']

View File

@@ -89,20 +89,20 @@ class RecheckPriorityQueue:
try:
# Wait for notification
self.sync_q.get(block=block, timeout=timeout)
# Get highest priority item
with self._lock:
if not self._priority_items:
logger.critical(f"CRITICAL: Queue notification received but no priority items available")
raise Exception("Priority queue inconsistency")
item = heapq.heappop(self._priority_items)
# Emit signals
self._emit_get_signals()
logger.debug(f"Successfully retrieved item: {self._get_item_uuid(item)}")
return item
except Exception as e:
logger.critical(f"CRITICAL: Failed to get item from queue: {str(e)}")
raise
@@ -141,20 +141,20 @@ class RecheckPriorityQueue:
try:
# Wait for notification
await self.async_q.get()
# Get highest priority item
with self._lock:
if not self._priority_items:
logger.critical(f"CRITICAL: Async queue notification received but no priority items available")
raise Exception("Priority queue inconsistency")
item = heapq.heappop(self._priority_items)
# Emit signals
self._emit_get_signals()
logger.debug(f"Successfully async retrieved item: {self._get_item_uuid(item)}")
return item
except Exception as e:
logger.critical(f"CRITICAL: Failed to async get item from queue: {str(e)}")
raise
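Both get() paths pop with heapq, which always yields the smallest element first, so items with lower priority numbers are served before higher ones. A self-contained illustration (example values only):

import heapq

priority_items = []
heapq.heappush(priority_items, (100, "scheduled recheck"))
heapq.heappush(priority_items, (1, "user-requested recheck"))

# heappop always returns the tuple with the lowest priority number first
print(heapq.heappop(priority_items))  # (1, 'user-requested recheck')
print(heapq.heappop(priority_items))  # (100, 'scheduled recheck')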

View File

@@ -24,19 +24,6 @@ $(document).ready(function () {
$(target).toggle();
});
// Handle processor radio button changes - update body class
$('input[name="processor"]').on('change', function() {
var selectedProcessor = $(this).val();
// Remove any existing processor-* classes from body
$('body').removeClass(function(index, className) {
return (className.match(/\bprocessor-\S+/g) || []).join(' ');
});
// Add the new processor class
$('body').addClass('processor-' + selectedProcessor);
});
// Time zone config related
$(".local-time").each(function (e) {
$(this).text(new Date($(this).data("utc")).toLocaleString());

View File

@@ -11,18 +11,6 @@ $(document).ready(() => {
let c, xctx, ctx;
let xScale = 1, yScale = 1;
let selectorImage, selectorImageRect, selectorData;
let elementHandlers = {}; // Store references to element selection handlers (needed for draw mode toggling)
// Box drawing mode variables (for image_ssim_diff processor)
let drawMode = false;
let isDrawing = false;
let isDragging = false;
let drawStartX, drawStartY;
let dragOffsetX, dragOffsetY;
let drawnBox = null;
let resizeHandle = null;
const HANDLE_SIZE = 8;
const isImageProcessor = $('input[value="image_ssim_diff"]').is(':checked');
// Global jQuery selectors with "Elem" appended
@@ -153,10 +141,6 @@ $(document).ready(() => {
setScale();
reflowSelector();
// Initialize draw mode after everything is set up
initializeDrawMode();
$fetchingUpdateNoticeElem.fadeOut();
});
}
@@ -217,14 +201,9 @@ $(document).ready(() => {
highlightCurrentSelected();
updateFiltersText();
// Store handler references for later use
elementHandlers.handleMouseMove = handleMouseMove.debounce(5);
elementHandlers.handleMouseDown = handleMouseDown.debounce(5);
elementHandlers.handleMouseLeave = highlightCurrentSelected.debounce(5);
$selectorCanvasElem.bind('mousemove', elementHandlers.handleMouseMove);
$selectorCanvasElem.bind('mousedown', elementHandlers.handleMouseDown);
$selectorCanvasElem.bind('mouseleave', elementHandlers.handleMouseLeave);
$selectorCanvasElem.bind('mousemove', handleMouseMove.debounce(5));
$selectorCanvasElem.bind('mousedown', handleMouseDown.debounce(5));
$selectorCanvasElem.bind('mouseleave', highlightCurrentSelected.debounce(5));
function handleMouseMove(e) {
if (!e.offsetX && !e.offsetY) {
@@ -278,372 +257,4 @@ $(document).ready(() => {
xctx.strokeRect(sel.left * xScale, sel.top * yScale, sel.width * xScale, sel.height * yScale);
});
}
// ============= BOX DRAWING MODE (for image_ssim_diff processor) =============
function initializeDrawMode() {
if (!isImageProcessor || !c) return;
const $selectorModeRadios = $('input[name="selector-mode"]');
const $boundingBoxField = $('#bounding_box');
const $selectionModeField = $('#selection_mode');
// Load existing selection mode if present
const savedMode = $selectionModeField.val();
if (savedMode && (savedMode === 'element' || savedMode === 'draw')) {
$selectorModeRadios.filter(`[value="${savedMode}"]`).prop('checked', true);
console.log('Loaded saved mode:', savedMode);
}
// Load existing bounding box if present
const existingBox = $boundingBoxField.val();
if (existingBox) {
try {
const parts = existingBox.split(',').map(p => parseFloat(p));
if (parts.length === 4) {
drawnBox = {
x: parts[0] * xScale,
y: parts[1] * yScale,
width: parts[2] * xScale,
height: parts[3] * yScale
};
console.log('Loaded saved bounding box:', existingBox);
}
} catch (e) {
console.error('Failed to parse existing bounding box:', e);
}
}
// Update mode when radio changes
$selectorModeRadios.off('change').on('change', function() {
const newMode = $(this).val();
drawMode = newMode === 'draw';
console.log('Mode changed to:', newMode);
// Save the mode to the hidden field
$selectionModeField.val(newMode);
if (drawMode) {
enableDrawMode();
} else {
disableDrawMode();
}
});
// Set initial mode based on which radio is checked
drawMode = $selectorModeRadios.filter(':checked').val() === 'draw';
console.log('Initial mode:', drawMode ? 'draw' : 'element');
// Save initial mode
$selectionModeField.val(drawMode ? 'draw' : 'element');
if (drawMode) {
enableDrawMode();
}
}
function enableDrawMode() {
console.log('Enabling draw mode...');
// Unbind element selection handlers
$selectorCanvasElem.unbind('mousemove mousedown mouseleave');
// Set cursor to crosshair
$selectorCanvasElem.css('cursor', 'crosshair');
// Bind draw mode handlers
$selectorCanvasElem.on('mousedown', handleDrawMouseDown);
$selectorCanvasElem.on('mousemove', handleDrawMouseMove);
$selectorCanvasElem.on('mouseup', handleDrawMouseUp);
$selectorCanvasElem.on('mouseleave', handleDrawMouseUp);
// Clear element selections and xpath display
currentSelections = [];
$includeFiltersElem.val('');
$selectorCurrentXpathElem.html('Draw mode - click and drag to select an area');
// Clear the canvas
if (ctx && xctx) {
ctx.clearRect(0, 0, c.width, c.height);
xctx.clearRect(0, 0, c.width, c.height);
}
// Redraw if we have an existing box
if (drawnBox) {
drawBox();
}
}
function disableDrawMode() {
console.log('Disabling draw mode, switching to element mode...');
// Unbind draw handlers
$selectorCanvasElem.unbind('mousedown mousemove mouseup mouseleave');
// Reset cursor
$selectorCanvasElem.css('cursor', 'default');
// Clear drawn box
drawnBox = null;
$('#bounding_box').val('');
// Clear the canvases
if (ctx && xctx) {
ctx.clearRect(0, 0, c.width, c.height);
xctx.clearRect(0, 0, c.width, c.height);
}
// Restore element selections from include_filters
currentSelections = [];
if (selectorData && selectorData['size_pos']) {
let existingFilters = splitToList($includeFiltersElem.val());
selectorData['size_pos'].forEach(sel => {
if ((!runInClearMode && sel.highlight_as_custom_filter) || existingFilters.includes(sel.xpath)) {
console.log("Restoring selection: " + sel.xpath);
currentSelections.push(sel);
}
});
}
// Re-enable element selection handlers using stored references
if (elementHandlers.handleMouseMove) {
$selectorCanvasElem.bind('mousemove', elementHandlers.handleMouseMove);
$selectorCanvasElem.bind('mousedown', elementHandlers.handleMouseDown);
$selectorCanvasElem.bind('mouseleave', elementHandlers.handleMouseLeave);
}
// Restore the element selection display
$selectorCurrentXpathElem.html('Hover over elements to select');
// Highlight the restored selections
highlightCurrentSelected();
}
function handleDrawMouseDown(e) {
const rect = c.getBoundingClientRect();
const x = e.clientX - rect.left;
const y = e.clientY - rect.top;
// Check if clicking on a resize handle
if (drawnBox) {
resizeHandle = getResizeHandle(x, y);
if (resizeHandle) {
isDrawing = true;
drawStartX = x;
drawStartY = y;
return;
}
// Check if clicking inside the box (for dragging)
if (isInsideBox(x, y)) {
isDragging = true;
dragOffsetX = x - drawnBox.x;
dragOffsetY = y - drawnBox.y;
$selectorCanvasElem.css('cursor', 'move');
return;
}
}
// Start new box
isDrawing = true;
drawStartX = x;
drawStartY = y;
drawnBox = { x: x, y: y, width: 0, height: 0 };
}
function handleDrawMouseMove(e) {
const rect = c.getBoundingClientRect();
const x = e.clientX - rect.left;
const y = e.clientY - rect.top;
// Update cursor based on position
if (!isDrawing && !isDragging && drawnBox) {
const handle = getResizeHandle(x, y);
if (handle) {
$selectorCanvasElem.css('cursor', getHandleCursor(handle));
} else if (isInsideBox(x, y)) {
$selectorCanvasElem.css('cursor', 'move');
} else {
$selectorCanvasElem.css('cursor', 'crosshair');
}
}
// Handle dragging the box
if (isDragging) {
drawnBox.x = x - dragOffsetX;
drawnBox.y = y - dragOffsetY;
drawBox();
return;
}
if (!isDrawing) return;
if (resizeHandle) {
// Resize existing box
resizeBox(x, y);
} else {
// Draw new box
drawnBox.width = x - drawStartX;
drawnBox.height = y - drawStartY;
}
drawBox();
}
function handleDrawMouseUp(e) {
if (!isDrawing && !isDragging) return;
isDrawing = false;
isDragging = false;
resizeHandle = null;
if (drawnBox) {
// Normalize box (handle negative dimensions)
if (drawnBox.width < 0) {
drawnBox.x += drawnBox.width;
drawnBox.width = Math.abs(drawnBox.width);
}
if (drawnBox.height < 0) {
drawnBox.y += drawnBox.height;
drawnBox.height = Math.abs(drawnBox.height);
}
// Constrain to canvas bounds
drawnBox.x = Math.max(0, Math.min(drawnBox.x, c.width - drawnBox.width));
drawnBox.y = Math.max(0, Math.min(drawnBox.y, c.height - drawnBox.height));
// Save to form field (convert from scaled to natural coordinates)
const naturalX = Math.round(drawnBox.x / xScale);
const naturalY = Math.round(drawnBox.y / yScale);
const naturalWidth = Math.round(drawnBox.width / xScale);
const naturalHeight = Math.round(drawnBox.height / yScale);
$('#bounding_box').val(`${naturalX},${naturalY},${naturalWidth},${naturalHeight}`);
drawBox();
}
}
function drawBox() {
if (!drawnBox) return;
// Clear and redraw
ctx.clearRect(0, 0, c.width, c.height);
xctx.clearRect(0, 0, c.width, c.height);
// Draw box
ctx.strokeStyle = STROKE_STYLE_REDLINE;
ctx.fillStyle = FILL_STYLE_REDLINE;
ctx.lineWidth = 3;
const drawX = drawnBox.width >= 0 ? drawnBox.x : drawnBox.x + drawnBox.width;
const drawY = drawnBox.height >= 0 ? drawnBox.y : drawnBox.y + drawnBox.height;
const drawW = Math.abs(drawnBox.width);
const drawH = Math.abs(drawnBox.height);
ctx.strokeRect(drawX, drawY, drawW, drawH);
ctx.fillRect(drawX, drawY, drawW, drawH);
// Draw resize handles
if (!isDrawing) {
drawResizeHandles(drawX, drawY, drawW, drawH);
}
}
function drawResizeHandles(x, y, w, h) {
ctx.fillStyle = '#fff';
ctx.strokeStyle = '#000';
ctx.lineWidth = 1;
const handles = [
{ x: x, y: y }, // top-left
{ x: x + w, y: y }, // top-right
{ x: x, y: y + h }, // bottom-left
{ x: x + w, y: y + h } // bottom-right
];
handles.forEach(handle => {
ctx.fillRect(handle.x - HANDLE_SIZE/2, handle.y - HANDLE_SIZE/2, HANDLE_SIZE, HANDLE_SIZE);
ctx.strokeRect(handle.x - HANDLE_SIZE/2, handle.y - HANDLE_SIZE/2, HANDLE_SIZE, HANDLE_SIZE);
});
}
function isInsideBox(x, y) {
if (!drawnBox) return false;
const drawX = drawnBox.width >= 0 ? drawnBox.x : drawnBox.x + drawnBox.width;
const drawY = drawnBox.height >= 0 ? drawnBox.y : drawnBox.y + drawnBox.height;
const drawW = Math.abs(drawnBox.width);
const drawH = Math.abs(drawnBox.height);
return x >= drawX && x <= drawX + drawW && y >= drawY && y <= drawY + drawH;
}
function getResizeHandle(x, y) {
if (!drawnBox) return null;
const drawX = drawnBox.width >= 0 ? drawnBox.x : drawnBox.x + drawnBox.width;
const drawY = drawnBox.height >= 0 ? drawnBox.y : drawnBox.y + drawnBox.height;
const drawW = Math.abs(drawnBox.width);
const drawH = Math.abs(drawnBox.height);
const handles = {
'tl': { x: drawX, y: drawY },
'tr': { x: drawX + drawW, y: drawY },
'bl': { x: drawX, y: drawY + drawH },
'br': { x: drawX + drawW, y: drawY + drawH }
};
for (const [key, handle] of Object.entries(handles)) {
if (Math.abs(x - handle.x) <= HANDLE_SIZE && Math.abs(y - handle.y) <= HANDLE_SIZE) {
return key;
}
}
return null;
}
function getHandleCursor(handle) {
const cursors = {
'tl': 'nw-resize',
'tr': 'ne-resize',
'bl': 'sw-resize',
'br': 'se-resize'
};
return cursors[handle] || 'crosshair';
}
function resizeBox(x, y) {
const originalBox = { ...drawnBox };
switch (resizeHandle) {
case 'tl':
drawnBox.x = x;
drawnBox.y = y;
drawnBox.width = originalBox.x + originalBox.width - x;
drawnBox.height = originalBox.y + originalBox.height - y;
break;
case 'tr':
drawnBox.y = y;
drawnBox.width = x - originalBox.x;
drawnBox.height = originalBox.y + originalBox.height - y;
break;
case 'bl':
drawnBox.x = x;
drawnBox.width = originalBox.x + originalBox.width - x;
drawnBox.height = y - originalBox.y;
break;
case 'br':
drawnBox.width = x - originalBox.x;
drawnBox.height = y - originalBox.y;
break;
}
drawStartX = x;
drawStartY = y;
}
});
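The bounding_box value saved in handleDrawMouseUp above is a plain 'left,top,width,height' string in natural image coordinates. A sketch of the parsing this implies on the server side; parse_bounding_box is a hypothetical helper, the real parsing code is not part of this diff:

def parse_bounding_box(value):
    """Parse 'left,top,width,height' into a (left, top, right, bottom) crop region.

    Hypothetical helper illustrating the format written by the draw-mode JS;
    returns None for malformed or degenerate input.
    """
    try:
        left, top, width, height = (float(part) for part in value.split(','))
    except (ValueError, AttributeError):
        return None
    if width <= 0 or height <= 0:
        return None
    return (max(0, int(left)), max(0, int(top)), int(left + width), int(top + height))

assert parse_bounding_box("10,20,300,150") == (10, 20, 310, 170)
assert parse_bounding_box("garbage") is None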

View File

@@ -1,259 +0,0 @@
/**
* Image Comparison Diff Styles
* Styles for the interactive image comparison slider and screenshot diff visualization
*/
.comparison-score {
padding: 1em;
background: var(--color-table-stripe);
border-radius: 4px;
margin: 1em 0;
border: 1px solid var(--color-border-table-cell);
color: var(--color-text);
}
.change-detected {
color: #d32f2f;
font-weight: bold;
}
.no-change {
color: #388e3c;
font-weight: bold;
}
.comparison-grid {
display: grid;
grid-template-columns: 1fr 1fr;
gap: 1em;
margin: 1em 1em;
@media (max-width: 1200px) {
grid-template-columns: 1fr;
}
}
/* Interactive Image Comparison Slider */
.image-comparison {
position: relative;
width: 100%;
overflow: hidden;
border: 1px solid var(--color-border-table-cell);
box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
user-select: none;
img {
display: block;
width: 100%;
height: auto;
max-width: 100%;
border: none;
box-shadow: none;
}
}
/* Image wrappers with checkered background */
.comparison-image-wrapper {
position: relative;
width: 100%;
display: flex;
align-items: flex-start;
justify-content: center;
/* Very light checkered background pattern */
background-color: var(--color-background);
background-image:
linear-gradient(45deg, var(--color-table-stripe) 25%, transparent 25%),
linear-gradient(-45deg, var(--color-table-stripe) 25%, transparent 25%),
linear-gradient(45deg, transparent 75%, var(--color-table-stripe) 75%),
linear-gradient(-45deg, transparent 75%, var(--color-table-stripe) 75%);
background-size: 20px 20px;
background-position: 0 0, 0 10px, 10px -10px, -10px 0px;
}
.comparison-after {
position: absolute;
top: 0;
left: 0;
width: 100%;
height: 100%;
clip-path: inset(0 0 0 50%);
}
.comparison-slider {
position: absolute;
top: 0;
left: 50%;
width: 4px;
height: 100%;
background: #0078e7;
cursor: ew-resize;
transform: translateX(-2px);
z-index: 10;
}
.comparison-handle {
position: absolute;
top: 50%;
left: 50%;
width: 48px;
height: 48px;
background: #0078e7;
border: 3px solid white;
border-radius: 50%;
transform: translate(-50%, -50%);
box-shadow: 0 2px 8px rgba(0, 0, 0, 0.3);
display: flex;
align-items: center;
justify-content: center;
cursor: ew-resize;
transition: top 0.1s ease-out;
&::after {
content: '';
color: white;
font-size: 24px;
font-weight: bold;
pointer-events: none;
}
}
.comparison-labels {
position: absolute;
top: 10px;
width: 100%;
display: flex;
justify-content: space-between;
padding: 0;
z-index: 5;
pointer-events: none;
}
.comparison-label {
background: rgba(0, 0, 0, 0.7);
color: white;
padding: 0.5em 1em;
border-radius: 4px;
font-size: 0.9em;
font-weight: bold;
}
.screenshot-panel {
text-align: center;
background: var(--color-background);
border: 1px solid var(--color-border-table-cell);
border-radius: 4px;
padding: 1em;
box-shadow: 0 2px 4px rgba(0, 0, 0, 0.05);
h3 {
margin: 0 0 1em 0;
font-size: 1.1em;
color: var(--color-text);
border-bottom: 2px solid var(--color-background-button-primary);
padding-bottom: 0.5em;
}
&.diff h3 {
border-bottom-color: #d32f2f;
}
img {
max-width: 100%;
height: auto;
border: 1px solid var(--color-border-table-cell);
box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
}
}
.version-selector {
display: inline-block;
margin: 0 0.5em;
label {
font-weight: bold;
margin-right: 0.5em;
color: var(--color-text);
}
}
#settings {
background: var(--color-background);
padding: 1.5em;
border-radius: 4px;
box-shadow: 0 2px 4px rgba(0, 0, 0, 0.05);
margin-bottom: 2em;
border: 1px solid var(--color-border-table-cell);
h2 {
margin-top: 0;
color: var(--color-text);
}
}
.diff-fieldset {
border: none;
padding: 0;
margin: 0;
}
.edit-link {
float: right;
margin-top: -0.5em;
}
.comparison-description {
color: var(--color-text-input-description);
font-size: 0.9em;
margin-bottom: 1em;
}
.download-link {
color: var(--color-link);
text-decoration: none;
display: inline-flex;
align-items: center;
gap: 0.3em;
font-size: 0.85em;
&:hover {
text-decoration: underline;
}
}
.diff-section-header {
color: #d32f2f;
font-size: 0.9em;
margin-bottom: 1em;
font-weight: bold;
display: flex;
align-items: center;
justify-content: center;
gap: 1em;
}
.comparison-history-section {
margin-top: 3em;
padding: 1em;
background: var(--color-background);
border: 1px solid var(--color-border-table-cell);
border-radius: 4px;
box-shadow: 0 2px 4px rgba(0, 0, 0, 0.05);
h3 {
color: var(--color-text);
}
p {
color: var(--color-text-input-description);
font-size: 0.9em;
}
}
.history-changed-yes {
color: #d32f2f;
font-weight: bold;
}
.history-changed-no {
color: #388e3c;
}

View File

@@ -1,10 +0,0 @@
body.processor-image_ssim_diff {
#edit-text-filter {
.text-filtering {
display: none;
}
}
#conditions-tab {
display: none;
}
}

View File

@@ -21,8 +21,6 @@
@use "parts/socket";
@use "parts/visualselector";
@use "parts/widgets";
@use "parts/diff_image";
body {
color: var(--color-text);
@@ -184,13 +182,6 @@ code {
margin-right: 4px;
}
/* Processor type badges - colors auto-generated from processor names */
.processor-badge {
@extend .inline-tag;
font-size: 0.85em;
font-weight: 500;
}
.watch-tag-list {
color: var(--color-white);
background: var(--color-text-watch-tag-list);

File diff suppressed because one or more lines are too long

View File

@@ -187,26 +187,26 @@ def is_watch_running(watch_uuid):
return watch_uuid in get_running_uuids()
def queue_item_async_safe(update_q, item, silent=False):
def queue_item_async_safe(update_q, item):
"""Bulletproof queue operation with comprehensive error handling"""
item_uuid = 'unknown'
try:
# Safely extract UUID for logging
if hasattr(item, 'item') and isinstance(item.item, dict):
item_uuid = item.item.get('uuid', 'unknown')
except Exception as uuid_e:
logger.critical(f"CRITICAL: Failed to extract UUID from queue item: {uuid_e}")
# Validate inputs
if not update_q:
logger.critical(f"CRITICAL: Queue is None/invalid for item {item_uuid}")
return False
if not item:
logger.critical(f"CRITICAL: Item is None/invalid")
return False
# Attempt queue operation with multiple fallbacks
try:
# Primary: Use sync interface (thread-safe)
@@ -214,9 +214,8 @@ def queue_item_async_safe(update_q, item, silent=False):
if success is False: # Explicit False return means failure
logger.critical(f"CRITICAL: Queue.put() returned False for item {item_uuid}")
return False
if not silent:
logger.debug(f"Successfully queued item: {item_uuid}")
logger.debug(f"Successfully queued item: {item_uuid}")
return True
except Exception as e:

View File

@@ -72,13 +72,7 @@ lxml >=4.8.0,!=5.2.0,!=5.2.1,<7
# Consider updating to latest stable version periodically
elementpath==5.0.4
# For fast image comparison in screenshot change detection
# opencv-python-headless is OPTIONAL (excluded from requirements.txt)
# - Installed conditionally via Dockerfile (skipped on arm/v7 and arm/v8 due to long build times)
# - Pixelmatch is used as fallback when OpenCV is unavailable
# - To install manually: pip install opencv-python-headless>=4.8.0.76
selenium~=4.31.0
selenium~=4.39.0
# Templating, so far just in the URLs but in the future can be for the notifications also
jinja2~=3.1
@@ -92,6 +86,7 @@ jq~=1.3; python_version >= "3.8" and sys_platform == "linux"
# playwright is installed at Dockerfile build time because it's not available on all platforms
pyppeteer-ng==2.0.0rc10
pyppeteerstealth>=0.0.4
# Include pytest, so if there's a support issue we can ask them to run these tests on their setup