diff --git a/changedetectionio/api/Watch.py b/changedetectionio/api/Watch.py
index 77721a1a..7ee56313 100644
--- a/changedetectionio/api/Watch.py
+++ b/changedetectionio/api/Watch.py
@@ -127,7 +127,71 @@ class Watch(Resource):
         if request.json.get('url') and not is_safe_valid_url(request.json.get('url')):
             return "Invalid URL", 400
 
-        watch.update(request.json)
+        # Handle processor_config_* fields separately (saved to a per-processor JSON file, not the datastore)
+        config_prefix = 'processor_config_'
+        processor_config_data = {}
+        regular_data = {}
+
+        for key, value in request.json.items():
+            if key.startswith(config_prefix):
+                # Strip only the leading prefix (str.replace() would also remove later occurrences)
+                config_key = key[len(config_prefix):]
+                # NOTE(review): falsy values such as False or 0 are skipped too, so an option cannot be
+                # switched off through this path - confirm that this is intended
+                if value:  # Only save non-empty values
+                    processor_config_data[config_key] = value
+            else:
+                regular_data[key] = value
+
+        # Update watch with regular (non-processor-config) fields
+        watch.update(regular_data)
+
+        # Save processor config to JSON file if any config data exists
+        if processor_config_data:
+            try:
+                from changedetectionio.processors import difference_detection_processor, find_processors
+
+                processor_name = request.json.get('processor', watch.get('processor'))
+                # processor_name can come straight from the request body and is used below to build a
+                # file name and a module path, so only accept names of processors that actually exist
+                if processor_name and processor_name not in {name for _cls, name in find_processors()}:
+                    logger.warning(f"API: Ignoring processor config for unknown processor {processor_name!r}")
+                    processor_name = None
+
+                if processor_name:
+                    # Create a processor instance to access config methods
+                    processor_instance = difference_detection_processor(self.datastore, uuid)
+                    # Use processor name as filename so each processor keeps its own config
+                    config_filename = f'{processor_name}.json'
+                    processor_instance.update_extra_watch_config(config_filename, processor_config_data)
+                    logger.debug(f"API: Saved processor config to {config_filename}: {processor_config_data}")
+
+                    # Call optional edit_hook if processor has one
+                    try:
+                        import importlib
+                        edit_hook_module_name = f'changedetectionio.processors.{processor_name}.edit_hook'
+
+                        try:
+                            edit_hook = importlib.import_module(edit_hook_module_name)
+                            logger.debug(f"API: Found edit_hook module for {processor_name}")
+
+                            if hasattr(edit_hook, 'on_config_save'):
+                                logger.info(f"API: Calling edit_hook.on_config_save for {processor_name}")
+                                # Call hook and get updated config
+                                updated_config = edit_hook.on_config_save(watch, processor_config_data, self.datastore)
+                                # Save updated config back to file
+                                processor_instance.update_extra_watch_config(config_filename, updated_config)
+                                logger.info(f"API: Edit hook updated config: {updated_config}")
+                            else:
+                                logger.debug(f"API: Edit hook module found but no on_config_save function")
+                        except ModuleNotFoundError:
+                            logger.debug(f"API: No edit_hook module for processor {processor_name} (this is normal)")
+                    except Exception as hook_error:
+                        # logger.exception() logs at ERROR level and attaches the traceback
+                        logger.exception(f"API: Edit hook error (non-fatal): {hook_error}")
+
+            except Exception as e:
+                logger.error(f"API: Failed to save processor config: {e}")
 
         return "OK", 200
 
diff --git a/changedetectionio/blueprint/ui/edit.py b/changedetectionio/blueprint/ui/edit.py
index c5fdf41c..461e72be 100644
--- a/changedetectionio/blueprint/ui/edit.py
+++ b/changedetectionio/blueprint/ui/edit.py
@@ -168,6 +168,32 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
                     config_filename = f'{processor_name}.json'
                     processor_instance.update_extra_watch_config(config_filename, processor_config_data)
                     logger.debug(f"Saved processor config to {config_filename}: {processor_config_data}")
+
+                    # Call optional edit_hook if processor has one
+                    try:
+                        # Try to import the edit_hook module from the processor package
+                        import importlib
+                        edit_hook_module_name = f'changedetectionio.processors.{processor_name}.edit_hook'
+
+                        try:
+                            edit_hook = importlib.import_module(edit_hook_module_name)
+                            logger.debug(f"Found edit_hook module for {processor_name}")
+
+                            if hasattr(edit_hook, 'on_config_save'):
+                                logger.info(f"Calling edit_hook.on_config_save for {processor_name}")
+                                watch_obj = datastore.data['watching'][uuid]
+                                # Call hook and get updated config
+                                updated_config = edit_hook.on_config_save(watch_obj, processor_config_data, datastore)
+                                # Save updated config back to file
+                                processor_instance.update_extra_watch_config(config_filename, updated_config)
+                                logger.info(f"Edit hook updated config: {updated_config}")
+                            else:
+                                logger.debug(f"Edit hook module found but no on_config_save function")
+                        except ModuleNotFoundError:
+                            logger.debug(f"No edit_hook module for processor {processor_name} (this is normal)")
+                    except Exception as hook_error:
+                        logger.exception(f"Edit hook error (non-fatal): {hook_error}")
+
                 except Exception as e:
                     logger.error(f"Failed to save processor config: {e}")
 
diff --git a/changedetectionio/content_fetchers/playwright.py b/changedetectionio/content_fetchers/playwright.py
index a2d44a65..83c3f31e 100644
--- a/changedetectionio/content_fetchers/playwright.py
+++ b/changedetectionio/content_fetchers/playwright.py
@@ -28,6 +28,16 @@ async def capture_full_page_async(page, screenshot_format='JPEG'):
     y = 0
 
     if page_height > page.viewport_size['height']:
+
+        # Lock all element dimensions BEFORE screenshot to prevent CSS media queries from resizing
+        # capture_full_page_async() changes viewport height which triggers @media (min-height) rules
+        lock_elements_js_path = os.path.join(os.path.dirname(__file__), 'res', 'lock-elements-sizing.js')
+        with open(lock_elements_js_path, 'r', encoding='utf-8') as f:
+            lock_elements_js = f.read()
+        await page.evaluate(lock_elements_js)
+
+        logger.debug("Element dimensions locked before screenshot capture")
+
         if page_height < step_size:
             step_size = page_height  # Incase page is bigger than default viewport but smaller than proposed step size
         logger.debug(f"Setting bigger viewport to step through large page width W{page.viewport_size['width']}xH{step_size} because page_height > viewport_size")
diff --git a/changedetectionio/content_fetchers/puppeteer.py b/changedetectionio/content_fetchers/puppeteer.py
index c3548539..6a1ea660 100644
--- a/changedetectionio/content_fetchers/puppeteer.py
+++ b/changedetectionio/content_fetchers/puppeteer.py
@@ -50,6 +50,14 @@ async def capture_full_page(page, screenshot_format='JPEG'):
     screenshot_chunks = []
     y = 0
     if page_height > page.viewport['height']:
+        # Lock all element dimensions BEFORE screenshot to prevent CSS media queries from resizing
+        # capture_full_page() changes viewport height which triggers @media (min-height) rules
+        lock_elements_js_path = os.path.join(os.path.dirname(__file__), 'res', 'lock-elements-sizing.js')
+        with open(lock_elements_js_path, 'r', encoding='utf-8') as f:
+            lock_elements_js = f.read()
+        await page.evaluate(lock_elements_js)
+        logger.debug("Element dimensions locked before screenshot capture")
+
         if page_height < step_size:
             step_size = page_height  # Incase page is bigger than default viewport but smaller than proposed step size
         await page.setViewport({'width': page.viewport['width'], 'height': step_size})
@@ -222,7 +230,6 @@ class fetcher(Fetcher):
                     "height": int(match.group(2))
                 })
                 logger.debug(f"Puppeteer viewport size {self.page.viewport}")
-
         try:
             from pyppeteerstealth import inject_evasions_into_page
         except ImportError:
@@ -354,6 +361,11 @@ class fetcher(Fetcher):
             await self.page.evaluate(f"var include_filters=''")
 
         MAX_TOTAL_HEIGHT = int(os.getenv("SCREENSHOT_MAX_HEIGHT", SCREENSHOT_MAX_HEIGHT_DEFAULT))
+
+        self.content = await self.page.content
+
+        # Take the screenshot first: capture_full_page() may lock element sizes, and the xpath data below is then read from that same layout
+        self.screenshot = await capture_full_page(page=self.page, screenshot_format=self.screenshot_format)
         self.xpath_data = await self.page.evaluate(XPATH_ELEMENT_JS, {
             "visualselector_xpath_selectors": visualselector_xpath_selectors,
             "max_height": MAX_TOTAL_HEIGHT
@@ -361,12 +373,9 @@ class fetcher(Fetcher):
         if not self.xpath_data:
             raise Exception(f"Content Fetcher > xPath scraper failed. Please report this URL so we can fix it :)")
+
         self.instock_data = await self.page.evaluate(INSTOCK_DATA_JS)
 
-        self.content = await self.page.content
-
-        self.screenshot = await capture_full_page(page=self.page, screenshot_format=self.screenshot_format)
-
         # It's good to log here in the case that the browser crashes on shutting down but we still get the data we need
         logger.success(f"Fetching '{url}' complete, closing page")
         await self.page.close()
 
diff --git a/changedetectionio/content_fetchers/res/lock-elements-sizing.js b/changedetectionio/content_fetchers/res/lock-elements-sizing.js
new file mode 100644
index 00000000..9b58e715
--- /dev/null
+++ b/changedetectionio/content_fetchers/res/lock-elements-sizing.js
@@ -0,0 +1,90 @@
+/**
+ * Lock Element Dimensions for Screenshot Capture
+ *
+ * THE PROBLEM:
+ * When taking full-page screenshots of tall pages, Chrome/Puppeteer/Playwright need to:
+ * 1. Temporarily change the viewport height to a large value (e.g., 800px → 3809px)
+ * 2. Take screenshots in chunks while scrolling
+ * 3. Stitch the chunks together
+ *
+ * However, changing the viewport height triggers CSS media queries like:
+ * @media (min-height: 860px) { .ad { height: 250px; } }
+ *
+ * This causes elements (especially ads) to resize during screenshot capture, creating a mismatch:
+ * - Screenshot shows element at NEW size (after media query triggered)
+ * - xpath element coordinates measured at OLD size (before viewport change)
+ * - Visual selector overlays don't align with screenshot
+ *
+ * EXAMPLE BUG:
+ * - Initial viewport: 1280x800, ad height: 138px, article position: 279px ✓
+ * - Viewport changes to 1280x3809 for screenshot
+ * - Media query triggers: ad expands to 250px
+ * - All content below shifts down by 112px (250-138)
+ * - Article now at position: 391px (279+112)
+ * - But xpath data says 279px → 112px mismatch! ✗
+ *
+ * THE SOLUTION:
+ * Before changing viewport, lock ALL element dimensions with !important inline styles.
+ * Inline styles with !important override media query CSS, preventing layout changes.
+ *
+ * WHAT THIS SCRIPT DOES:
+ * 1. Iterates through every element on the page
+ * 2. Captures current computed dimensions (width, height) of all of them before changing anything
+ * 3. Sets inline styles with !important to freeze those dimensions
+ * 4. Disables ResizeObserver API (for JS-based resizing)
+ * 5. When viewport changes for screenshot, media queries can't resize anything
+ * 6. Layout remains consistent → xpath coordinates match screenshot ✓
+ *
+ * USAGE:
+ * Execute this script BEFORE the viewport is resized for the screenshot (capture_full_page() and
+ * capture_full_page_async() run it themselves when the page is taller than the viewport).
+ * The page must be fully loaded and settled at its initial viewport size.
+ * No need to restore state afterward - page is closed after screenshot.
+ *
+ * PERFORMANCE:
+ * - Iterates all DOM elements (can be 1000s on complex pages)
+ * - Measures everything first and writes styles afterwards, so the browser is not forced to
+ *   re-layout the page between elements
+ * - Typically completes in 50-200ms
+ * - One-time cost before screenshot, well worth it for coordinate accuracy
+ *
+ * @see https://github.com/dgtlmoon/changedetection.io/issues/XXXX
+ */
+
+(() => {
+    // Pass 1 (read only): measure every element before any style is written
+    const measured = [];
+    document.querySelectorAll('*').forEach(el => {
+        const computed = window.getComputedStyle(el);
+        const rect = el.getBoundingClientRect();
+        measured.push({
+            el,
+            height: rect.height > 0 ? computed.height : null,
+            width: rect.width > 0 ? computed.width : null,
+        });
+    });
+
+    // Pass 2 (write only): lock dimensions with !important to override media queries
+    measured.forEach(({ el, height, width }) => {
+        if (height !== null) {
+            el.style.setProperty('height', height, 'important');
+            el.style.setProperty('min-height', height, 'important');
+            el.style.setProperty('max-height', height, 'important');
+        }
+        if (width !== null) {
+            el.style.setProperty('width', width, 'important');
+            el.style.setProperty('min-width', width, 'important');
+            el.style.setProperty('max-width', width, 'important');
+        }
+    });
+
+    // Also disable ResizeObserver for JS-based resizing
+    window.ResizeObserver = class {
+        constructor() {}
+        observe() {}
+        unobserve() {}
+        disconnect() {}
+    };
+
+    console.log('✓ Element dimensions locked to prevent media query changes during screenshot');
+})();
diff --git a/changedetectionio/model/Watch.py b/changedetectionio/model/Watch.py
index 1ea5cdbb..8c6873db 100644
--- a/changedetectionio/model/Watch.py
+++ b/changedetectionio/model/Watch.py
@@ -96,8 +96,17 @@ class model(watch_base):
     def clear_watch(self):
         import pathlib
 
+        # Get list of processor config files to preserve
+        from changedetectionio.processors import find_processors
+        processor_names = [name for cls, name in find_processors()]
+        processor_config_files = {f"{name}.json" for name in processor_names}
+
         # JSON Data, Screenshots, Textfiles (history index and snapshots), HTML in the future etc
+        # But preserve processor config files (they're configuration, not history data)
         for item in pathlib.Path(str(self.watch_data_dir)).rglob("*.*"):
+            # Skip processor config files
+            if item.name in processor_config_files:
+                continue
             os.unlink(item)
 
         # Force the attr to recalculate
diff --git a/changedetectionio/processors/__init__.py b/changedetectionio/processors/__init__.py
index 9011dee9..f031f562 100644
--- a/changedetectionio/processors/__init__.py
+++ b/changedetectionio/processors/__init__.py
@@ -328,6 +328,26 @@ def get_custom_watch_obj_for_processor(processor_name):
     return watch_class
 
 
+def find_processor_module(processor_name):
+    """
+    Find the processor module by name.
+
+    Args:
+        processor_name: Processor machine name (e.g., 'image_ssim_diff')
+
+    Returns:
+        module: The processor's parent module, or None if not found
+    """
+    processor_classes = find_processors()
+    processor_tuple = next((tpl for tpl in processor_classes if tpl[1] == processor_name), None)
+
+    if processor_tuple:
+        # Return the parent module (the package containing processor.py)
+        return get_parent_module(processor_tuple[0])
+
+    return None
+
+
 def available_processors():
     """
     Get a list of processors by name and description for the UI elements.
diff --git a/changedetectionio/processors/image_ssim_diff/__init__.py b/changedetectionio/processors/image_ssim_diff/__init__.py
index cc10eac1..1b1f94c0 100644
--- a/changedetectionio/processors/image_ssim_diff/__init__.py
+++ b/changedetectionio/processors/image_ssim_diff/__init__.py
@@ -16,3 +16,6 @@ processor_weight = 2 # Lower weight = appears at top, heavier weight = appears
 DEFAULT_COMPARISON_METHOD = os.getenv('COMPARISON_METHOD', 'opencv')
 DEFAULT_COMPARISON_THRESHOLD_OPENCV = float(os.getenv('COMPARISON_THRESHOLD_OPENCV', '30'))
 DEFAULT_COMPARISON_THRESHOLD_PIXELMATCH = float(os.getenv('COMPARISON_THRESHOLD_PIXELMATCH', '10'))
+
+# Template tracking filename
+CROPPED_IMAGE_TEMPLATE_FILENAME = 'cropped_image_template.png'
diff --git a/changedetectionio/processors/image_ssim_diff/processor.py b/changedetectionio/processors/image_ssim_diff/processor.py
index 63b1ac34..f33a2546 100644
--- a/changedetectionio/processors/image_ssim_diff/processor.py
+++ b/changedetectionio/processors/image_ssim_diff/processor.py
@@ -10,9 +10,10 @@ import hashlib
 import os
 import time
 from loguru import logger
+from changedetectionio import strtobool
 from changedetectionio.processors import difference_detection_processor, SCREENSHOT_FORMAT_PNG
 from changedetectionio.processors.exceptions import ProcessorException
-from . import DEFAULT_COMPARISON_METHOD, DEFAULT_COMPARISON_THRESHOLD_OPENCV, DEFAULT_COMPARISON_THRESHOLD_PIXELMATCH
+from . import DEFAULT_COMPARISON_METHOD, DEFAULT_COMPARISON_THRESHOLD_OPENCV, DEFAULT_COMPARISON_THRESHOLD_PIXELMATCH, CROPPED_IMAGE_TEMPLATE_FILENAME
 
 name = 'Visual/Image screenshot change detection'
 description = 'Compares screenshots using fast algorithms (OpenCV or pixelmatch), 10-100x faster than SSIM'
@@ -90,9 +91,12 @@ class perform_site_check(difference_detection_processor):
         # Automatically use the processor name from watch config as filename
         processor_name = watch.get('processor', 'default')
         config_filename = f'{processor_name}.json'
-        processor_config = self.get_extra_watch_config(config_filename)
+        processor_config = self.get_extra_watch_config(config_filename) or {}
         bounding_box = processor_config.get('bounding_box') if processor_config else None
 
+        # Template matching for tracking content movement
+        template_matching_enabled = processor_config.get('auto_track_region', False)
+
         if bounding_box:
             try:
                 # Parse bounding box: "x,y,width,height"
@@ -154,6 +158,8 @@ class perform_site_check(difference_detection_processor):
 
         # Crop the current image if region was found (for comparison only, keep full screenshot for history)
         cropped_current_img = None
+        original_crop_region = crop_region  # Store original for template matching
+
         if crop_region:
             try:
                 cropped_current_img = current_img.crop(crop_region)
@@ -194,6 +200,42 @@ class perform_site_check(difference_detection_processor):
 
         previous_img = Image.open(io.BytesIO(previous_screenshot_bytes))
 
+        # Template matching: If enabled, search for content that may have moved
+        # Check if feature is globally enabled via ENV var
+        feature_enabled = strtobool(os.getenv('ENABLE_TEMPLATE_TRACKING', 'True'))
+        # Check if auto-tracking is enabled for this specific watch (determined by feature analysis)
+        auto_track_enabled = template_matching_enabled
+
+        if feature_enabled and auto_track_enabled and original_crop_region:
+            try:
+                # Check if template exists, if not regenerate from previous snapshot
+                template_path = os.path.join(watch.watch_data_dir, CROPPED_IMAGE_TEMPLATE_FILENAME)
+                if not os.path.isfile(template_path):
+                    logger.info("Template file missing, regenerating from previous snapshot")
+                    self._regenerate_template_from_snapshot(
+                        previous_img, watch, original_crop_region
+                    )
+
+                logger.debug("Template matching enabled - searching for region movement")
+                new_crop_region = self._find_region_with_template_matching(
+                    current_img, watch, original_crop_region, search_tolerance=0.2
+                )
+
+                if new_crop_region:
+                    old_region = original_crop_region
+                    crop_region = new_crop_region
+                    logger.info(f"Template matching: Region moved from {old_region} to {new_crop_region}")
+
+                    # Update cropped image with new region
+                    if cropped_current_img:
+                        cropped_current_img.close()
+                    cropped_current_img = current_img.crop(crop_region)
+                else:
+                    logger.warning("Template matching: Could not find region, using original position")
+
+            except Exception as e:
+                logger.warning(f"Template matching error (continuing with original position): {e}")
+
         # Crop previous image to the same region if cropping is enabled
         cropped_previous_img = None
         if crop_region:
@@ -423,3 +465,148 @@ class perform_site_check(difference_detection_processor):
             del alpha
 
         return changed_detected, change_percentage
+
+    def _regenerate_template_from_snapshot(self, snapshot_img, watch, bbox):
+        """
+        Regenerate template file from a snapshot (typically after 'clear data').
+
+        When user clears watch data, the template file is deleted but config remains.
+        This extracts the region from the previous/baseline snapshot and saves it
+        as the template so tracking can continue.
+
+        Errors are logged and swallowed; nothing is raised to the caller.
+
+        Args:
+            snapshot_img: PIL Image to extract template from (usually previous_img)
+            watch: Watch object (to access data directory)
+            bbox: (left, top, right, bottom) bounding box coordinates
+        """
+        try:
+            left, top, right, bottom = bbox
+            width = right - left
+            height = bottom - top
+
+            # Ensure watch data directory exists
+            watch.ensure_data_dir_exists()
+
+            template_path = os.path.join(watch.watch_data_dir, CROPPED_IMAGE_TEMPLATE_FILENAME)
+
+            # Crop the template region
+            template = snapshot_img.crop(bbox)
+            try:
+                # Save as PNG (lossless, no compression artifacts)
+                template.save(template_path, format='PNG', optimize=True)
+            finally:
+                # Release the cropped copy even when save() fails
+                template.close()
+
+            logger.info(f"Regenerated template: {template_path} ({width}x{height}px)")
+
+        except Exception as e:
+            logger.error(f"Failed to regenerate template: {e}")
+
+    def _find_region_with_template_matching(self, current_img, watch, original_bbox, search_tolerance=0.2):
+        """
+        Use OpenCV template matching to find where content moved on the page.
+
+        This handles cases where page layout shifts push content to different
+        pixel coordinates, but the visual content remains the same.
+
+        Args:
+            current_img: PIL Image of current screenshot
+            watch: Watch object (to access template file)
+            original_bbox: (left, top, right, bottom) tuple of original region
+            search_tolerance: How far to search (0.2 = ±20% of region size)
+
+        Returns:
+            tuple: New (left, top, right, bottom) region, or None if not found
+                   (missing template, match confidence below 80%, or any error)
+        """
+        import cv2
+        import numpy as np
+
+        try:
+            # Load template from watch data directory
+            template_path = os.path.join(watch.watch_data_dir, CROPPED_IMAGE_TEMPLATE_FILENAME)
+
+            if not os.path.isfile(template_path):
+                logger.warning(f"Template file not found: {template_path}")
+                return None
+            from PIL import Image
+
+            # Convert images to numpy arrays for OpenCV. The template is copied into an array and
+            # closed right away so the file is released on every path, including errors below
+            template_img = Image.open(template_path)
+            try:
+                template_array = np.array(template_img)
+            finally:
+                template_img.close()
+            current_array = np.array(current_img)
+
+            # Convert to grayscale for matching
+            if len(current_array.shape) == 3:
+                current_gray = cv2.cvtColor(current_array, cv2.COLOR_RGB2GRAY)
+            else:
+                current_gray = current_array
+
+            if len(template_array.shape) == 3:
+                template_gray = cv2.cvtColor(template_array, cv2.COLOR_RGB2GRAY)
+            else:
+                template_gray = template_array
+
+            # Calculate search region
+            left, top, right, bottom = original_bbox
+            width = right - left
+            height = bottom - top
+
+            margin_x = int(width * search_tolerance)
+            margin_y = int(height * search_tolerance)
+
+            # Expand search area
+            search_left = max(0, left - margin_x)
+            search_top = max(0, top - margin_y)
+            search_right = min(current_img.width, right + margin_x)
+            search_bottom = min(current_img.height, bottom + margin_y)
+
+            # Extract search region
+            search_region = current_gray[search_top:search_bottom, search_left:search_right]
+
+            # matchTemplate() needs the template to fit inside the searched image; that is not the
+            # case when the search area was clipped by the edges of a smaller screenshot
+            if (template_gray.shape[0] > search_region.shape[0]
+                    or template_gray.shape[1] > search_region.shape[1]):
+                logger.warning("Template matching: template is larger than the search region, skipping")
+                return None
+
+            logger.debug(f"Searching for template in region: ({search_left}, {search_top}) to ({search_right}, {search_bottom})")
+
+            # Perform template matching
+            result = cv2.matchTemplate(search_region, template_gray, cv2.TM_CCOEFF_NORMED)
+            min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(result)
+
+            logger.debug(f"Template matching confidence: {max_val:.2%}")
+
+            # Check if match is good enough (80% confidence threshold)
+            if max_val >= 0.8:
+                # Calculate new bounding box in original image coordinates
+                match_x = search_left + max_loc[0]
+                match_y = search_top + max_loc[1]
+
+                new_bbox = (match_x, match_y, match_x + width, match_y + height)
+
+                # Calculate movement distance
+                move_x = abs(match_x - left)
+                move_y = abs(match_y - top)
+
+                logger.info(f"Template found at ({match_x}, {match_y}), "
+                            f"moved {move_x}px horizontally, {move_y}px vertically, "
+                            f"confidence: {max_val:.2%}")
+
+                return new_bbox
+
+            logger.warning(f"Template match confidence too low: {max_val:.2%} (need 80%)")
+            return None
+
+        except Exception as e:
+            logger.error(f"Template matching error: {e}")
+            return None