diff --git a/changedetectionio/blueprint/ui/diff.py b/changedetectionio/blueprint/ui/diff.py
index ed99ab8d..e0bdd4ba 100644
--- a/changedetectionio/blueprint/ui/diff.py
+++ b/changedetectionio/blueprint/ui/diff.py
@@ -1,6 +1,5 @@
 from flask import Blueprint, request, redirect, url_for, flash, render_template, make_response, send_from_directory
-import os
-import time
+
 import re
 import importlib
 from loguru import logger
diff --git a/changedetectionio/model/Watch.py b/changedetectionio/model/Watch.py
index 8c6873db..111bbc00 100644
--- a/changedetectionio/model/Watch.py
+++ b/changedetectionio/model/Watch.py
@@ -13,6 +13,83 @@ from .. import jinja2_custom as safe_jinja
 from ..diff import ADDED_PLACEMARKER_OPEN
 from ..html_tools import TRANSLATE_WHITESPACE_TABLE
 
+
+def _brotli_compress_worker(contents, filepath, mode=None):
+    """
+    Worker function to compress data with brotli in a separate process.
+    This isolates memory - when process exits, OS reclaims all memory.
+
+    Args:
+        contents: bytes to compress
+        filepath: destination file path
+        mode: brotli compression mode (e.g., brotli.MODE_TEXT)
+    """
+    import brotli
+
+    try:
+        if mode is not None:
+            compressed_data = brotli.compress(contents, mode=mode)
+        else:
+            compressed_data = brotli.compress(contents)
+
+        with open(filepath, 'wb') as f:
+            f.write(compressed_data)
+
+        # No need for explicit cleanup - process exit frees all memory
+        return True
+    except Exception as e:
+        logger.error(f"Brotli compression worker failed: {e}")
+        return False
+
+
+def _brotli_subprocess_save(contents, filepath, mode=None, timeout=30, fallback_uncompressed=False):
+    """
+    Save compressed data using subprocess to isolate memory.
+
+    Args:
+        contents: data to compress (str or bytes)
+        filepath: destination file path
+        mode: brotli compression mode (e.g., brotli.MODE_TEXT)
+        timeout: subprocess timeout in seconds
+        fallback_uncompressed: if True, save uncompressed on failure; if False, raise exception
+
+    Returns:
+        str: actual filepath saved (may differ from input if fallback used)
+
+    Raises:
+        Exception: if compression fails and fallback_uncompressed is False
+    """
+    import brotli
+    from multiprocessing import Process
+
+    # Ensure contents are bytes
+    if isinstance(contents, str):
+        contents = contents.encode('utf-8')
+
+    # Run compression in subprocess
+    proc = Process(target=_brotli_compress_worker, args=(contents, filepath, mode))
+    proc.start()
+    proc.join(timeout=timeout)
+
+    if proc.is_alive():
+        logger.warning(f"Brotli compression subprocess timed out after {timeout}s")
+        proc.terminate()
+        proc.join()
+
+    # Check if file was created successfully
+    if os.path.exists(filepath):
+        return filepath
+
+    # Compression failed
+    if fallback_uncompressed:
+        logger.warning(f"Brotli compression failed for {filepath}, saving uncompressed")
+        fallback_path = filepath.replace('.br', '')
+        with open(fallback_path, 'wb') as f:
+            f.write(contents)
+        return fallback_path
+    else:
+        raise Exception(f"Brotli compression subprocess failed for {filepath}")
+
 FAVICON_RESAVE_THRESHOLD_SECONDS=86400
 
@@ -359,7 +436,8 @@ class model(watch_base):
         # Text data (HTML, JSON, etc.) - apply brotli compression
         if not skip_brotli and len(contents) > threshold:
             snapshot_fname = f"{snapshot_id}.txt.br"
-            encoded_data = brotli.compress(contents.encode('utf-8'), mode=brotli.MODE_TEXT)
+            # Use subprocess for brotli compression to isolate memory
+            encoded_data = None  # Will be compressed in subprocess
         else:
             snapshot_fname = f"{snapshot_id}.txt"
             encoded_data = contents.encode('utf-8')
@@ -368,12 +446,29 @@ class model(watch_base):
 
         # Write snapshot file atomically if it doesn't exist
        if not os.path.exists(dest):
-            with tempfile.NamedTemporaryFile('wb', delete=False, dir=self.watch_data_dir) as tmp:
-                tmp.write(encoded_data)
-                tmp.flush()
-                os.fsync(tmp.fileno())
-                tmp_path = tmp.name
-            os.rename(tmp_path, dest)
+            if encoded_data is None:
+                # Brotli compression in subprocess
+                try:
+                    actual_dest = _brotli_subprocess_save(contents, dest, mode=brotli.MODE_TEXT, fallback_uncompressed=True)
+                    # Update snapshot_fname if fallback was used
+                    if actual_dest != dest:
+                        snapshot_fname = os.path.basename(actual_dest)
+                except Exception as e:
+                    logger.error(f"{self.get('uuid')} - Brotli compression failed: {e}")
+                    # Last resort fallback
+                    snapshot_fname = f"{snapshot_id}.txt"
+                    dest = os.path.join(self.watch_data_dir, snapshot_fname)
+                    with open(dest, 'wb') as f:
+                        f.write(contents.encode('utf-8'))
+            else:
+                # Binary or small text - write directly
+                with tempfile.NamedTemporaryFile('wb', delete=False, dir=self.watch_data_dir) as tmp:
+                    tmp.write(encoded_data)
+                    tmp.flush()
+                    os.fsync(tmp.fileno())
+                    tmp_path = tmp.name
+                os.rename(tmp_path, dest)
+        del encoded_data
 
         # Append to history.txt atomically
         index_fname = os.path.join(self.watch_data_dir, "history.txt")
@@ -788,25 +883,13 @@ class model(watch_base):
     def save_last_text_fetched_before_filters(self, contents):
         import brotli
         filepath = os.path.join(self.watch_data_dir, 'last-fetched.br')
-        with open(filepath, 'wb') as f:
-            f.write(brotli.compress(contents, mode=brotli.MODE_TEXT))
+        _brotli_subprocess_save(contents, filepath, mode=brotli.MODE_TEXT, fallback_uncompressed=False)
 
     def save_last_fetched_html(self, timestamp, contents):
-        import brotli
-
         self.ensure_data_dir_exists()
         snapshot_fname = f"{timestamp}.html.br"
         filepath = os.path.join(self.watch_data_dir, snapshot_fname)
-
-        with open(filepath, 'wb') as f:
-            contents = contents.encode('utf-8') if isinstance(contents, str) else contents
-            try:
-                f.write(brotli.compress(contents))
-            except Exception as e:
-                logger.warning(f"{self.get('uuid')} - Unable to compress snapshot, saving as raw data to {filepath}")
-                logger.warning(e)
-                f.write(contents)
-
+        _brotli_subprocess_save(contents, filepath, mode=None, fallback_uncompressed=True)
        self._prune_last_fetched_html_snapshots()
 
     def get_fetched_html(self, timestamp):
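Note on the Watch.py change above: the two new helpers lean on the fact that a child process returns all of its memory to the OS when it exits, so brotli's working set never raises the long-lived parent's high-water mark. A minimal, self-contained sketch of the same pattern, independent of the changedetection.io code (function names below are illustrative, not part of the patch):

    from multiprocessing import Process

    def _compress_to_file(data: bytes, path: str) -> None:
        # Runs in the child process; everything allocated here is reclaimed on exit.
        import brotli
        with open(path, 'wb') as f:
            f.write(brotli.compress(data, mode=brotli.MODE_TEXT))

    def save_compressed(data: bytes, path: str, timeout: int = 30) -> bool:
        proc = Process(target=_compress_to_file, args=(data, path))
        proc.start()
        proc.join(timeout=timeout)
        if proc.is_alive():          # compression hung - kill the child
            proc.terminate()
            proc.join()
        return proc.exitcode == 0

    if __name__ == '__main__':
        print(save_compressed(b'hello world' * 1000, '/tmp/example.br'))

The trade-off is that `contents` has to reach the child (pickled under the spawn start method), which is usually cheap next to keeping brotli's allocations resident in the parent.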
diff --git a/changedetectionio/processors/image_ssim_diff/difference.py b/changedetectionio/processors/image_ssim_diff/difference.py
index 18d8136e..e37420fe 100644
--- a/changedetectionio/processors/image_ssim_diff/difference.py
+++ b/changedetectionio/processors/image_ssim_diff/difference.py
@@ -312,6 +312,8 @@ def render(watch, datastore, request, url_for, render_template, flash, redirect)
     Returns:
         Rendered template or redirect
     """
+    import gc
+    from flask import after_this_request
 
     # Get version parameters (from_version, to_version)
     versions = list(watch.history.keys())
@@ -486,9 +488,22 @@ def render(watch, datastore, request, url_for, render_template, flash, redirect)
     except Exception as e:
         logger.warning(f"Failed to load comparison history data: {e}")
 
+    # Register cleanup callback to release memory after response is sent
+    @after_this_request
+    def cleanup_memory(response):
+        """Force garbage collection after response sent to release large image data."""
+        try:
+            # Force garbage collection to immediately release memory
+            # This helps ensure base64 image strings (which can be 5-10MB+) are freed
+            collected = gc.collect()
+            logger.debug(f"Memory cleanup: Forced GC after diff render (collected {collected} objects)")
+        except Exception as e:
+            logger.warning(f"Memory cleanup GC failed: {e}")
+        return response
+
     # Render custom template
     # Template path is namespaced to avoid conflicts with other processors
-    return render_template(
+    response = render_template(
         'image_ssim_diff/diff.html',
         watch=watch,
         uuid=watch.get('uuid'),
@@ -504,3 +519,9 @@ def render(watch, datastore, request, url_for, render_template, flash, redirect)
         to_version=to_version,
         percentage_different=change_percentage
     )
+
+    # Explicitly delete large base64 strings now that they're in the response
+    # This helps free memory before the function returns
+    del img_from_b64, img_to_b64, diff_image_b64, comparison_data
+
+    return response
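For context on the difference.py change: `flask.after_this_request` registers a callback that runs once the current view has produced its response, which is why the template output is captured in `response` above and the large base64 strings are dropped before returning. A minimal sketch of the hook in a bare Flask app (route name and payload size are illustrative):

    import gc
    from flask import Flask, after_this_request

    app = Flask(__name__)

    @app.route('/big-page')
    def big_page():
        payload = 'x' * 10_000_000   # stand-in for a large base64-encoded image

        @after_this_request
        def _cleanup(response):
            gc.collect()             # runs after the view has returned
            return response

        html = f'<p>rendered {len(payload)} bytes</p>'
        del payload                  # drop the big object before returning
        return html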
diff --git a/changedetectionio/processors/image_ssim_diff/processor.py b/changedetectionio/processors/image_ssim_diff/processor.py
index f12d022b..e48821e6 100644
--- a/changedetectionio/processors/image_ssim_diff/processor.py
+++ b/changedetectionio/processors/image_ssim_diff/processor.py
@@ -24,7 +24,7 @@ class perform_site_check(difference_detection_processor):
     """Fast screenshot comparison processor."""
 
     # Override to use PNG format for better image comparison (JPEG compression creates noise)
-    screenshot_format = SCREENSHOT_FORMAT_PNG
+    #screenshot_format = SCREENSHOT_FORMAT_PNG
 
     def run_changedetection(self, watch):
         """
@@ -175,11 +175,13 @@ class perform_site_check(difference_detection_processor):
             logger.info(f"First check for watch {watch.get('uuid')} - saving baseline screenshot")
 
             # Close the PIL images before returning
+            import gc
             current_img.close()
             del current_img
             if cropped_current_img:
                 cropped_current_img.close()
                 del cropped_current_img
+            gc.collect()
 
             update_obj = {
                 'previous_md5': hashlib.md5(self.screenshot).hexdigest(),
@@ -248,12 +250,14 @@ class perform_site_check(difference_detection_processor):
             except Exception as e:
                 logger.warning(f"Failed to load previous screenshot for comparison: {e}")
                 # Clean up current images before returning
+                import gc
                 if 'current_img' in locals():
                     current_img.close()
                     del current_img
                 if 'cropped_current_img' in locals() and cropped_current_img:
                     cropped_current_img.close()
                     del cropped_current_img
+                gc.collect()
 
                 # If we can't load previous, treat as first check
                 update_obj = {
@@ -273,10 +277,12 @@ class perform_site_check(difference_detection_processor):
             # Ensure images are the same size
             if img_for_comparison_curr.size != img_for_comparison_prev.size:
                 logger.info(f"Resizing images to match: {img_for_comparison_prev.size} -> {img_for_comparison_curr.size}")
-                img_for_comparison_prev = img_for_comparison_prev.resize(img_for_comparison_curr.size, Image.Resampling.LANCZOS)
-                # If we resized a cropped version, update the reference
-                if cropped_previous_img:
-                    cropped_previous_img = img_for_comparison_prev
+                resized_img = img_for_comparison_prev.resize(img_for_comparison_curr.size, Image.Resampling.LANCZOS)
+                # If we resized a cropped version, close the old cropped image before replacing
+                if cropped_previous_img and img_for_comparison_prev is cropped_previous_img:
+                    cropped_previous_img.close()
+                    cropped_previous_img = resized_img
+                img_for_comparison_prev = resized_img
 
             if comparison_method == 'pixelmatch':
                 changed_detected, change_score = self._compare_pixelmatch(
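The resize hunk above exists because rebinding `img_for_comparison_prev` to the resized copy used to leave the old Pillow image open until garbage collection got around to it; closing the old object before dropping the last reference releases its pixel buffer immediately. A small sketch of that close-before-rebind pattern (the helper name is illustrative):

    from PIL import Image

    def resize_and_release(img: Image.Image, size) -> Image.Image:
        resized = img.resize(size, Image.Resampling.LANCZOS)
        img.close()                  # free the old decoded pixel data right away
        return resized

    if __name__ == '__main__':
        img = Image.new('RGB', (1920, 1080))
        img = resize_and_release(img, (960, 540))
        print(img.size)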
@@ -290,6 +296,7 @@ class perform_site_check(difference_detection_processor):
                 logger.info(f"OpenCV: {change_score:.2f}% pixels changed, threshold: {threshold:.0f}")
 
             # Explicitly close PIL images to free memory immediately
+            import gc
             current_img.close()
             previous_img.close()
             del current_img
@@ -302,6 +309,9 @@ class perform_site_check(difference_detection_processor):
                 del cropped_previous_img
             del previous_screenshot_bytes  # Release the large bytes object
 
+            # Force garbage collection to immediately release memory
+            gc.collect()
+
         except Exception as e:
             logger.error(f"Failed to compare screenshots: {e}")
             # Ensure cleanup even on error
@@ -349,6 +359,7 @@ class perform_site_check(difference_detection_processor):
         """
         import cv2
         import numpy as np
+        import gc
 
         # Convert PIL images to numpy arrays
         arr_from = np.array(img_from)
@@ -362,6 +373,14 @@ class perform_site_check(difference_detection_processor):
             gray_from = arr_from
             gray_to = arr_to
 
+        # Release original arrays if we converted them
+        if gray_from is not arr_from:
+            del arr_from
+            arr_from = None
+        if gray_to is not arr_to:
+            del arr_to
+            arr_to = None
+
         # Optional: Apply Gaussian blur to reduce sensitivity to minor rendering differences
         # Controlled by environment variable, default sigma=0.8
         blur_sigma = float(os.getenv("OPENCV_BLUR_SIGMA", "0.8"))
@@ -386,9 +405,13 @@ class perform_site_check(difference_detection_processor):
         changed_detected = change_percentage > min_change_percentage
 
         # Explicit memory cleanup - mark large arrays for garbage collection
-        del arr_from, arr_to
+        if arr_from is not None:
+            del arr_from
+        if arr_to is not None:
+            del arr_to
         del gray_from, gray_to
         del diff, thresh
+        gc.collect()
 
         return changed_detected, change_percentage
 
@@ -415,18 +438,26 @@ class perform_site_check(difference_detection_processor):
             return self._compare_opencv(img_from, img_to, threshold * 255)
 
         import numpy as np
+        import gc
+
+        # Track converted images so we can close them
+        converted_img_from = None
+        converted_img_to = None
 
         # Convert to RGB if not already
         if img_from.mode != 'RGB':
-            img_from = img_from.convert('RGB')
+            converted_img_from = img_from.convert('RGB')
+            img_from = converted_img_from
         if img_to.mode != 'RGB':
-            img_to = img_to.convert('RGB')
+            converted_img_to = img_to.convert('RGB')
+            img_to = converted_img_to
 
         # Convert to numpy arrays (pixelmatch expects RGBA format)
         arr_from = np.array(img_from)
         arr_to = np.array(img_to)
 
         # Add alpha channel (pixelmatch expects RGBA)
+        alpha = None
         if arr_from.shape[2] == 3:
             alpha = np.ones((arr_from.shape[0], arr_from.shape[1], 1), dtype=np.uint8) * 255
             arr_from = np.concatenate([arr_from, alpha], axis=2)
@@ -458,11 +489,16 @@ class perform_site_check(difference_detection_processor):
         min_change_percentage = float(os.getenv("PIXELMATCH_MIN_CHANGE_PERCENT", "0.1"))
         changed_detected = change_percentage > min_change_percentage
 
-        # Explicit memory cleanup - mark large arrays for garbage collection
+        # Explicit memory cleanup - close converted images and delete arrays
+        if converted_img_from is not None:
+            converted_img_from.close()
+        if converted_img_to is not None:
+            converted_img_to.close()
         del arr_from, arr_to
         del diff_array
-        if 'alpha' in locals():
+        if alpha is not None:
             del alpha
+        gc.collect()
 
         return changed_detected, change_percentage
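The OpenCV/pixelmatch cleanup hunks above follow the same idea for NumPy: pixel buffers are freed as soon as the last reference disappears, and an explicit `del` plus `gc.collect()` makes that deterministic instead of waiting for the next collection. A toy illustration of the effect (array sizes are arbitrary, not taken from the patch):

    import gc
    import numpy as np

    def percent_changed() -> float:
        arr_from = np.zeros((2000, 2000, 4), dtype=np.uint8)   # ~16 MB
        arr_to = np.ones((2000, 2000, 4), dtype=np.uint8)
        changed = np.count_nonzero(arr_from != arr_to) / arr_from.size * 100.0
        del arr_from, arr_to          # drop ~32 MB of pixel data now
        gc.collect()                  # sweep any cycles still pinning the buffers
        return changed

    if __name__ == '__main__':
        print(f'{percent_changed():.2f}% of samples differ')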
@@ -518,6 +554,15 @@ class perform_site_check(difference_detection_processor):
         """
         import cv2
         import numpy as np
+        import gc
+
+        template_img = None
+        current_array = None
+        template_array = None
+        current_gray = None
+        template_gray = None
+        search_region = None
+        result = None
 
         try:
             # Load template from watch data directory
@@ -529,23 +574,9 @@ class perform_site_check(difference_detection_processor):
 
             from PIL import Image
             template_img = Image.open(template_path)
+            template_img.load()  # Force load image data into memory
 
-            # Convert images to numpy arrays for OpenCV
-            current_array = np.array(current_img)
-            template_array = np.array(template_img)
-
-            # Convert to grayscale for matching
-            if len(current_array.shape) == 3:
-                current_gray = cv2.cvtColor(current_array, cv2.COLOR_RGB2GRAY)
-            else:
-                current_gray = current_array
-
-            if len(template_array.shape) == 3:
-                template_gray = cv2.cvtColor(template_array, cv2.COLOR_RGB2GRAY)
-            else:
-                template_gray = template_array
-
-            # Calculate search region
+            # Calculate search region dimensions first
             left, top, right, bottom = original_bbox
             width = right - left
             height = bottom - top
@@ -559,15 +590,56 @@ class perform_site_check(difference_detection_processor):
             search_right = min(current_img.width, right + margin_x)
             search_bottom = min(current_img.height, bottom + margin_y)
 
-            # Extract search region
-            search_region = current_gray[search_top:search_bottom, search_left:search_right]
+            # Convert only the search region of current image to numpy array (not the whole image!)
+            current_img_cropped = current_img.crop((search_left, search_top, search_right, search_bottom))
+            current_array = np.array(current_img_cropped)
+            current_img_cropped.close()  # Close immediately after conversion
+            del current_img_cropped
+
+            # Convert template to numpy array
+            template_array = np.array(template_img)
+
+            # Close template image immediately after conversion
+            template_img.close()
+            template_img = None
+
+            # Convert to grayscale for matching
+            if len(current_array.shape) == 3:
+                current_gray = cv2.cvtColor(current_array, cv2.COLOR_RGB2GRAY)
+                del current_array  # Delete immediately
+                current_array = None
+            else:
+                current_gray = current_array
+                current_array = None  # Just transfer reference
+
+            if len(template_array.shape) == 3:
+                template_gray = cv2.cvtColor(template_array, cv2.COLOR_RGB2GRAY)
+                del template_array  # Delete immediately
+                template_array = None
+            else:
+                template_gray = template_array
+                template_array = None  # Just transfer reference
 
             logger.debug(f"Searching for template in region: ({search_left}, {search_top}) to ({search_right}, {search_bottom})")
 
-            # Perform template matching
-            result = cv2.matchTemplate(search_region, template_gray, cv2.TM_CCOEFF_NORMED)
+            # Perform template matching (search_region is now just current_gray since we pre-cropped)
+            result = cv2.matchTemplate(current_gray, template_gray, cv2.TM_CCOEFF_NORMED)
+
+            # Delete arrays immediately after matchTemplate
+            del current_gray
+            current_gray = None
+            del template_gray
+            template_gray = None
+
             min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(result)
+
+            # Delete result array immediately after getting values
+            del result
+            result = None
+
+            # Force garbage collection now that large arrays are freed
+            gc.collect()
+
             logger.debug(f"Template matching confidence: {max_val:.2%}")
 
             # Check if match is good enough (80% confidence threshold)
@@ -586,15 +658,34 @@ class perform_site_check(difference_detection_processor):
                             f"moved {move_x}px horizontally, {move_y}px vertically, "
                             f"confidence: {max_val:.2%}")
 
-                # Close template image
-                template_img.close()
-
                 return new_bbox
             else:
                 logger.warning(f"Template match confidence too low: {max_val:.2%} (need 80%)")
-                template_img.close()
                 return None
 
         except Exception as e:
             logger.error(f"Template matching error: {e}")
             return None
+
+        finally:
+            # Cleanup any remaining objects (in case of early return or exception)
+            if template_img is not None:
+                try:
+                    template_img.close()
+                except:
+                    pass
+            if result is not None:
+                del result
+            if search_region is not None:
+                del search_region
+            if template_gray is not None:
+                del template_gray
+            if template_array is not None:
+                del template_array
+            if current_gray is not None:
+                del current_gray
+            if current_array is not None:
+                del current_array
+
+            # Force garbage collection to immediately release memory
+            gc.collect()
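For reference, the search-window logic in the template-matching hunks above in sketch form: the template is only matched against a margin around its previous bounding box, and the best hit is translated back into page coordinates. The patch additionally crops the Pillow image before converting to NumPy so the full-page array is never materialised; this sketch assumes a grayscale page array already exists, and the names and 50px margin are illustrative (the 0.8 confidence threshold mirrors the patch):

    import cv2
    import numpy as np

    def find_in_window(page_gray: np.ndarray, template_gray: np.ndarray,
                       bbox, margin: int = 50, confidence: float = 0.8):
        left, top, right, bottom = bbox
        h, w = page_gray.shape[:2]
        x0, y0 = max(0, left - margin), max(0, top - margin)
        x1, y1 = min(w, right + margin), min(h, bottom + margin)
        search = page_gray[y0:y1, x0:x1]          # rows are y, columns are x
        result = cv2.matchTemplate(search, template_gray, cv2.TM_CCOEFF_NORMED)
        _, max_val, _, max_loc = cv2.minMaxLoc(result)
        if max_val < confidence:
            return None
        # Translate the match back into full-page coordinates
        return (x0 + max_loc[0], y0 + max_loc[1], max_val)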