mirror of https://github.com/dgtlmoon/changedetection.io.git
memory fixes
@@ -1,6 +1,5 @@
from flask import Blueprint, request, redirect, url_for, flash, render_template, make_response, send_from_directory
import os
import time

import re
import importlib
from loguru import logger

@@ -13,6 +13,83 @@ from .. import jinja2_custom as safe_jinja
from ..diff import ADDED_PLACEMARKER_OPEN
from ..html_tools import TRANSLATE_WHITESPACE_TABLE


def _brotli_compress_worker(contents, filepath, mode=None):
    """
    Worker function to compress data with brotli in a separate process.
    This isolates memory - when process exits, OS reclaims all memory.

    Args:
        contents: bytes to compress
        filepath: destination file path
        mode: brotli compression mode (e.g., brotli.MODE_TEXT)
    """
    import brotli

    try:
        if mode is not None:
            compressed_data = brotli.compress(contents, mode=mode)
        else:
            compressed_data = brotli.compress(contents)

        with open(filepath, 'wb') as f:
            f.write(compressed_data)

        # No need for explicit cleanup - process exit frees all memory
        return True
    except Exception as e:
        logger.error(f"Brotli compression worker failed: {e}")
        return False

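The isolation claim in the docstring can be checked with a standalone sketch; the allocation size and function name below are illustrative and not part of the commit:

# Illustrative only: the 200 MB buffer exists solely inside the child process;
# once join() returns, the OS has reclaimed the child's entire address space,
# so the parent's resident memory is unaffected.
from multiprocessing import Process

def _allocate_and_exit():
    buf = bytearray(200 * 1024 * 1024)  # large allocation confined to the child
    del buf

if __name__ == "__main__":
    p = Process(target=_allocate_and_exit)
    p.start()
    p.join()
    print(p.exitcode)  # 0 on a clean exit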
def _brotli_subprocess_save(contents, filepath, mode=None, timeout=30, fallback_uncompressed=False):
    """
    Save compressed data using subprocess to isolate memory.

    Args:
        contents: data to compress (str or bytes)
        filepath: destination file path
        mode: brotli compression mode (e.g., brotli.MODE_TEXT)
        timeout: subprocess timeout in seconds
        fallback_uncompressed: if True, save uncompressed on failure; if False, raise exception

    Returns:
        str: actual filepath saved (may differ from input if fallback used)

    Raises:
        Exception: if compression fails and fallback_uncompressed is False
    """
    import brotli
    from multiprocessing import Process

    # Ensure contents are bytes
    if isinstance(contents, str):
        contents = contents.encode('utf-8')

    # Run compression in subprocess
    proc = Process(target=_brotli_compress_worker, args=(contents, filepath, mode))
    proc.start()
    proc.join(timeout=timeout)

    if proc.is_alive():
        logger.warning(f"Brotli compression subprocess timed out after {timeout}s")
        proc.terminate()
        proc.join()

    # Check if file was created successfully
    if os.path.exists(filepath):
        return filepath

    # Compression failed
    if fallback_uncompressed:
        logger.warning(f"Brotli compression failed for {filepath}, saving uncompressed")
        fallback_path = filepath.replace('.br', '')
        with open(fallback_path, 'wb') as f:
            f.write(contents)
        return fallback_path
    else:
        raise Exception(f"Brotli compression subprocess failed for {filepath}")


FAVICON_RESAVE_THRESHOLD_SECONDS=86400

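For orientation, a minimal usage sketch of the helper above, assuming it is in scope; the destination path and payload are illustrative, and the fallback file name follows the `.br`-stripping rule inside the function:

import brotli  # only needed here for the MODE_TEXT constant

contents = "<html>large snapshot body</html>" * 50000
saved_path = _brotli_subprocess_save(
    contents,                            # str is utf-8 encoded internally
    "/tmp/example-snapshot.txt.br",      # illustrative destination
    mode=brotli.MODE_TEXT,               # text-tuned compression
    timeout=30,                          # kill the child if it hangs
    fallback_uncompressed=True,          # on failure, write /tmp/example-snapshot.txt instead
)
# saved_path ends in ".txt.br" on success, or ".txt" if the uncompressed fallback was used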
@@ -359,7 +436,8 @@ class model(watch_base):
        # Text data (HTML, JSON, etc.) - apply brotli compression
        if not skip_brotli and len(contents) > threshold:
            snapshot_fname = f"{snapshot_id}.txt.br"
            encoded_data = brotli.compress(contents.encode('utf-8'), mode=brotli.MODE_TEXT)
            # Use subprocess for brotli compression to isolate memory
            encoded_data = None  # Will be compressed in subprocess
        else:
            snapshot_fname = f"{snapshot_id}.txt"
            encoded_data = contents.encode('utf-8')
@@ -368,12 +446,29 @@ class model(watch_base):

        # Write snapshot file atomically if it doesn't exist
        if not os.path.exists(dest):
            if encoded_data is None:
                # Brotli compression in subprocess
                try:
                    actual_dest = _brotli_subprocess_save(contents, dest, mode=brotli.MODE_TEXT, fallback_uncompressed=True)
                    # Update snapshot_fname if fallback was used
                    if actual_dest != dest:
                        snapshot_fname = os.path.basename(actual_dest)
                except Exception as e:
                    logger.error(f"{self.get('uuid')} - Brotli compression failed: {e}")
                    # Last resort fallback
                    snapshot_fname = f"{snapshot_id}.txt"
                    dest = os.path.join(self.watch_data_dir, snapshot_fname)
                    with open(dest, 'wb') as f:
                        f.write(contents.encode('utf-8'))
            else:
                # Binary or small text - write directly
                with tempfile.NamedTemporaryFile('wb', delete=False, dir=self.watch_data_dir) as tmp:
                    tmp.write(encoded_data)
                    tmp.flush()
                    os.fsync(tmp.fileno())
                    tmp_path = tmp.name
                os.rename(tmp_path, dest)
                del encoded_data

        # Append to history.txt atomically
        index_fname = os.path.join(self.watch_data_dir, "history.txt")
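The direct-write branch above uses the usual write-to-temp-then-rename pattern; a self-contained sketch with an assumed helper name and illustrative destination:

import os
import tempfile

def atomic_write(dest, data: bytes):
    # Write to a temp file in the destination directory, flush to disk, then
    # rename into place so readers never see a partially written snapshot.
    # dest must include a directory component (e.g. an absolute path).
    with tempfile.NamedTemporaryFile('wb', delete=False, dir=os.path.dirname(dest)) as tmp:
        tmp.write(data)
        tmp.flush()
        os.fsync(tmp.fileno())
        tmp_path = tmp.name
    os.rename(tmp_path, dest)  # atomic on POSIX when both paths share a filesystem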
@@ -788,25 +883,13 @@ class model(watch_base):
    def save_last_text_fetched_before_filters(self, contents):
        import brotli
        filepath = os.path.join(self.watch_data_dir, 'last-fetched.br')
        with open(filepath, 'wb') as f:
            f.write(brotli.compress(contents, mode=brotli.MODE_TEXT))
        _brotli_subprocess_save(contents, filepath, mode=brotli.MODE_TEXT, fallback_uncompressed=False)

    def save_last_fetched_html(self, timestamp, contents):
        import brotli

        self.ensure_data_dir_exists()
        snapshot_fname = f"{timestamp}.html.br"
        filepath = os.path.join(self.watch_data_dir, snapshot_fname)

        with open(filepath, 'wb') as f:
            contents = contents.encode('utf-8') if isinstance(contents, str) else contents
            try:
                f.write(brotli.compress(contents))
            except Exception as e:
                logger.warning(f"{self.get('uuid')} - Unable to compress snapshot, saving as raw data to {filepath}")
                logger.warning(e)
                f.write(contents)

        _brotli_subprocess_save(contents, filepath, mode=None, fallback_uncompressed=True)
        self._prune_last_fetched_html_snapshots()

    def get_fetched_html(self, timestamp):
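For completeness, a sketch of the matching read path (the function name and path handling are assumptions, not shown in the diff): a `.br` snapshot decompresses with `brotli.decompress`, while an uncompressed fallback file is returned as-is.

import brotli

def read_snapshot(filepath):
    # Read a snapshot written by the helpers above; decompress only if it was
    # saved with the .br extension, otherwise return the raw bytes.
    with open(filepath, 'rb') as f:
        raw = f.read()
    return brotli.decompress(raw) if filepath.endswith('.br') else raw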
@@ -312,6 +312,8 @@ def render(watch, datastore, request, url_for, render_template, flash, redirect)
    Returns:
        Rendered template or redirect
    """
    import gc
    from flask import after_this_request
    # Get version parameters (from_version, to_version)
    versions = list(watch.history.keys())

@@ -486,9 +488,22 @@ def render(watch, datastore, request, url_for, render_template, flash, redirect)
    except Exception as e:
        logger.warning(f"Failed to load comparison history data: {e}")

    # Register cleanup callback to release memory after response is sent
    @after_this_request
    def cleanup_memory(response):
        """Force garbage collection after response sent to release large image data."""
        try:
            # Force garbage collection to immediately release memory
            # This helps ensure base64 image strings (which can be 5-10MB+) are freed
            collected = gc.collect()
            logger.debug(f"Memory cleanup: Forced GC after diff render (collected {collected} objects)")
        except Exception as e:
            logger.warning(f"Memory cleanup GC failed: {e}")
        return response

    # Render custom template
    # Template path is namespaced to avoid conflicts with other processors
    return render_template(
    response = render_template(
        'image_ssim_diff/diff.html',
        watch=watch,
        uuid=watch.get('uuid'),
@@ -504,3 +519,9 @@ def render(watch, datastore, request, url_for, render_template, flash, redirect)
        to_version=to_version,
        percentage_different=change_percentage
    )

    # Explicitly delete large base64 strings now that they're in the response
    # This helps free memory before the function returns
    del img_from_b64, img_to_b64, diff_image_b64, comparison_data

    return response
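The `after_this_request` hook used above is standard Flask; a standalone sketch under an assumed route name and synthetic payload:

import gc
from flask import Flask, after_this_request

app = Flask(__name__)

@app.route("/big-diff")  # illustrative route, not part of the commit
def big_diff():
    payload = "x" * (10 * 1024 * 1024)  # stand-in for large base64 image data

    @after_this_request
    def cleanup(response):
        # Runs once the response has been generated; a forced GC pass reclaims
        # the request's large temporaries promptly instead of waiting.
        gc.collect()
        return response

    return payload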
@@ -24,7 +24,7 @@ class perform_site_check(difference_detection_processor):
    """Fast screenshot comparison processor."""

    # Override to use PNG format for better image comparison (JPEG compression creates noise)
    screenshot_format = SCREENSHOT_FORMAT_PNG
    #screenshot_format = SCREENSHOT_FORMAT_PNG

    def run_changedetection(self, watch):
        """
@@ -175,11 +175,13 @@ class perform_site_check(difference_detection_processor):
        logger.info(f"First check for watch {watch.get('uuid')} - saving baseline screenshot")

        # Close the PIL images before returning
        import gc
        current_img.close()
        del current_img
        if cropped_current_img:
            cropped_current_img.close()
            del cropped_current_img
        gc.collect()

        update_obj = {
            'previous_md5': hashlib.md5(self.screenshot).hexdigest(),
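The close/del/collect sequence recurs throughout these hunks; a minimal standalone sketch with a synthetic image (sizes illustrative):

import gc
from PIL import Image

img = Image.new("RGB", (1920, 8000))  # stand-in for a tall full-page screenshot
try:
    pass  # ... comparison work would happen here ...
finally:
    img.close()   # release the decoded pixel buffer immediately
    del img       # drop the reference so nothing keeps it alive
    gc.collect()  # sweep any remaining cycles right away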
@@ -248,12 +250,14 @@ class perform_site_check(difference_detection_processor):
        except Exception as e:
            logger.warning(f"Failed to load previous screenshot for comparison: {e}")
            # Clean up current images before returning
            import gc
            if 'current_img' in locals():
                current_img.close()
                del current_img
            if 'cropped_current_img' in locals() and cropped_current_img:
                cropped_current_img.close()
                del cropped_current_img
            gc.collect()

            # If we can't load previous, treat as first check
            update_obj = {
@@ -273,10 +277,12 @@ class perform_site_check(difference_detection_processor):
            # Ensure images are the same size
            if img_for_comparison_curr.size != img_for_comparison_prev.size:
                logger.info(f"Resizing images to match: {img_for_comparison_prev.size} -> {img_for_comparison_curr.size}")
                img_for_comparison_prev = img_for_comparison_prev.resize(img_for_comparison_curr.size, Image.Resampling.LANCZOS)
                # If we resized a cropped version, update the reference
                if cropped_previous_img:
                    cropped_previous_img = img_for_comparison_prev
                resized_img = img_for_comparison_prev.resize(img_for_comparison_curr.size, Image.Resampling.LANCZOS)
                # If we resized a cropped version, close the old cropped image before replacing
                if cropped_previous_img and img_for_comparison_prev is cropped_previous_img:
                    cropped_previous_img.close()
                cropped_previous_img = resized_img
                img_for_comparison_prev = resized_img

            if comparison_method == 'pixelmatch':
                changed_detected, change_score = self._compare_pixelmatch(
@@ -290,6 +296,7 @@ class perform_site_check(difference_detection_processor):
                logger.info(f"OpenCV: {change_score:.2f}% pixels changed, threshold: {threshold:.0f}")

            # Explicitly close PIL images to free memory immediately
            import gc
            current_img.close()
            previous_img.close()
            del current_img
@@ -302,6 +309,9 @@ class perform_site_check(difference_detection_processor):
                del cropped_previous_img
            del previous_screenshot_bytes  # Release the large bytes object

            # Force garbage collection to immediately release memory
            gc.collect()

        except Exception as e:
            logger.error(f"Failed to compare screenshots: {e}")
            # Ensure cleanup even on error
@@ -349,6 +359,7 @@ class perform_site_check(difference_detection_processor):
        """
        import cv2
        import numpy as np
        import gc

        # Convert PIL images to numpy arrays
        arr_from = np.array(img_from)
@@ -362,6 +373,14 @@ class perform_site_check(difference_detection_processor):
            gray_from = arr_from
            gray_to = arr_to

        # Release original arrays if we converted them
        if gray_from is not arr_from:
            del arr_from
            arr_from = None
        if gray_to is not arr_to:
            del arr_to
            arr_to = None

        # Optional: Apply Gaussian blur to reduce sensitivity to minor rendering differences
        # Controlled by environment variable, default sigma=0.8
        blur_sigma = float(os.getenv("OPENCV_BLUR_SIGMA", "0.8"))
@@ -386,9 +405,13 @@ class perform_site_check(difference_detection_processor):
        changed_detected = change_percentage > min_change_percentage

        # Explicit memory cleanup - mark large arrays for garbage collection
        del arr_from, arr_to
        if arr_from is not None:
            del arr_from
        if arr_to is not None:
            del arr_to
        del gray_from, gray_to
        del diff, thresh
        gc.collect()

        return changed_detected, change_percentage

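A condensed sketch of the grayscale/blur/diff steps referenced in the hunks above, using synthetic frames; the binarization cutoff and minimum change percentage here are illustrative, since the real values come from the caller and environment variables:

import os
import cv2
import numpy as np

gray_from = np.zeros((100, 100), dtype=np.uint8)
gray_to = gray_from.copy()
gray_to[40:60, 40:60] = 200  # simulate a changed 20x20 region

blur_sigma = float(os.getenv("OPENCV_BLUR_SIGMA", "0.8"))
if blur_sigma > 0:
    # Blur both frames to damp minor rendering noise before differencing
    gray_from = cv2.GaussianBlur(gray_from, (0, 0), blur_sigma)
    gray_to = cv2.GaussianBlur(gray_to, (0, 0), blur_sigma)

diff = cv2.absdiff(gray_from, gray_to)
_, thresh = cv2.threshold(diff, 25, 255, cv2.THRESH_BINARY)  # illustrative cutoff
change_percentage = 100.0 * np.count_nonzero(thresh) / thresh.size
changed_detected = change_percentage > 0.1  # illustrative minimum change percentage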
@@ -415,18 +438,26 @@ class perform_site_check(difference_detection_processor):
            return self._compare_opencv(img_from, img_to, threshold * 255)

        import numpy as np
        import gc

        # Track converted images so we can close them
        converted_img_from = None
        converted_img_to = None

        # Convert to RGB if not already
        if img_from.mode != 'RGB':
            img_from = img_from.convert('RGB')
            converted_img_from = img_from.convert('RGB')
            img_from = converted_img_from
        if img_to.mode != 'RGB':
            img_to = img_to.convert('RGB')
            converted_img_to = img_to.convert('RGB')
            img_to = converted_img_to

        # Convert to numpy arrays (pixelmatch expects RGBA format)
        arr_from = np.array(img_from)
        arr_to = np.array(img_to)

        # Add alpha channel (pixelmatch expects RGBA)
        alpha = None
        if arr_from.shape[2] == 3:
            alpha = np.ones((arr_from.shape[0], arr_from.shape[1], 1), dtype=np.uint8) * 255
            arr_from = np.concatenate([arr_from, alpha], axis=2)
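The alpha-padding step above in isolation, on a small synthetic array:

import numpy as np

rgb = np.zeros((4, 6, 3), dtype=np.uint8)  # H x W x 3 image
alpha = np.ones((rgb.shape[0], rgb.shape[1], 1), dtype=np.uint8) * 255
rgba = np.concatenate([rgb, alpha], axis=2)  # H x W x 4, fully opaque
assert rgba.shape == (4, 6, 4)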
@@ -458,11 +489,16 @@ class perform_site_check(difference_detection_processor):
        min_change_percentage = float(os.getenv("PIXELMATCH_MIN_CHANGE_PERCENT", "0.1"))
        changed_detected = change_percentage > min_change_percentage

        # Explicit memory cleanup - mark large arrays for garbage collection
        # Explicit memory cleanup - close converted images and delete arrays
        if converted_img_from is not None:
            converted_img_from.close()
        if converted_img_to is not None:
            converted_img_to.close()
        del arr_from, arr_to
        del diff_array
        if 'alpha' in locals():
        if alpha is not None:
            del alpha
        gc.collect()

        return changed_detected, change_percentage

@@ -518,6 +554,15 @@ class perform_site_check(difference_detection_processor):
        """
        import cv2
        import numpy as np
        import gc

        template_img = None
        current_array = None
        template_array = None
        current_gray = None
        template_gray = None
        search_region = None
        result = None

        try:
            # Load template from watch data directory
@@ -529,23 +574,9 @@ class perform_site_check(difference_detection_processor):
            from PIL import Image

            template_img = Image.open(template_path)
            template_img.load()  # Force load image data into memory

            # Convert images to numpy arrays for OpenCV
            current_array = np.array(current_img)
            template_array = np.array(template_img)

            # Convert to grayscale for matching
            if len(current_array.shape) == 3:
                current_gray = cv2.cvtColor(current_array, cv2.COLOR_RGB2GRAY)
            else:
                current_gray = current_array

            if len(template_array.shape) == 3:
                template_gray = cv2.cvtColor(template_array, cv2.COLOR_RGB2GRAY)
            else:
                template_gray = template_array

            # Calculate search region
            # Calculate search region dimensions first
            left, top, right, bottom = original_bbox
            width = right - left
            height = bottom - top
@@ -559,15 +590,56 @@ class perform_site_check(difference_detection_processor):
            search_right = min(current_img.width, right + margin_x)
            search_bottom = min(current_img.height, bottom + margin_y)

            # Extract search region
            search_region = current_gray[search_top:search_bottom, search_left:search_right]
            # Convert only the search region of current image to numpy array (not the whole image!)
            current_img_cropped = current_img.crop((search_left, search_top, search_right, search_bottom))
            current_array = np.array(current_img_cropped)
            current_img_cropped.close()  # Close immediately after conversion
            del current_img_cropped

            # Convert template to numpy array
            template_array = np.array(template_img)

            # Close template image immediately after conversion
            template_img.close()
            template_img = None

            # Convert to grayscale for matching
            if len(current_array.shape) == 3:
                current_gray = cv2.cvtColor(current_array, cv2.COLOR_RGB2GRAY)
                del current_array  # Delete immediately
                current_array = None
            else:
                current_gray = current_array
                current_array = None  # Just transfer reference

            if len(template_array.shape) == 3:
                template_gray = cv2.cvtColor(template_array, cv2.COLOR_RGB2GRAY)
                del template_array  # Delete immediately
                template_array = None
            else:
                template_gray = template_array
                template_array = None  # Just transfer reference

            logger.debug(f"Searching for template in region: ({search_left}, {search_top}) to ({search_right}, {search_bottom})")

            # Perform template matching
            result = cv2.matchTemplate(search_region, template_gray, cv2.TM_CCOEFF_NORMED)
            # Perform template matching (search_region is now just current_gray since we pre-cropped)
            result = cv2.matchTemplate(current_gray, template_gray, cv2.TM_CCOEFF_NORMED)

            # Delete arrays immediately after matchTemplate
            del current_gray
            current_gray = None
            del template_gray
            template_gray = None

            min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(result)

            # Delete result array immediately after getting values
            del result
            result = None

            # Force garbage collection now that large arrays are freed
            gc.collect()

            logger.debug(f"Template matching confidence: {max_val:.2%}")

            # Check if match is good enough (80% confidence threshold)
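A compact, self-contained sketch of the crop-then-match flow above, using synthetic grayscale arrays (the real code builds the search window from `original_bbox` plus a margin and converts from PIL images; the array shapes here are illustrative):

import cv2
import numpy as np

rng = np.random.default_rng(0)
scene = rng.integers(0, 255, (200, 300), dtype=np.uint8)  # grayscale screenshot stand-in
template = scene[60:90, 80:120].copy()                     # the element being tracked
search_region = np.ascontiguousarray(scene[40:120, 50:150])  # window around its last position

result = cv2.matchTemplate(search_region, template, cv2.TM_CCOEFF_NORMED)
min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(result)
# max_loc is relative to search_region; add its offset (x=50, y=40) to map back to
# full-screenshot coordinates. max_val close to 1.0 means a confident match.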
@@ -586,15 +658,34 @@ class perform_site_check(difference_detection_processor):
                            f"moved {move_x}px horizontally, {move_y}px vertically, "
                            f"confidence: {max_val:.2%}")

                # Close template image
                template_img.close()

                return new_bbox
            else:
                logger.warning(f"Template match confidence too low: {max_val:.2%} (need 80%)")
                template_img.close()
                return None

        except Exception as e:
            logger.error(f"Template matching error: {e}")
            return None

        finally:
            # Cleanup any remaining objects (in case of early return or exception)
            if template_img is not None:
                try:
                    template_img.close()
                except:
                    pass
            if result is not None:
                del result
            if search_region is not None:
                del search_region
            if template_gray is not None:
                del template_gray
            if template_array is not None:
                del template_array
            if current_gray is not None:
                del current_gray
            if current_array is not None:
                del current_array

            # Force garbage collection to immediately release memory
            gc.collect()