mirror of https://github.com/dgtlmoon/changedetection.io.git
memory fixes
@@ -1,6 +1,5 @@
from flask import Blueprint, request, redirect, url_for, flash, render_template, make_response, send_from_directory
import os
import time

import re
import importlib
from loguru import logger

@@ -13,6 +13,83 @@ from .. import jinja2_custom as safe_jinja
from ..diff import ADDED_PLACEMARKER_OPEN
from ..html_tools import TRANSLATE_WHITESPACE_TABLE


def _brotli_compress_worker(contents, filepath, mode=None):
    """
    Worker function to compress data with brotli in a separate process.
    This isolates memory - when process exits, OS reclaims all memory.

    Args:
        contents: bytes to compress
        filepath: destination file path
        mode: brotli compression mode (e.g., brotli.MODE_TEXT)
    """
    import brotli

    try:
        if mode is not None:
            compressed_data = brotli.compress(contents, mode=mode)
        else:
            compressed_data = brotli.compress(contents)

        with open(filepath, 'wb') as f:
            f.write(compressed_data)

        # No need for explicit cleanup - process exit frees all memory
        return True
    except Exception as e:
        logger.error(f"Brotli compression worker failed: {e}")
        return False

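The isolation claim in the docstring can be checked with a standalone sketch; the allocation size and function name below are illustrative and not part of the commit:

# Illustrative only: the 200 MB buffer exists solely inside the child process;
# once join() returns, the OS has reclaimed the child's entire address space,
# so the parent's resident memory is unaffected.
from multiprocessing import Process

def _allocate_and_exit():
    buf = bytearray(200 * 1024 * 1024)  # large allocation confined to the child
    del buf

if __name__ == "__main__":
    p = Process(target=_allocate_and_exit)
    p.start()
    p.join()
    print(p.exitcode)  # 0 on a clean exit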
def _brotli_subprocess_save(contents, filepath, mode=None, timeout=30, fallback_uncompressed=False):
    """
    Save compressed data using subprocess to isolate memory.

    Args:
        contents: data to compress (str or bytes)
        filepath: destination file path
        mode: brotli compression mode (e.g., brotli.MODE_TEXT)
        timeout: subprocess timeout in seconds
        fallback_uncompressed: if True, save uncompressed on failure; if False, raise exception

    Returns:
        str: actual filepath saved (may differ from input if fallback used)

    Raises:
        Exception: if compression fails and fallback_uncompressed is False
    """
    import brotli
    from multiprocessing import Process

    # Ensure contents are bytes
    if isinstance(contents, str):
        contents = contents.encode('utf-8')

    # Run compression in subprocess
    proc = Process(target=_brotli_compress_worker, args=(contents, filepath, mode))
    proc.start()
    proc.join(timeout=timeout)

    if proc.is_alive():
        logger.warning(f"Brotli compression subprocess timed out after {timeout}s")
        proc.terminate()
        proc.join()

    # Check if file was created successfully
    if os.path.exists(filepath):
        return filepath

    # Compression failed
    if fallback_uncompressed:
        logger.warning(f"Brotli compression failed for {filepath}, saving uncompressed")
        fallback_path = filepath.replace('.br', '')
        with open(fallback_path, 'wb') as f:
            f.write(contents)
        return fallback_path
    else:
        raise Exception(f"Brotli compression subprocess failed for {filepath}")


FAVICON_RESAVE_THRESHOLD_SECONDS=86400

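For orientation, a minimal usage sketch of the helper above, assuming it is in scope; the destination path and payload are illustrative, and the fallback file name follows the `.br`-stripping rule inside the function:

import brotli  # only needed here for the MODE_TEXT constant

contents = "<html>large snapshot body</html>" * 50000
saved_path = _brotli_subprocess_save(
    contents,                            # str is utf-8 encoded internally
    "/tmp/example-snapshot.txt.br",      # illustrative destination
    mode=brotli.MODE_TEXT,               # text-tuned compression
    timeout=30,                          # kill the child if it hangs
    fallback_uncompressed=True,          # on failure, write /tmp/example-snapshot.txt instead
)
# saved_path ends in ".txt.br" on success, or ".txt" if the uncompressed fallback was used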
@@ -359,7 +436,8 @@ class model(watch_base):
        # Text data (HTML, JSON, etc.) - apply brotli compression
        if not skip_brotli and len(contents) > threshold:
            snapshot_fname = f"{snapshot_id}.txt.br"
            encoded_data = brotli.compress(contents.encode('utf-8'), mode=brotli.MODE_TEXT)
            # Use subprocess for brotli compression to isolate memory
            encoded_data = None  # Will be compressed in subprocess
        else:
            snapshot_fname = f"{snapshot_id}.txt"
            encoded_data = contents.encode('utf-8')
@@ -368,12 +446,29 @@ class model(watch_base):

        # Write snapshot file atomically if it doesn't exist
        if not os.path.exists(dest):
            if encoded_data is None:
                # Brotli compression in subprocess
                try:
                    actual_dest = _brotli_subprocess_save(contents, dest, mode=brotli.MODE_TEXT, fallback_uncompressed=True)
                    # Update snapshot_fname if fallback was used
                    if actual_dest != dest:
                        snapshot_fname = os.path.basename(actual_dest)
                except Exception as e:
                    logger.error(f"{self.get('uuid')} - Brotli compression failed: {e}")
                    # Last resort fallback
                    snapshot_fname = f"{snapshot_id}.txt"
                    dest = os.path.join(self.watch_data_dir, snapshot_fname)
                    with open(dest, 'wb') as f:
                        f.write(contents.encode('utf-8'))
            else:
                # Binary or small text - write directly
                with tempfile.NamedTemporaryFile('wb', delete=False, dir=self.watch_data_dir) as tmp:
                    tmp.write(encoded_data)
                    tmp.flush()
                    os.fsync(tmp.fileno())
                    tmp_path = tmp.name
                os.rename(tmp_path, dest)
                del encoded_data

        # Append to history.txt atomically
        index_fname = os.path.join(self.watch_data_dir, "history.txt")
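The direct-write branch above uses the usual write-to-temp-then-rename pattern; a self-contained sketch with an assumed helper name and illustrative destination:

import os
import tempfile

def atomic_write(dest, data: bytes):
    # Write to a temp file in the destination directory, flush to disk, then
    # rename into place so readers never see a partially written snapshot.
    # dest must include a directory component (e.g. an absolute path).
    with tempfile.NamedTemporaryFile('wb', delete=False, dir=os.path.dirname(dest)) as tmp:
        tmp.write(data)
        tmp.flush()
        os.fsync(tmp.fileno())
        tmp_path = tmp.name
    os.rename(tmp_path, dest)  # atomic on POSIX when both paths share a filesystem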
@@ -788,25 +883,13 @@ class model(watch_base):
    def save_last_text_fetched_before_filters(self, contents):
        import brotli
        filepath = os.path.join(self.watch_data_dir, 'last-fetched.br')
        with open(filepath, 'wb') as f:
            f.write(brotli.compress(contents, mode=brotli.MODE_TEXT))
        _brotli_subprocess_save(contents, filepath, mode=brotli.MODE_TEXT, fallback_uncompressed=False)

    def save_last_fetched_html(self, timestamp, contents):
        import brotli

        self.ensure_data_dir_exists()
        snapshot_fname = f"{timestamp}.html.br"
        filepath = os.path.join(self.watch_data_dir, snapshot_fname)

        with open(filepath, 'wb') as f:
            contents = contents.encode('utf-8') if isinstance(contents, str) else contents
            try:
                f.write(brotli.compress(contents))
            except Exception as e:
                logger.warning(f"{self.get('uuid')} - Unable to compress snapshot, saving as raw data to {filepath}")
                logger.warning(e)
                f.write(contents)

        _brotli_subprocess_save(contents, filepath, mode=None, fallback_uncompressed=True)
        self._prune_last_fetched_html_snapshots()

    def get_fetched_html(self, timestamp):
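For completeness, a sketch of the matching read path (the function name and path handling are assumptions, not shown in the diff): a `.br` snapshot decompresses with `brotli.decompress`, while an uncompressed fallback file is returned as-is.

import brotli

def read_snapshot(filepath):
    # Read a snapshot written by the helpers above; decompress only if it was
    # saved with the .br extension, otherwise return the raw bytes.
    with open(filepath, 'rb') as f:
        raw = f.read()
    return brotli.decompress(raw) if filepath.endswith('.br') else raw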
@@ -312,6 +312,8 @@ def render(watch, datastore, request, url_for, render_template, flash, redirect)
    Returns:
        Rendered template or redirect
    """
    import gc
    from flask import after_this_request
    # Get version parameters (from_version, to_version)
    versions = list(watch.history.keys())

@@ -486,9 +488,22 @@ def render(watch, datastore, request, url_for, render_template, flash, redirect)
    except Exception as e:
        logger.warning(f"Failed to load comparison history data: {e}")

    # Register cleanup callback to release memory after response is sent
    @after_this_request
    def cleanup_memory(response):
        """Force garbage collection after response sent to release large image data."""
        try:
            # Force garbage collection to immediately release memory
            # This helps ensure base64 image strings (which can be 5-10MB+) are freed
            collected = gc.collect()
            logger.debug(f"Memory cleanup: Forced GC after diff render (collected {collected} objects)")
        except Exception as e:
            logger.warning(f"Memory cleanup GC failed: {e}")
        return response

    # Render custom template
    # Template path is namespaced to avoid conflicts with other processors
    return render_template(
    response = render_template(
        'image_ssim_diff/diff.html',
        watch=watch,
        uuid=watch.get('uuid'),
@@ -504,3 +519,9 @@ def render(watch, datastore, request, url_for, render_template, flash, redirect)
        to_version=to_version,
        percentage_different=change_percentage
    )

    # Explicitly delete large base64 strings now that they're in the response
    # This helps free memory before the function returns
    del img_from_b64, img_to_b64, diff_image_b64, comparison_data

    return response
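The `after_this_request` hook used above is standard Flask; a standalone sketch under an assumed route name and synthetic payload:

import gc
from flask import Flask, after_this_request

app = Flask(__name__)

@app.route("/big-diff")  # illustrative route, not part of the commit
def big_diff():
    payload = "x" * (10 * 1024 * 1024)  # stand-in for large base64 image data

    @after_this_request
    def cleanup(response):
        # Runs once the response has been generated; a forced GC pass reclaims
        # the request's large temporaries promptly instead of waiting.
        gc.collect()
        return response

    return payload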
@@ -24,7 +24,7 @@ class perform_site_check(difference_detection_processor):
    """Fast screenshot comparison processor."""

    # Override to use PNG format for better image comparison (JPEG compression creates noise)
    screenshot_format = SCREENSHOT_FORMAT_PNG
    #screenshot_format = SCREENSHOT_FORMAT_PNG

    def run_changedetection(self, watch):
        """
@@ -175,11 +175,13 @@ class perform_site_check(difference_detection_processor):
        logger.info(f"First check for watch {watch.get('uuid')} - saving baseline screenshot")

        # Close the PIL images before returning
        import gc
        current_img.close()
        del current_img
        if cropped_current_img:
            cropped_current_img.close()
            del cropped_current_img
        gc.collect()

        update_obj = {
            'previous_md5': hashlib.md5(self.screenshot).hexdigest(),
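The close/del/collect sequence recurs throughout these hunks; a minimal standalone sketch with a synthetic image (sizes illustrative):

import gc
from PIL import Image

img = Image.new("RGB", (1920, 8000))  # stand-in for a tall full-page screenshot
try:
    pass  # ... comparison work would happen here ...
finally:
    img.close()   # release the decoded pixel buffer immediately
    del img       # drop the reference so nothing keeps it alive
    gc.collect()  # sweep any remaining cycles right away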
@@ -248,12 +250,14 @@ class perform_site_check(difference_detection_processor):
        except Exception as e:
            logger.warning(f"Failed to load previous screenshot for comparison: {e}")
            # Clean up current images before returning
            import gc
            if 'current_img' in locals():
                current_img.close()
                del current_img
            if 'cropped_current_img' in locals() and cropped_current_img:
                cropped_current_img.close()
                del cropped_current_img
            gc.collect()

            # If we can't load previous, treat as first check
            update_obj = {
@@ -273,10 +277,12 @@ class perform_site_check(difference_detection_processor):
            # Ensure images are the same size
            if img_for_comparison_curr.size != img_for_comparison_prev.size:
                logger.info(f"Resizing images to match: {img_for_comparison_prev.size} -> {img_for_comparison_curr.size}")
                img_for_comparison_prev = img_for_comparison_prev.resize(img_for_comparison_curr.size, Image.Resampling.LANCZOS)
                # If we resized a cropped version, update the reference
                if cropped_previous_img:
                    cropped_previous_img = img_for_comparison_prev
                resized_img = img_for_comparison_prev.resize(img_for_comparison_curr.size, Image.Resampling.LANCZOS)
                # If we resized a cropped version, close the old cropped image before replacing
                if cropped_previous_img and img_for_comparison_prev is cropped_previous_img:
                    cropped_previous_img.close()
                cropped_previous_img = resized_img
                img_for_comparison_prev = resized_img

            if comparison_method == 'pixelmatch':
                changed_detected, change_score = self._compare_pixelmatch(
@@ -290,6 +296,7 @@ class perform_site_check(difference_detection_processor):
                logger.info(f"OpenCV: {change_score:.2f}% pixels changed, threshold: {threshold:.0f}")

            # Explicitly close PIL images to free memory immediately
            import gc
            current_img.close()
            previous_img.close()
            del current_img
@@ -302,6 +309,9 @@ class perform_site_check(difference_detection_processor):
                del cropped_previous_img
            del previous_screenshot_bytes  # Release the large bytes object

            # Force garbage collection to immediately release memory
            gc.collect()

        except Exception as e:
            logger.error(f"Failed to compare screenshots: {e}")
            # Ensure cleanup even on error
@@ -349,6 +359,7 @@ class perform_site_check(difference_detection_processor):
        """
        import cv2
        import numpy as np
        import gc

        # Convert PIL images to numpy arrays
        arr_from = np.array(img_from)
@@ -362,6 +373,14 @@ class perform_site_check(difference_detection_processor):
            gray_from = arr_from
            gray_to = arr_to

        # Release original arrays if we converted them
        if gray_from is not arr_from:
            del arr_from
            arr_from = None
        if gray_to is not arr_to:
            del arr_to
            arr_to = None

        # Optional: Apply Gaussian blur to reduce sensitivity to minor rendering differences
        # Controlled by environment variable, default sigma=0.8
        blur_sigma = float(os.getenv("OPENCV_BLUR_SIGMA", "0.8"))
@@ -386,9 +405,13 @@ class perform_site_check(difference_detection_processor):
        changed_detected = change_percentage > min_change_percentage

        # Explicit memory cleanup - mark large arrays for garbage collection
        del arr_from, arr_to
        if arr_from is not None:
            del arr_from
        if arr_to is not None:
            del arr_to
        del gray_from, gray_to
        del diff, thresh
        gc.collect()

        return changed_detected, change_percentage

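A condensed sketch of the grayscale/blur/diff steps referenced in the hunks above, using synthetic frames; the binarization cutoff and minimum change percentage here are illustrative, since the real values come from the caller and environment variables:

import os
import cv2
import numpy as np

gray_from = np.zeros((100, 100), dtype=np.uint8)
gray_to = gray_from.copy()
gray_to[40:60, 40:60] = 200  # simulate a changed 20x20 region

blur_sigma = float(os.getenv("OPENCV_BLUR_SIGMA", "0.8"))
if blur_sigma > 0:
    # Blur both frames to damp minor rendering noise before differencing
    gray_from = cv2.GaussianBlur(gray_from, (0, 0), blur_sigma)
    gray_to = cv2.GaussianBlur(gray_to, (0, 0), blur_sigma)

diff = cv2.absdiff(gray_from, gray_to)
_, thresh = cv2.threshold(diff, 25, 255, cv2.THRESH_BINARY)  # illustrative cutoff
change_percentage = 100.0 * np.count_nonzero(thresh) / thresh.size
changed_detected = change_percentage > 0.1  # illustrative minimum change percentage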
@@ -415,18 +438,26 @@ class perform_site_check(difference_detection_processor):
            return self._compare_opencv(img_from, img_to, threshold * 255)

        import numpy as np
        import gc

        # Track converted images so we can close them
        converted_img_from = None
        converted_img_to = None

        # Convert to RGB if not already
        if img_from.mode != 'RGB':
            img_from = img_from.convert('RGB')
            converted_img_from = img_from.convert('RGB')
            img_from = converted_img_from
        if img_to.mode != 'RGB':
            img_to = img_to.convert('RGB')
            converted_img_to = img_to.convert('RGB')
            img_to = converted_img_to

        # Convert to numpy arrays (pixelmatch expects RGBA format)
        arr_from = np.array(img_from)
        arr_to = np.array(img_to)

        # Add alpha channel (pixelmatch expects RGBA)
        alpha = None
        if arr_from.shape[2] == 3:
            alpha = np.ones((arr_from.shape[0], arr_from.shape[1], 1), dtype=np.uint8) * 255
            arr_from = np.concatenate([arr_from, alpha], axis=2)
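The alpha-padding step above in isolation, on a small synthetic array:

import numpy as np

rgb = np.zeros((4, 6, 3), dtype=np.uint8)  # H x W x 3 image
alpha = np.ones((rgb.shape[0], rgb.shape[1], 1), dtype=np.uint8) * 255
rgba = np.concatenate([rgb, alpha], axis=2)  # H x W x 4, fully opaque
assert rgba.shape == (4, 6, 4)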
@@ -458,11 +489,16 @@ class perform_site_check(difference_detection_processor):
        min_change_percentage = float(os.getenv("PIXELMATCH_MIN_CHANGE_PERCENT", "0.1"))
        changed_detected = change_percentage > min_change_percentage

        # Explicit memory cleanup - mark large arrays for garbage collection
        # Explicit memory cleanup - close converted images and delete arrays
        if converted_img_from is not None:
            converted_img_from.close()
        if converted_img_to is not None:
            converted_img_to.close()
        del arr_from, arr_to
        del diff_array
        if 'alpha' in locals():
        if alpha is not None:
            del alpha
        gc.collect()

        return changed_detected, change_percentage

@@ -518,6 +554,15 @@ class perform_site_check(difference_detection_processor):
        """
        import cv2
        import numpy as np
        import gc

        template_img = None
        current_array = None
        template_array = None
        current_gray = None
        template_gray = None
        search_region = None
        result = None

        try:
            # Load template from watch data directory
@@ -529,23 +574,9 @@ class perform_site_check(difference_detection_processor):
            from PIL import Image

            template_img = Image.open(template_path)
            template_img.load()  # Force load image data into memory

            # Convert images to numpy arrays for OpenCV
            current_array = np.array(current_img)
            template_array = np.array(template_img)

            # Convert to grayscale for matching
            if len(current_array.shape) == 3:
                current_gray = cv2.cvtColor(current_array, cv2.COLOR_RGB2GRAY)
            else:
                current_gray = current_array

            if len(template_array.shape) == 3:
                template_gray = cv2.cvtColor(template_array, cv2.COLOR_RGB2GRAY)
            else:
                template_gray = template_array

            # Calculate search region
            # Calculate search region dimensions first
            left, top, right, bottom = original_bbox
            width = right - left
            height = bottom - top
@@ -559,15 +590,56 @@ class perform_site_check(difference_detection_processor):
            search_right = min(current_img.width, right + margin_x)
            search_bottom = min(current_img.height, bottom + margin_y)

            # Extract search region
            search_region = current_gray[search_top:search_bottom, search_left:search_right]
            # Convert only the search region of current image to numpy array (not the whole image!)
            current_img_cropped = current_img.crop((search_left, search_top, search_right, search_bottom))
            current_array = np.array(current_img_cropped)
            current_img_cropped.close()  # Close immediately after conversion
            del current_img_cropped

            # Convert template to numpy array
            template_array = np.array(template_img)

            # Close template image immediately after conversion
            template_img.close()
            template_img = None

            # Convert to grayscale for matching
            if len(current_array.shape) == 3:
                current_gray = cv2.cvtColor(current_array, cv2.COLOR_RGB2GRAY)
                del current_array  # Delete immediately
                current_array = None
            else:
                current_gray = current_array
                current_array = None  # Just transfer reference

            if len(template_array.shape) == 3:
                template_gray = cv2.cvtColor(template_array, cv2.COLOR_RGB2GRAY)
                del template_array  # Delete immediately
                template_array = None
            else:
                template_gray = template_array
                template_array = None  # Just transfer reference

            logger.debug(f"Searching for template in region: ({search_left}, {search_top}) to ({search_right}, {search_bottom})")

            # Perform template matching
            result = cv2.matchTemplate(search_region, template_gray, cv2.TM_CCOEFF_NORMED)
            # Perform template matching (search_region is now just current_gray since we pre-cropped)
            result = cv2.matchTemplate(current_gray, template_gray, cv2.TM_CCOEFF_NORMED)

            # Delete arrays immediately after matchTemplate
            del current_gray
            current_gray = None
            del template_gray
            template_gray = None

            min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(result)

            # Delete result array immediately after getting values
            del result
            result = None

            # Force garbage collection now that large arrays are freed
            gc.collect()

            logger.debug(f"Template matching confidence: {max_val:.2%}")

            # Check if match is good enough (80% confidence threshold)
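A compact, self-contained sketch of the crop-then-match flow above, using synthetic grayscale arrays (the real code builds the search window from `original_bbox` plus a margin and converts from PIL images; the array shapes here are illustrative):

import cv2
import numpy as np

rng = np.random.default_rng(0)
scene = rng.integers(0, 255, (200, 300), dtype=np.uint8)  # grayscale screenshot stand-in
template = scene[60:90, 80:120].copy()                     # the element being tracked
search_region = np.ascontiguousarray(scene[40:120, 50:150])  # window around its last position

result = cv2.matchTemplate(search_region, template, cv2.TM_CCOEFF_NORMED)
min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(result)
# max_loc is relative to search_region; add its offset (x=50, y=40) to map back to
# full-screenshot coordinates. max_val close to 1.0 means a confident match.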
@@ -586,15 +658,34 @@ class perform_site_check(difference_detection_processor):
                            f"moved {move_x}px horizontally, {move_y}px vertically, "
                            f"confidence: {max_val:.2%}")

                # Close template image
                template_img.close()

                return new_bbox
            else:
                logger.warning(f"Template match confidence too low: {max_val:.2%} (need 80%)")
                template_img.close()
                return None

        except Exception as e:
            logger.error(f"Template matching error: {e}")
            return None

        finally:
            # Cleanup any remaining objects (in case of early return or exception)
            if template_img is not None:
                try:
                    template_img.close()
                except:
                    pass
            if result is not None:
                del result
            if search_region is not None:
                del search_region
            if template_gray is not None:
                del template_gray
            if template_array is not None:
                del template_array
            if current_gray is not None:
                del current_gray
            if current_array is not None:
                del current_array

            # Force garbage collection to immediately release memory
            gc.collect()