Avoid reprocessing when the fetched page is unchanged since the previous check

2026-02-13 17:56:02 +00:00 · 2026-02-12 15:56:36 +01:00
6 changed files with 26 additions and 6 deletions
--- a/changedetectionio/processors/init.py
+++ b/changedetectionio/processors/init.py
@@ -190,7 +190,7 @@ def get_plugin_processor_metadata():
        logger.warning(f"Error getting plugin processor metadata: {e}")
    return metadata

-
+@lru_cache(maxsize=1)
 def available_processors():
    """
    Get a list of processors by name and description for the UI elements.
--- a/changedetectionio/processors/base.py
+++ b/changedetectionio/processors/base.py
@@ -257,8 +257,16 @@ class difference_detection_processor():
        except IOError as e:
            logger.error(f"Failed to write extra watch config {filename}: {e}")

+    def get_raw_document_checksum(self):
+        checksum = None  # stays None when the fetcher holds no (or empty) content
+        # MD5 hex digest of the fetched content exactly as the fetcher holds it, UTF-8 encoded
+        if self.fetcher.content:
+            checksum = hashlib.md5(self.fetcher.content.encode('utf-8')).hexdigest()
+        # NOTE(review): .encode() assumes fetcher.content is a str - confirm for binary fetches
+        return checksum
+
    @abstractmethod
-    def run_changedetection(self, watch):
+    def run_changedetection(self, watch, force_reprocess=False):
        update_obj = {'last_notification_error': False, 'last_error': False}
        some_data = 'xxxxx'
        update_obj["previous_md5"] = hashlib.md5(some_data.encode('utf-8')).hexdigest()
--- a/changedetectionio/processors/image_ssim_diff/processor.py
+++ b/changedetectionio/processors/image_ssim_diff/processor.py
@@ -30,7 +30,7 @@ class perform_site_check(difference_detection_processor):
    # Override to use PNG format for better image comparison (JPEG compression creates noise)
    screenshot_format = SCREENSHOT_FORMAT_PNG

-    def run_changedetection(self, watch):
+    def run_changedetection(self, watch, force_reprocess=False):
        """
        Perform screenshot comparison using OpenCV subprocess handler.

--- a/changedetectionio/processors/restock_diff/processor.py
+++ b/changedetectionio/processors/restock_diff/processor.py
@@ -2,6 +2,7 @@ from ..base import difference_detection_processor
 from ..exceptions import ProcessorException
 from . import Restock
 from loguru import logger
+from changedetectionio.content_fetchers.exceptions import checksumFromPreviousCheckWasTheSame

 import urllib3
 import time
@@ -403,12 +404,16 @@ class perform_site_check(difference_detection_processor):
    screenshot = None
    xpath_data = None

-    def run_changedetection(self, watch):
+    def run_changedetection(self, watch, force_reprocess=False):
        import hashlib

        if not watch:
            raise Exception("Watch no longer exists.")

+        current_raw_document_checksum = self.get_raw_document_checksum()
+        if not force_reprocess and watch.get('previous_md5_before_filters') and watch.get('previous_md5_before_filters') == current_raw_document_checksum:
+            raise checksumFromPreviousCheckWasTheSame()
+
        # Unset any existing notification error
        update_obj = {'last_notification_error': False, 'last_error': False, 'restock':  Restock()}

@@ -418,6 +423,7 @@ class perform_site_check(difference_detection_processor):
        # Track the content type
        update_obj['content_type'] = self.fetcher.headers.get('Content-Type', '')
        update_obj["last_check_status"] = self.fetcher.get_last_status_code()
+        update_obj['previous_md5_before_filters'] = current_raw_document_checksum

        # Only try to process restock information (like scraping for keywords) if the page was actually rendered correctly.
        # Otherwise it will assume "in stock" because nothing suggesting the opposite was found
--- a/changedetectionio/processors/text_json_diff/processor.py
+++ b/changedetectionio/processors/text_json_diff/processor.py
@@ -7,6 +7,7 @@ import re
 import urllib3

 from changedetectionio.conditions import execute_ruleset_against_all_plugins
+from changedetectionio.content_fetchers.exceptions import checksumFromPreviousCheckWasTheSame
 from ..base import difference_detection_processor
 from changedetectionio.html_tools import PERL_STYLE_REGEX, cdata_in_document_to_text, TRANSLATE_WHITESPACE_TABLE
 from changedetectionio import html_tools, content_fetchers
@@ -368,12 +369,16 @@ class ChecksumCalculator:
 # (set_proxy_from_list)
 class perform_site_check(difference_detection_processor):

-    def run_changedetection(self, watch):
+    def run_changedetection(self, watch, force_reprocess=False):
        changed_detected = False

        if not watch:
            raise Exception("Watch no longer exists.")

+        current_raw_document_checksum = self.get_raw_document_checksum()
+        if not force_reprocess and watch.get('previous_md5_before_filters') and watch.get('previous_md5_before_filters') == current_raw_document_checksum:
+            raise checksumFromPreviousCheckWasTheSame()
+
        # Initialize components
        filter_config = FilterConfig(watch, self.datastore)
        content_processor = ContentProcessor(self.fetcher, watch, filter_config, self.datastore)
@@ -393,7 +398,7 @@ class perform_site_check(difference_detection_processor):

        # Track the content type and checksum before filters
        update_obj['content_type'] = ctype_header
-        update_obj['previous_md5_before_filters'] = hashlib.md5(self.fetcher.content.encode('utf-8')).hexdigest()
+        update_obj['previous_md5_before_filters'] = current_raw_document_checksum

        # === CONTENT PREPROCESSING ===
        # Avoid creating unnecessary intermediate string copies by reassigning only when needed
--- a/changedetectionio/worker.py
+++ b/changedetectionio/worker.py
@@ -276,6 +276,7 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
                    # Yes fine, so nothing todo, don't continue to process.
                    process_changedetection_results = False
                    changed_detected = False
+                    logger.debug(f'[{uuid}] - checksumFromPreviousCheckWasTheSame - Checksum from previous check was the same, nothing todo here.')
                    
                except content_fetchers_exceptions.BrowserConnectError as e:
                    datastore.update_watch(uuid=uuid,