Compare commits

...

1 Commits

Author SHA1 Message Date
dgtlmoon
1d281baace Avoid reprocessing if the page was the same 2026-02-12 15:56:36 +01:00
6 changed files with 26 additions and 6 deletions

View File

@@ -190,7 +190,7 @@ def get_plugin_processor_metadata():
logger.warning(f"Error getting plugin processor metadata: {e}")
return metadata
@lru_cache(maxsize=1)
def available_processors():
"""
Get a list of processors by name and description for the UI elements.

View File

@@ -257,8 +257,16 @@ class difference_detection_processor():
except IOError as e:
logger.error(f"Failed to write extra watch config {filename}: {e}")
def get_raw_document_checksum(self):
checksum = None
if self.fetcher.content:
checksum = hashlib.md5(self.fetcher.content.encode('utf-8')).hexdigest()
return checksum
@abstractmethod
def run_changedetection(self, watch):
def run_changedetection(self, watch, force_reprocess=False):
update_obj = {'last_notification_error': False, 'last_error': False}
some_data = 'xxxxx'
update_obj["previous_md5"] = hashlib.md5(some_data.encode('utf-8')).hexdigest()

View File

@@ -30,7 +30,7 @@ class perform_site_check(difference_detection_processor):
# Override to use PNG format for better image comparison (JPEG compression creates noise)
screenshot_format = SCREENSHOT_FORMAT_PNG
def run_changedetection(self, watch):
def run_changedetection(self, watch, force_reprocess=False):
"""
Perform screenshot comparison using OpenCV subprocess handler.

View File

@@ -2,6 +2,7 @@ from ..base import difference_detection_processor
from ..exceptions import ProcessorException
from . import Restock
from loguru import logger
from changedetectionio.content_fetchers.exceptions import checksumFromPreviousCheckWasTheSame
import urllib3
import time
@@ -403,12 +404,16 @@ class perform_site_check(difference_detection_processor):
screenshot = None
xpath_data = None
def run_changedetection(self, watch):
def run_changedetection(self, watch, force_reprocess=False):
import hashlib
if not watch:
raise Exception("Watch no longer exists.")
current_raw_document_checksum = self.get_raw_document_checksum()
if not force_reprocess and watch.get('previous_md5_before_filters') and watch.get('previous_md5_before_filters') == current_raw_document_checksum:
raise checksumFromPreviousCheckWasTheSame()
# Unset any existing notification error
update_obj = {'last_notification_error': False, 'last_error': False, 'restock': Restock()}
@@ -418,6 +423,7 @@ class perform_site_check(difference_detection_processor):
# Track the content type
update_obj['content_type'] = self.fetcher.headers.get('Content-Type', '')
update_obj["last_check_status"] = self.fetcher.get_last_status_code()
update_obj['previous_md5_before_filters'] = current_raw_document_checksum
# Only try to process restock information (like scraping for keywords) if the page was actually rendered correctly.
# Otherwise it will assume "in stock" because nothing suggesting the opposite was found

View File

@@ -7,6 +7,7 @@ import re
import urllib3
from changedetectionio.conditions import execute_ruleset_against_all_plugins
from changedetectionio.content_fetchers.exceptions import checksumFromPreviousCheckWasTheSame
from ..base import difference_detection_processor
from changedetectionio.html_tools import PERL_STYLE_REGEX, cdata_in_document_to_text, TRANSLATE_WHITESPACE_TABLE
from changedetectionio import html_tools, content_fetchers
@@ -368,12 +369,16 @@ class ChecksumCalculator:
# (set_proxy_from_list)
class perform_site_check(difference_detection_processor):
def run_changedetection(self, watch):
def run_changedetection(self, watch, force_reprocess=False):
changed_detected = False
if not watch:
raise Exception("Watch no longer exists.")
current_raw_document_checksum = self.get_raw_document_checksum()
if not force_reprocess and watch.get('previous_md5_before_filters') and watch.get('previous_md5_before_filters') == current_raw_document_checksum:
raise checksumFromPreviousCheckWasTheSame()
# Initialize components
filter_config = FilterConfig(watch, self.datastore)
content_processor = ContentProcessor(self.fetcher, watch, filter_config, self.datastore)
@@ -393,7 +398,7 @@ class perform_site_check(difference_detection_processor):
# Track the content type and checksum before filters
update_obj['content_type'] = ctype_header
update_obj['previous_md5_before_filters'] = hashlib.md5(self.fetcher.content.encode('utf-8')).hexdigest()
update_obj['previous_md5_before_filters'] = current_raw_document_checksum
# === CONTENT PREPROCESSING ===
# Avoid creating unnecessary intermediate string copies by reassigning only when needed

View File

@@ -276,6 +276,7 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
# Yes fine, so nothing todo, don't continue to process.
process_changedetection_results = False
changed_detected = False
logger.debug(f'[{uuid}] - checksumFromPreviousCheckWasTheSame - Checksum from previous check was the same, nothing todo here.')
except content_fetchers_exceptions.BrowserConnectError as e:
datastore.update_watch(uuid=uuid,