Compare commits


3 Commits

Author SHA1 Message Date
dgtlmoon 9729f4c4e4 tweak 2026-02-11 17:08:58 +01:00
dgtlmoon 759d4118bf Use extruct as a last resort 2026-02-11 16:59:44 +01:00
dgtlmoon bafbdfb5c0 Price tracker - Use subprocess on linux for cleaner memory management. 2026-02-11 16:40:12 +01:00
6 changed files with 6 additions and 26 deletions
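Two of these commits describe one extraction path: structured-data parsing falls back to extruct only when the primary strategies find nothing, and on Linux that parsing runs in a short-lived child process so its memory is returned to the OS as soon as the child exits. A minimal sketch of that pattern, using illustrative names only (this is not the repository's actual handler):

import multiprocessing

def _extract_price(html, url, out_queue):
    # Child process: heavy parsers never bloat the long-running parent.
    price = None
    # ... primary extraction strategies would run here ...
    if price is None:
        # Last resort: let extruct pull JSON-LD / microdata out of the page.
        import extruct
        data = extruct.extract(html, base_url=url, syntaxes=['json-ld', 'microdata'])
        for entry in data.get('json-ld', []):
            offers = entry.get('offers') or {}
            if isinstance(offers, dict) and offers.get('price'):
                price = offers['price']
                break
    out_queue.put(price)

def extract_price(html, url, timeout=60):
    # Parent: spawn, collect the result, reap; the child's memory goes back to the OS.
    q = multiprocessing.Queue()
    p = multiprocessing.Process(target=_extract_price, args=(html, url, q))
    p.start()
    try:
        price = q.get(timeout=timeout)
    except Exception:  # queue.Empty on timeout, or a crashed child
        price = None
    p.join()
    return price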

View File

@@ -190,7 +190,7 @@ def get_plugin_processor_metadata():
logger.warning(f"Error getting plugin processor metadata: {e}")
return metadata
@lru_cache(maxsize=1)
def available_processors():
"""
Get a list of processors by name and description for the UI elements.
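The @lru_cache(maxsize=1) decorator above memoizes the result of available_processors(), so processor discovery runs once and later UI requests reuse the cached list. A toy illustration of that behaviour (the return value below is made up):

from functools import lru_cache

@lru_cache(maxsize=1)
def available_processors():
    print("scanning for processor plugins...")  # expensive discovery, runs only once
    return (("text_json_diff", "Webpage Text/HTML, JSON and PDF changes"),)

available_processors()              # scans
available_processors()              # served from the cache
available_processors.cache_clear()  # force a re-scan on the next call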

View File

@@ -257,16 +257,8 @@ class difference_detection_processor():
except IOError as e:
logger.error(f"Failed to write extra watch config {filename}: {e}")
def get_raw_document_checksum(self):
checksum = None
if self.fetcher.content:
checksum = hashlib.md5(self.fetcher.content.encode('utf-8')).hexdigest()
return checksum
@abstractmethod
def run_changedetection(self, watch, force_reprocess=False):
def run_changedetection(self, watch):
update_obj = {'last_notification_error': False, 'last_error': False}
some_data = 'xxxxx'
update_obj["previous_md5"] = hashlib.md5(some_data.encode('utf-8')).hexdigest()

View File

@@ -30,7 +30,7 @@ class perform_site_check(difference_detection_processor):
# Override to use PNG format for better image comparison (JPEG compression creates noise)
screenshot_format = SCREENSHOT_FORMAT_PNG
def run_changedetection(self, watch, force_reprocess=False):
def run_changedetection(self, watch):
"""
Perform screenshot comparison using OpenCV subprocess handler.
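PNG matters here because JPEG artefacts show up as spurious pixel differences between two captures of an identical page. A minimal sketch of the kind of comparison this enables, assuming two lossless screenshots on disk (OpenCV usage only; the project's subprocess handler is not reproduced here):

import cv2
import numpy as np

def screenshots_differ(path_a: str, path_b: str, threshold: float = 0.001) -> bool:
    # Returns True when more than `threshold` of the pixels differ between two PNGs.
    a = cv2.imread(path_a, cv2.IMREAD_GRAYSCALE)
    b = cv2.imread(path_b, cv2.IMREAD_GRAYSCALE)
    if a is None or b is None or a.shape != b.shape:
        return True  # a missing or resized screenshot counts as a change
    diff = cv2.absdiff(a, b)
    changed_ratio = np.count_nonzero(diff > 25) / diff.size
    return changed_ratio > threshold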

View File

@@ -2,7 +2,6 @@ from ..base import difference_detection_processor
from ..exceptions import ProcessorException
from . import Restock
from loguru import logger
from changedetectionio.content_fetchers.exceptions import checksumFromPreviousCheckWasTheSame
import urllib3
import time
@@ -404,16 +403,12 @@ class perform_site_check(difference_detection_processor):
screenshot = None
xpath_data = None
def run_changedetection(self, watch, force_reprocess=False):
def run_changedetection(self, watch):
import hashlib
if not watch:
raise Exception("Watch no longer exists.")
current_raw_document_checksum = self.get_raw_document_checksum()
if not force_reprocess and watch.get('previous_md5_before_filters') and watch.get('previous_md5_before_filters') == current_raw_document_checksum:
raise checksumFromPreviousCheckWasTheSame()
# Unset any existing notification error
update_obj = {'last_notification_error': False, 'last_error': False, 'restock': Restock()}
@@ -423,7 +418,6 @@ class perform_site_check(difference_detection_processor):
# Track the content type
update_obj['content_type'] = self.fetcher.headers.get('Content-Type', '')
update_obj["last_check_status"] = self.fetcher.get_last_status_code()
update_obj['previous_md5_before_filters'] = current_raw_document_checksum
# Only try to process restock information (like scraping for keywords) if the page was actually rendered correctly.
# Otherwise it will assume "in stock" because nothing suggesting the opposite was found
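That guard carries the key assumption: keyword-based restock detection treats the absence of any out-of-stock phrase as "in stock", so a blank or unrendered page must never reach it. A simplified sketch of the idea (the phrase list and status check are illustrative):

OUT_OF_STOCK_PHRASES = ('out of stock', 'sold out', 'currently unavailable')

def looks_in_stock(page_text: str) -> bool:
    # No out-of-stock phrase found means "in stock", which is exactly why
    # an error page or empty render has to be rejected before this runs.
    lowered = page_text.lower()
    return not any(phrase in lowered for phrase in OUT_OF_STOCK_PHRASES)

def restock_state(status_code: int, page_text: str):
    if status_code != 200 or not page_text.strip():
        return None  # page did not render correctly, make no claim either way
    return looks_in_stock(page_text)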

View File

@@ -7,7 +7,6 @@ import re
import urllib3
from changedetectionio.conditions import execute_ruleset_against_all_plugins
from changedetectionio.content_fetchers.exceptions import checksumFromPreviousCheckWasTheSame
from ..base import difference_detection_processor
from changedetectionio.html_tools import PERL_STYLE_REGEX, cdata_in_document_to_text, TRANSLATE_WHITESPACE_TABLE
from changedetectionio import html_tools, content_fetchers
@@ -369,16 +368,12 @@ class ChecksumCalculator:
# (set_proxy_from_list)
class perform_site_check(difference_detection_processor):
def run_changedetection(self, watch, force_reprocess=False):
def run_changedetection(self, watch):
changed_detected = False
if not watch:
raise Exception("Watch no longer exists.")
current_raw_document_checksum = self.get_raw_document_checksum()
if not force_reprocess and watch.get('previous_md5_before_filters') and watch.get('previous_md5_before_filters') == current_raw_document_checksum:
raise checksumFromPreviousCheckWasTheSame()
# Initialize components
filter_config = FilterConfig(watch, self.datastore)
content_processor = ContentProcessor(self.fetcher, watch, filter_config, self.datastore)
@@ -398,7 +393,7 @@ class perform_site_check(difference_detection_processor):
# Track the content type and checksum before filters
update_obj['content_type'] = ctype_header
update_obj['previous_md5_before_filters'] = current_raw_document_checksum
update_obj['previous_md5_before_filters'] = hashlib.md5(self.fetcher.content.encode('utf-8')).hexdigest()
# === CONTENT PREPROCESSING ===
# Avoid creating unnecessary intermediate string copies by reassigning only when needed
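The preprocessing comment describes a simple pattern for large documents: rebind the content variable only when a transformation actually applies, so an untouched document never pays for an extra full-size string copy. A small self-contained illustration (the transformations below stand in for the real filter steps):

import re

def preprocess(content: str, is_rss: bool, collapse_whitespace: bool) -> str:
    # Each step reassigns `content` only when its condition holds.
    if is_rss:
        content = re.sub(r'<!\[CDATA\[(.*?)\]\]>', r'\1', content, flags=re.DOTALL)
    if collapse_whitespace:
        content = ' '.join(content.split())
    return content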

View File

@@ -276,7 +276,6 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
# Yes fine, so nothing to do, don't continue to process.
process_changedetection_results = False
changed_detected = False
logger.debug(f'[{uuid}] - checksumFromPreviousCheckWasTheSame - Checksum from previous check was the same, nothing todo here.')
except content_fetchers_exceptions.BrowserConnectError as e:
datastore.update_watch(uuid=uuid,