From 427860ffaec31ab7244cd45501992be71498cd6d Mon Sep 17 00:00:00 2001 From: dgtlmoon Date: Fri, 12 Jul 2024 15:50:55 +0200 Subject: [PATCH] Fixing exceptions and multiple price finding --- .../processors/restock_diff/processor.py | 13 ++++++++++--- changedetectionio/update_worker.py | 10 ---------- 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/changedetectionio/processors/restock_diff/processor.py b/changedetectionio/processors/restock_diff/processor.py index 1b5b5e7e..4defd433 100644 --- a/changedetectionio/processors/restock_diff/processor.py +++ b/changedetectionio/processors/restock_diff/processor.py @@ -59,7 +59,9 @@ def get_itemprop_availability(html_content) -> Restock: price_result = price_parse.find(data) if price_result: - if len(price_result) > 1: + if len(price_result) > 1 and len(set(str(item.value).replace('$', '') for item in price_result)) > 1: + # See if all prices are different, in the case that one product has many embedded data types with the same price + # One might have $121.95 and another 121.95 etc raise MoreThanOnePriceFound() value['price'] = price_result[0].value @@ -88,7 +90,6 @@ def get_itemprop_availability(html_content) -> Restock: value['availability'] = _search_prop_by_value([match.value], "product:availability") if not value.get('currency'): value['currency'] = _search_prop_by_value([match.value], "price:currency") - logger.trace(f"Processed with Extruct in {time.time()-now:.3f}s") return value @@ -161,7 +162,13 @@ class perform_site_check(difference_detection_processor): fetched_md5 = None if not self.fetcher.instock_data and not itemprop_availability.get('availability'): - raise UnableToExtractRestockData(status_code=self.fetcher.status_code) + raise ProcessorException( + message=f"Unable to extract restock data for this page unfortunately. 
(Got code {self.fetcher.get_last_status_code()} from server), no embedded stock information was found and nothing interesting in the text, try using this watch with Chrome.", + url=watch.get('url'), + status_code=self.fetcher.get_last_status_code(), + screenshot=self.fetcher.screenshot, + xpath_data=self.fetcher.xpath_data + ) # Nothing automatic in microdata found, revert to scraping the page if self.fetcher.instock_data and itemprop_availability.get('availability') is None: diff --git a/changedetectionio/update_worker.py b/changedetectionio/update_worker.py index 8777ebb0..a5af5c2b 100644 --- a/changedetectionio/update_worker.py +++ b/changedetectionio/update_worker.py @@ -1,7 +1,6 @@ from .processors.exceptions import ProcessorException from . import content_fetchers -from .processors.restock_diff.processor import UnableToExtractRestockData from changedetectionio.processors.text_json_diff.processor import FilterNotFoundInResponse from changedetectionio import html_tools @@ -472,15 +471,6 @@ class update_worker(threading.Thread): process_changedetection_results = False logger.error(f"Exception (BrowserStepsInUnsupportedFetcher) reached processing watch UUID: {uuid}") - except UnableToExtractRestockData as e: - # Usually when fetcher.instock_data returns empty - self.app.logger.error("Exception reached processing watch UUID: %s - %s", uuid, str(e)) - self.datastore.update_watch(uuid=uuid, - update_obj={ - 'last_error': f"Unable to extract restock data for this page unfortunately. (Got code {e.status_code} from server), no embedded stock information was found and nothing interesting in the text, try using this watch with Chrome.", - } - ) - process_changedetection_results = False except Exception as e: logger.error(f"Exception reached processing watch UUID: {uuid}") logger.error(str(e))