Re #3833

New datastore message should be warning not critical
2026-02-14 10:16:01 +00:00 · 2026-02-05 16:37:33 +01:00 · 2026-02-05 16:25:22 +01:00
5 changed files with 56 additions and 130 deletions
--- a/changedetectionio/api/Watch.py
+++ b/changedetectionio/api/Watch.py
@@ -66,42 +66,47 @@ class Watch(Resource):
    @validate_openapi_request('getWatch')
    def get(self, uuid):
        """Get information about a single watch, recheck, pause, or mute."""
-        # Get watch reference first (for pause/mute operations)
-        watch_obj = self.datastore.data['watching'].get(uuid)
-        if not watch_obj:
-            abort(404, message='No watch exists with the UUID of {}'.format(uuid))
+        import time
+        from copy import deepcopy
+        watch = None
+        # Retry up to 20 times if dict is being modified
+        # With sleep(0), this is fast: ~200µs best case, ~20ms worst case under heavy load
+        for attempt in range(20):
+            try:
+                watch = deepcopy(self.datastore.data['watching'].get(uuid))
+                break
+            except RuntimeError:
+                # Dict changed during deepcopy, retry after yielding to scheduler
+                # sleep(0) releases GIL and yields - no fixed delay, just lets other threads run
+                if attempt < 19:  # Don't yield on last attempt
+                    time.sleep(0)  # Yield to scheduler (microseconds, not milliseconds)

-        # Create a dict copy for JSON response (with lock for thread safety)
-        # This is much faster than deepcopy and doesn't copy the datastore reference
-        # WARNING: dict() is a SHALLOW copy - nested dicts are shared with original!
-        # Only safe because we only ADD scalar properties (line 97-101), never modify nested dicts
-        # If you need to modify nested dicts, use: from copy import deepcopy; watch = deepcopy(dict(watch_obj))
-        with self.datastore.lock:
-            watch = dict(watch_obj)
+        if not watch:
+            abort(404, message='No watch exists with the UUID of {}'.format(uuid))

        if request.args.get('recheck'):
            worker_pool.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
            return "OK", 200
        if request.args.get('paused', '') == 'paused':
-            watch_obj.pause()
+            self.datastore.data['watching'].get(uuid).pause()
            return "OK", 200
        elif request.args.get('paused', '') == 'unpaused':
-            watch_obj.unpause()
+            self.datastore.data['watching'].get(uuid).unpause()
            return "OK", 200
        if request.args.get('muted', '') == 'muted':
-            watch_obj.mute()
+            self.datastore.data['watching'].get(uuid).mute()
            return "OK", 200
        elif request.args.get('muted', '') == 'unmuted':
-            watch_obj.unmute()
+            self.datastore.data['watching'].get(uuid).unmute()
            return "OK", 200

        # Return without history, get that via another API call
        # Properties are not returned as a JSON, so add the required props manually
-        watch['history_n'] = watch_obj.history_n
+        watch['history_n'] = watch.history_n
        # attr .last_changed will check for the last written text snapshot on change
-        watch['last_changed'] = watch_obj.last_changed
-        watch['viewed'] = watch_obj.viewed
-        watch['link'] = watch_obj.link,
+        watch['last_changed'] = watch.last_changed
+        watch['viewed'] = watch.viewed
+        watch['link'] = watch.link,

        return watch

--- a/changedetectionio/model/Watch.py
+++ b/changedetectionio/model/Watch.py
@@ -131,95 +131,6 @@ class model(watch_base):
        # Be sure the cached timestamp is ready
        bump = self.history

-    def __deepcopy__(self, memo):
-        """
-        Custom deepcopy that excludes __datastore to prevent memory leaks.
-
-        CRITICAL FIX: Without this, deepcopy(watch) copies the entire datastore
-        (which contains all other watches), causing exponential memory growth.
-        With 100 watches, this creates 10,000 watch objects in memory (100²).
-
-        This is called by:
-        - api/Watch.py:76 (API endpoint)
-        - processors/base.py:26 (EVERY processor run)
-        - store/__init__.py:544 (clone watch)
-        - And 4+ other locations
-        """
-        from copy import deepcopy
-
-        # Create a new instance without calling __init__ (avoids __datastore requirement)
-        cls = self.__class__
-        new_watch = cls.__new__(cls)
-        memo[id(self)] = new_watch
-
-        # Copy the dict data (all the watch settings)
-        for key, value in self.items():
-            new_watch[key] = deepcopy(value, memo)
-
-        # Copy instance attributes EXCEPT the datastore references
-        # These are cached/computed values that need to be preserved
-        new_watch._model__newest_history_key = self._model__newest_history_key
-        new_watch._model__history_n = self._model__history_n
-        new_watch.jitter_seconds = self.jitter_seconds
-
-        # Copy datastore_path (string, safe to copy)
-        new_watch._model__datastore_path = self._model__datastore_path
-
-        # CRITICAL: Share the datastore reference (don't copy it!)
-        # This is safe because we never modify the datastore through the watch
-        new_watch._model__datastore = self._model__datastore
-
-        # Do NOT copy favicon cache - let it be regenerated on demand
-        # This is just a performance cache (prevents repeated glob operations)
-        # and will be rebuilt automatically on first access
-
-        return new_watch
-
-    def __getstate__(self):
-        """
-        Custom pickle serialization that excludes __datastore.
-
-        This handles pickle/unpickle (used by multiprocessing, caching, etc.)
-        and ensures the datastore reference is never serialized.
-        """
-        # Get the dict data
-        state = dict(self)
-
-        # Add the instance attributes we want to preserve
-        state['__watch_metadata__'] = {
-            'newest_history_key': self._model__newest_history_key,
-            'history_n': self._model__history_n,
-            'jitter_seconds': self.jitter_seconds,
-            'datastore_path': self._model__datastore_path,
-        }
-
-        # NOTE: __datastore and _favicon_filename_cache are intentionally excluded
-        # Both will be regenerated/restored as needed
-        return state
-
-    def __setstate__(self, state):
-        """
-        Custom pickle deserialization.
-
-        WARNING: This creates a Watch without a __datastore reference!
-        The caller MUST set watch._model__datastore after unpickling.
-        """
-        # Extract metadata
-        metadata = state.pop('__watch_metadata__', {})
-
-        # Restore dict data
-        self.update(state)
-
-        # Restore instance attributes
-        self._model__newest_history_key = metadata.get('newest_history_key')
-        self._model__history_n = metadata.get('history_n', 0)
-        self.jitter_seconds = metadata.get('jitter_seconds', 0)
-        self._model__datastore_path = metadata.get('datastore_path')
-
-        # __datastore is NOT restored - caller must set it!
-        # _favicon_filename_cache is NOT restored - will regenerate on demand
-        self._model__datastore = None
-
    @property
    def viewed(self):
        # Don't return viewed when last_viewed is 0 and newest_key is 0
--- a/changedetectionio/processors/base.py
+++ b/changedetectionio/processors/base.py
@@ -23,14 +23,7 @@ class difference_detection_processor():
    def __init__(self, datastore, watch_uuid):
        self.datastore = datastore
        self.watch_uuid = watch_uuid
-
-        # Create a stable snapshot of the watch for processing
-        # Why deepcopy?
-        # 1. Prevents "dict changed during iteration" errors if watch is modified during processing
-        # 2. Preserves Watch object with properties (.link, .is_pdf, etc.) - can't use dict()
-        # 3. Safe now: Watch.__deepcopy__() shares datastore ref (no memory leak) but copies dict data
        self.watch = deepcopy(self.datastore.data['watching'].get(watch_uuid))
-
        # Generic fetcher that should be extended (requests, playwright etc)
        self.fetcher = Fetcher()

--- a/changedetectionio/processors/restock_diff/processor.py
+++ b/changedetectionio/processors/restock_diff/processor.py
@@ -193,18 +193,17 @@ class perform_site_check(difference_detection_processor):


        itemprop_availability = {}
+        multiple_prices_found = False

        # Try built-in extraction first, this will scan metadata in the HTML
        try:
            itemprop_availability = get_itemprop_availability(self.fetcher.content)
        except MoreThanOnePriceFound as e:
-            # Add the real data
-            raise ProcessorException(message="Cannot run, more than one price detected, this plugin is only for product pages with ONE product, try the content-change detection mode.",
-                                     url=watch.get('url'),
-                                     status_code=self.fetcher.get_last_status_code(),
-                                     screenshot=self.fetcher.screenshot,
-                                     xpath_data=self.fetcher.xpath_data
-                                     )
+            # Don't raise immediately - let plugins try to handle this case
+            # Plugins might be able to determine which price is correct
+            logger.warning(f"Built-in detection found multiple prices on {watch.get('url')}, will try plugin override")
+            multiple_prices_found = True
+            itemprop_availability = {}

        # If built-in extraction didn't get both price AND availability, try plugin override
        # Only check plugin if this watch is using a fetcher that might provide better data
@@ -216,9 +215,21 @@ class perform_site_check(difference_detection_processor):
            from changedetectionio.pluggy_interface import get_itemprop_availability_from_plugin
            fetcher_name = watch.get('fetch_backend', 'html_requests')

-            # Only try plugin override if not using system default (which might be anything)
-            if fetcher_name and fetcher_name != 'system':
-                logger.debug("Calling extra plugins for getting item price/availability")
+            # Resolve 'system' to the actual fetcher being used
+            # This allows plugins to work even when watch uses "system settings default"
+            if fetcher_name == 'system':
+                # Get the actual fetcher that was used (from self.fetcher)
+                # Fetcher class name gives us the actual backend (e.g., 'html_requests', 'html_webdriver')
+                actual_fetcher = type(self.fetcher).__name__
+                if 'html_requests' in actual_fetcher.lower():
+                    fetcher_name = 'html_requests'
+                elif 'webdriver' in actual_fetcher.lower() or 'playwright' in actual_fetcher.lower():
+                    fetcher_name = 'html_webdriver'
+                logger.debug(f"Resolved 'system' fetcher to actual fetcher: {fetcher_name}")
+
+            # Try plugin override - plugins can decide if they support this fetcher
+            if fetcher_name:
+                logger.debug(f"Calling extra plugins for getting item price/availability (fetcher: {fetcher_name})")
                plugin_availability = get_itemprop_availability_from_plugin(self.fetcher.content, fetcher_name, self.fetcher, watch.link)

                if plugin_availability:
@@ -233,6 +244,16 @@ class perform_site_check(difference_detection_processor):
                if not plugin_availability:
                    logger.debug("No item price/availability from plugins")

+        # If we had multiple prices and plugins also failed, NOW raise the exception
+        if multiple_prices_found and not itemprop_availability.get('price'):
+            raise ProcessorException(
+                message="Cannot run, more than one price detected, this plugin is only for product pages with ONE product, try the content-change detection mode.",
+                url=watch.get('url'),
+                status_code=self.fetcher.get_last_status_code(),
+                screenshot=self.fetcher.screenshot,
+                xpath_data=self.fetcher.xpath_data
+            )
+
        # Something valid in get_itemprop_availability() by scraping metadata ?
        if itemprop_availability.get('price') or itemprop_availability.get('availability'):
            # Store for other usage
--- a/changedetectionio/store/__init__.py
+++ b/changedetectionio/store/__init__.py
@@ -541,11 +541,7 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
    # Clone a watch by UUID
    def clone(self, uuid):
        url = self.data['watching'][uuid].get('url')
-        # No need to deepcopy here - add_watch() will deepcopy extras anyway (line 569)
-        # Just pass a dict copy (with lock for thread safety)
-        # NOTE: dict() is shallow copy but safe since add_watch() deepcopies it
-        with self.lock:
-            extras = dict(self.data['watching'][uuid])
+        extras = deepcopy(self.data['watching'][uuid])
        new_uuid = self.add_watch(url=url, extras=extras)
        watch = self.data['watching'][new_uuid]
        return new_uuid
Author	SHA1	Message	Date
dgtlmoon	df8f61be98	Re #3833	2026-02-05 16:37:33 +01:00
dgtlmoon	bdc2916c07	New datastore message should be warning not critical	2026-02-05 16:25:22 +01:00