Compare commits

...

3 Commits

Author     SHA1        Message                                                Date
dgtlmoon   799818dd40  add note                                               2026-02-05 16:30:35 +01:00
dgtlmoon   b06797636c  New datastore message should be warning not critical   2026-02-05 16:24:52 +01:00
dgtlmoon   fcd07e23f3  Improved watch global settings handling                2026-02-05 16:21:52 +01:00
4 changed files with 121 additions and 26 deletions

View File

@@ -66,47 +66,42 @@ class Watch(Resource):
     @validate_openapi_request('getWatch')
     def get(self, uuid):
         """Get information about a single watch, recheck, pause, or mute."""
-        import time
-        from copy import deepcopy
-
-        watch = None
-        # Retry up to 20 times if the dict is being modified
-        # With sleep(0) this is fast: ~200µs best case, ~20ms worst case under heavy load
-        for attempt in range(20):
-            try:
-                watch = deepcopy(self.datastore.data['watching'].get(uuid))
-                break
-            except RuntimeError:
-                # Dict changed during deepcopy, retry after yielding to the scheduler
-                # sleep(0) releases the GIL and yields - no fixed delay, just lets other threads run
-                if attempt < 19:  # Don't yield on the last attempt
-                    time.sleep(0)  # Yield to the scheduler (microseconds, not milliseconds)
-
-        if not watch:
+        # Get the watch reference first (needed for the pause/mute operations below)
+        watch_obj = self.datastore.data['watching'].get(uuid)
+        if not watch_obj:
             abort(404, message='No watch exists with the UUID of {}'.format(uuid))

+        # Create a dict copy for the JSON response (with the lock held for thread safety).
+        # This is much faster than deepcopy and doesn't copy the datastore reference.
+        # WARNING: dict() is a SHALLOW copy - nested dicts are shared with the original!
+        # Only safe because we only ADD scalar properties below, never modify nested dicts.
+        # If you need to modify nested dicts, use: from copy import deepcopy; watch = deepcopy(dict(watch_obj))
+        with self.datastore.lock:
+            watch = dict(watch_obj)
+
         if request.args.get('recheck'):
             worker_pool.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
             return "OK", 200

         if request.args.get('paused', '') == 'paused':
-            self.datastore.data['watching'].get(uuid).pause()
+            watch_obj.pause()
             return "OK", 200
         elif request.args.get('paused', '') == 'unpaused':
-            self.datastore.data['watching'].get(uuid).unpause()
+            watch_obj.unpause()
             return "OK", 200

         if request.args.get('muted', '') == 'muted':
-            self.datastore.data['watching'].get(uuid).mute()
+            watch_obj.mute()
             return "OK", 200
         elif request.args.get('muted', '') == 'unmuted':
-            self.datastore.data['watching'].get(uuid).unmute()
+            watch_obj.unmute()
             return "OK", 200

         # Return without history, get that via another API call
         # Properties are not returned as JSON, so add the required props manually
-        watch['history_n'] = watch.history_n
+        watch['history_n'] = watch_obj.history_n
         # attr .last_changed will check for the last written text snapshot on change
-        watch['last_changed'] = watch.last_changed
-        watch['viewed'] = watch.viewed
-        watch['link'] = watch.link,
+        watch['last_changed'] = watch_obj.last_changed
+        watch['viewed'] = watch_obj.viewed
+        watch['link'] = watch_obj.link  # No trailing comma - it would wrap the value in a tuple
         return watch
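
The shallow-copy caveat in the comments above is easy to reproduce in isolation. A minimal, self-contained sketch (illustrative names, not from the codebase): adding a top-level scalar to a dict() copy is harmless, but mutating a nested dict through it writes through to the original, which is exactly why only scalar properties are added here.

from copy import deepcopy

original = {'url': 'https://example.com', 'headers': {'x-api-key': 'secret'}}

shallow = dict(original)
shallow['history_n'] = 5              # Safe: ADDING a top-level scalar leaves the original untouched
shallow['headers']['leaked'] = True   # Unsafe: the nested dict is SHARED with the original

assert 'history_n' not in original            # The scalar addition did not leak back...
assert original['headers']['leaked'] is True  # ...but the nested mutation did

deep = deepcopy(original)
deep['headers']['safe'] = True        # Fine: deepcopy duplicated the nested dict too
assert 'safe' not in original['headers']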

View File

@@ -131,6 +131,95 @@ class model(watch_base):
         # Be sure the cached timestamp is ready
         bump = self.history

+    def __deepcopy__(self, memo):
+        """
+        Custom deepcopy that excludes __datastore to prevent memory leaks.
+
+        CRITICAL FIX: Without this, deepcopy(watch) copies the entire datastore
+        (which contains all other watches), causing quadratic memory growth.
+        With 100 watches, this creates 10,000 watch objects in memory (100²).
+
+        This is called by:
+        - api/Watch.py:76 (API endpoint)
+        - processors/base.py:26 (EVERY processor run)
+        - store/__init__.py:544 (clone watch)
+        - And 4+ other locations
+        """
+        from copy import deepcopy
+        # Create a new instance without calling __init__ (avoids the __datastore requirement)
+        cls = self.__class__
+        new_watch = cls.__new__(cls)
+        memo[id(self)] = new_watch
+
+        # Copy the dict data (all the watch settings)
+        for key, value in self.items():
+            new_watch[key] = deepcopy(value, memo)
+
+        # Copy instance attributes EXCEPT the datastore reference.
+        # These are cached/computed values that need to be preserved.
+        new_watch._model__newest_history_key = self._model__newest_history_key
+        new_watch._model__history_n = self._model__history_n
+        new_watch.jitter_seconds = self.jitter_seconds
+
+        # Copy datastore_path (a string, safe to copy)
+        new_watch._model__datastore_path = self._model__datastore_path
+
+        # CRITICAL: Share the datastore reference (don't copy it!)
+        # This is safe because we never modify the datastore through the watch.
+        new_watch._model__datastore = self._model__datastore
+
+        # Do NOT copy the favicon cache - let it be regenerated on demand.
+        # It is just a performance cache (prevents repeated glob operations)
+        # and will be rebuilt automatically on first access.
+        return new_watch
+
+    def __getstate__(self):
+        """
+        Custom pickle serialization that excludes __datastore.
+
+        This handles pickle/unpickle (used by multiprocessing, caching, etc.)
+        and ensures the datastore reference is never serialized.
+        """
+        # Get the dict data
+        state = dict(self)
+        # Add the instance attributes we want to preserve
+        state['__watch_metadata__'] = {
+            'newest_history_key': self._model__newest_history_key,
+            'history_n': self._model__history_n,
+            'jitter_seconds': self.jitter_seconds,
+            'datastore_path': self._model__datastore_path,
+        }
+        # NOTE: __datastore and _favicon_filename_cache are intentionally excluded.
+        # Both will be regenerated/restored as needed.
+        return state
+
+    def __setstate__(self, state):
+        """
+        Custom pickle deserialization.
+
+        WARNING: This creates a Watch without a __datastore reference!
+        The caller MUST set watch._model__datastore after unpickling.
+        """
+        # Extract metadata
+        metadata = state.pop('__watch_metadata__', {})
+        # Restore dict data
+        self.update(state)
+        # Restore instance attributes
+        self._model__newest_history_key = metadata.get('newest_history_key')
+        self._model__history_n = metadata.get('history_n', 0)
+        self.jitter_seconds = metadata.get('jitter_seconds', 0)
+        self._model__datastore_path = metadata.get('datastore_path')
+        # __datastore is NOT restored - the caller must set it!
+        # _favicon_filename_cache is NOT restored - it will regenerate on demand.
+        self._model__datastore = None
+
     @property
     def viewed(self):
         # Don't return viewed when last_viewed is 0 and newest_key is 0
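
The guarantee these dunder methods provide can be sketched with a stripped-down stand-in. Datastore and WatchModel below are hypothetical simplifications, not the project's real classes; they only show the pattern: deepcopy duplicates the settings dict but shares the datastore reference, and pickling drops the reference entirely so the caller must re-attach it.

from copy import deepcopy
import pickle

class Datastore:
    """Hypothetical stand-in for the real datastore (holds every watch)."""
    def __init__(self):
        self.watching = {}

class WatchModel(dict):
    """Hypothetical stand-in for model(watch_base); a dict subclass like the original."""
    def __init__(self, datastore, **kwargs):
        super().__init__(**kwargs)
        self._datastore = datastore

    def __deepcopy__(self, memo):
        cls = self.__class__
        new = cls.__new__(cls)       # Skip __init__, so no datastore argument is needed
        memo[id(self)] = new         # Register early in case of self-references
        for key, value in self.items():
            new[key] = deepcopy(value, memo)   # Deep-copy the settings dict
        new._datastore = self._datastore       # SHARE the datastore - never copy it
        return new

    def __getstate__(self):
        return dict(self)            # The datastore ref never reaches the pickle stream

    def __setstate__(self, state):
        self.update(state)
        self._datastore = None       # Caller must re-attach the datastore after unpickling

store = Datastore()
watch = WatchModel(store, url='https://example.com')

clone = deepcopy(watch)
assert clone._datastore is store            # Shared reference, no second datastore in memory
assert clone == watch and clone is not watch

restored = pickle.loads(pickle.dumps(watch))
assert restored._datastore is None          # Must be re-attached by the caller
restored._datastore = store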

View File

@@ -23,7 +23,14 @@ class difference_detection_processor():
     def __init__(self, datastore, watch_uuid):
         self.datastore = datastore
         self.watch_uuid = watch_uuid
+
+        # Create a stable snapshot of the watch for processing.
+        # Why deepcopy?
+        # 1. Prevents "dict changed during iteration" errors if the watch is modified during processing
+        # 2. Preserves the Watch object with its properties (.link, .is_pdf, etc.) - can't use dict()
+        # 3. Safe now: Watch.__deepcopy__() shares the datastore ref (no memory leak) but copies the dict data
         self.watch = deepcopy(self.datastore.data['watching'].get(watch_uuid))

         # Generic fetcher that should be extended (requests, playwright etc)
         self.fetcher = Fetcher()
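
The "dict changed during iteration" failure named in point 1 is real CPython behaviour: deepcopy iterates the dict's items, and a concurrent insert invalidates the iterator with a RuntimeError. A small demonstration (timing-dependent, so it may or may not trigger on a given run):

import threading
from copy import deepcopy

data = {i: {'value': i} for i in range(100_000)}

def writer():
    # Keep growing the dict while the main thread tries to copy it
    for i in range(100_000, 200_000):
        data[i] = {'value': i}

t = threading.Thread(target=writer)
t.start()
try:
    snapshot = deepcopy(data)
    print("copy completed before any resize was observed")
except RuntimeError as e:
    # Typically: "dictionary changed size during iteration"
    print(f"deepcopy failed: {e}")
t.join()

Taking the snapshot once in __init__ means each processor run works on data that cannot change underneath it, so no retry loop is needed.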

View File

@@ -248,7 +248,7 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
         else:
             # Fresh install - create new datastore
-            logger.critical(f"No datastore found, creating new datastore at {self.datastore_path}")
+            logger.warning(f"No datastore found, creating new datastore at {self.datastore_path}")

             # Set schema version to latest (no updates needed)
             updates_available = self.get_updates_available()
@@ -541,7 +541,11 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):

     # Clone a watch by UUID
     def clone(self, uuid):
         url = self.data['watching'][uuid].get('url')
-        extras = deepcopy(self.data['watching'][uuid])
+        # No need to deepcopy here - add_watch() will deepcopy extras anyway (line 569).
+        # Just pass a dict copy (with the lock held for thread safety).
+        # NOTE: dict() is a shallow copy, but that is safe since add_watch() deepcopies it.
+        with self.lock:
+            extras = dict(self.data['watching'][uuid])
         new_uuid = self.add_watch(url=url, extras=extras)
         watch = self.data['watching'][new_uuid]
         return new_uuid
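
The reasoning in the clone() comment - a shallow copy at the call boundary is enough when the callee deep-copies before storing - can be shown with a toy version. add_watch here is a hypothetical simplification, not the real method:

from copy import deepcopy
import threading

lock = threading.Lock()
watching = {'abc-123': {'url': 'https://example.com', 'tags': ['news']}}

def add_watch(url, extras):
    """Hypothetical simplification: the callee deep-copies extras before storing,
    so it owns its data outright and callers only need a cheap shallow copy."""
    stored = deepcopy(extras)
    stored['url'] = url
    return stored

# Caller side: a shallow dict() under the lock gives a consistent top-level view.
# Nested values are still shared at this point, but add_watch() deep-copies them anyway.
with lock:
    extras = dict(watching['abc-123'])

new_watch = add_watch('https://example.com/clone', extras)
new_watch['tags'].append('cloned')
assert watching['abc-123']['tags'] == ['news']   # The original watch is untouched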