mirror of https://github.com/dgtlmoon/changedetection.io.git (synced 2026-02-07 14:56:02 +00:00)

Compare commits: watch-memo...watch-data (3 commits)
| Author | SHA1 | Date |
|---|---|---|
| | 559a925129 | |
| | 2349344d9e | |
| | bdc2916c07 | |
@@ -66,47 +66,42 @@ class Watch(Resource):
     @validate_openapi_request('getWatch')
     def get(self, uuid):
         """Get information about a single watch, recheck, pause, or mute."""
-        import time
-        from copy import deepcopy
-        watch = None
-        # Retry up to 20 times if dict is being modified
-        # With sleep(0), this is fast: ~200µs best case, ~20ms worst case under heavy load
-        for attempt in range(20):
-            try:
-                watch = deepcopy(self.datastore.data['watching'].get(uuid))
-                break
-            except RuntimeError:
-                # Dict changed during deepcopy, retry after yielding to scheduler
-                # sleep(0) releases GIL and yields - no fixed delay, just lets other threads run
-                if attempt < 19:  # Don't yield on last attempt
-                    time.sleep(0)  # Yield to scheduler (microseconds, not milliseconds)
-
-        if not watch:
+        # Get watch reference first (for pause/mute operations)
+        watch_obj = self.datastore.data['watching'].get(uuid)
+        if not watch_obj:
             abort(404, message='No watch exists with the UUID of {}'.format(uuid))
 
+        # Create a dict copy for JSON response (with lock for thread safety)
+        # This is much faster than deepcopy and doesn't copy the datastore reference
+        # WARNING: dict() is a SHALLOW copy - nested dicts are shared with original!
+        # Only safe because we only ADD scalar properties (line 97-101), never modify nested dicts
+        # If you need to modify nested dicts, use: from copy import deepcopy; watch = deepcopy(dict(watch_obj))
+        with self.datastore.lock:
+            watch = dict(watch_obj)
+
         if request.args.get('recheck'):
             worker_pool.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
             return "OK", 200
         if request.args.get('paused', '') == 'paused':
-            self.datastore.data['watching'].get(uuid).pause()
+            watch_obj.pause()
             return "OK", 200
         elif request.args.get('paused', '') == 'unpaused':
-            self.datastore.data['watching'].get(uuid).unpause()
+            watch_obj.unpause()
             return "OK", 200
         if request.args.get('muted', '') == 'muted':
-            self.datastore.data['watching'].get(uuid).mute()
+            watch_obj.mute()
             return "OK", 200
         elif request.args.get('muted', '') == 'unmuted':
-            self.datastore.data['watching'].get(uuid).unmute()
+            watch_obj.unmute()
             return "OK", 200
 
         # Return without history, get that via another API call
         # Properties are not returned as a JSON, so add the required props manually
-        watch['history_n'] = watch.history_n
+        watch['history_n'] = watch_obj.history_n
         # attr .last_changed will check for the last written text snapshot on change
-        watch['last_changed'] = watch.last_changed
-        watch['viewed'] = watch.viewed
-        watch['link'] = watch.link,
+        watch['last_changed'] = watch_obj.last_changed
+        watch['viewed'] = watch_obj.viewed
+        watch['link'] = watch_obj.link,
+
         return watch
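The comments in this hunk lean on the difference between a shallow and a deep copy, which is easy to get wrong. The standalone sketch below (plain Python, not changedetection.io code) shows why `dict(watch_obj)` is safe only while the handler adds new top-level keys, and why mutating a nested dict through the shallow copy would corrupt the original watch.

```python
# Minimal sketch of the shallow-vs-deep copy distinction the diff relies on.
# `original` stands in for a watch; the nested dict stands in for nested settings.
from copy import deepcopy

original = {'url': 'https://example.com', 'headers': {'cookie': 'a=1'}}

shallow = dict(original)
shallow['history_n'] = 5                # Safe: adds a NEW top-level key only
shallow['headers']['cookie'] = 'leak'   # Unsafe: the nested dict is shared...
assert original['headers']['cookie'] == 'leak'  # ...so the original mutates too

deep = deepcopy(original)
deep['headers']['cookie'] = 'isolated'  # Safe: nested structures were copied
assert original['headers']['cookie'] == 'leak'  # Original untouched this time
```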
@@ -5,6 +5,11 @@ from changedetectionio.model import watch_base
 class model(watch_base):
 
     def __init__(self, *arg, **kw):
+        # Store datastore reference (optional for Tags, but good for consistency)
+        self.__datastore = kw.get('__datastore')
+        if kw.get('__datastore'):
+            del kw['__datastore']
+
         super(model, self).__init__(*arg, **kw)
 
         self['overrides_watch'] = kw.get('default', {}).get('overrides_watch')
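Because `self.__datastore` is assigned inside a class named `model`, Python's name mangling stores it on the instance as `_model__datastore`; that is why the tests further down probe `_model__datastore` rather than `__datastore`. A minimal sketch of the mechanism, using a standalone class of the same name:

```python
# Name mangling demo: a double-underscore attribute set inside `class model`
# becomes `_model__datastore` on the instance.
class model:
    def __init__(self, ds=None):
        self.__datastore = ds  # Actually stored as self._model__datastore

m = model(ds={'watching': {}})
assert not hasattr(m, '__datastore')            # The unmangled name does not exist
assert m._model__datastore == {'watching': {}}  # The mangled name does
```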
@@ -131,6 +131,9 @@ class model(watch_base):
         # Be sure the cached timestamp is ready
         bump = self.history
 
+    # Note: __deepcopy__, __getstate__, and __setstate__ are inherited from watch_base
+    # This prevents memory leaks by sharing __datastore reference instead of copying it
+
     @property
     def viewed(self):
         # Don't return viewed when last_viewed is 0 and newest_key is 0
@@ -140,4 +140,100 @@ class watch_base(dict):
         super(watch_base, self).__init__(*arg, **kw)
 
         if self.get('default'):
             del self['default']
+
+    def __deepcopy__(self, memo):
+        """
+        Custom deepcopy for all watch_base subclasses (Watch, Tag, etc.).
+
+        CRITICAL FIX: Prevents copying large reference objects like __datastore
+        which would cause exponential memory growth when Watch objects are deepcopied.
+
+        This is called by:
+        - api/Watch.py:76 (API endpoint)
+        - api/Tags.py:28 (Tags API)
+        - processors/base.py:26 (EVERY processor run)
+        - store/__init__.py:544 (clone watch)
+        - And other locations
+        """
+        from copy import deepcopy
+
+        # Create new instance without calling __init__
+        cls = self.__class__
+        new_obj = cls.__new__(cls)
+        memo[id(self)] = new_obj
+
+        # Copy the dict data (all the settings)
+        for key, value in self.items():
+            new_obj[key] = deepcopy(value, memo)
+
+        # Copy instance attributes dynamically
+        # This handles Watch-specific attrs (like __datastore) and any future subclass attrs
+        for attr_name in dir(self):
+            # Skip methods, special attrs, and dict keys
+            if attr_name.startswith('_') and not attr_name.startswith('__'):
+                # This catches _model__datastore, _model__history_n, etc.
+                try:
+                    attr_value = getattr(self, attr_name)
+
+                    # Special handling: Share references to large objects instead of copying
+                    # Examples: __datastore, __app_reference, __global_settings, etc.
+                    if attr_name.endswith('__datastore') or attr_name.endswith('__app'):
+                        # Share the reference (don't copy!) to prevent memory leaks
+                        setattr(new_obj, attr_name, attr_value)
+                    # Skip cache attributes - let them regenerate on demand
+                    elif 'cache' in attr_name.lower():
+                        pass  # Don't copy caches
+                    # Copy regular instance attributes
+                    elif not callable(attr_value):
+                        setattr(new_obj, attr_name, attr_value)
+                except AttributeError:
+                    pass  # Attribute doesn't exist in this instance
+
+        return new_obj
+
+    def __getstate__(self):
+        """
+        Custom pickle serialization for all watch_base subclasses.
+
+        Excludes large reference objects (like __datastore) from serialization.
+        """
+        # Get the dict data
+        state = dict(self)
+
+        # Collect instance attributes (excluding methods and large references)
+        instance_attrs = {}
+        for attr_name in dir(self):
+            if attr_name.startswith('_') and not attr_name.startswith('__'):
+                try:
+                    attr_value = getattr(self, attr_name)
+                    # Exclude large reference objects and caches from serialization
+                    if not (attr_name.endswith('__datastore') or
+                            attr_name.endswith('__app') or
+                            'cache' in attr_name.lower() or
+                            callable(attr_value)):
+                        instance_attrs[attr_name] = attr_value
+                except AttributeError:
+                    pass
+
+        if instance_attrs:
+            state['__instance_metadata__'] = instance_attrs
+
+        return state
+
+    def __setstate__(self, state):
+        """
+        Custom pickle deserialization for all watch_base subclasses.
+
+        WARNING: Large reference objects (like __datastore) are NOT restored!
+        Caller must restore these references after unpickling if needed.
+        """
+        # Extract metadata
+        metadata = state.pop('__instance_metadata__', {})
+
+        # Restore dict data
+        self.update(state)
+
+        # Restore instance attributes
+        for attr_name, attr_value in metadata.items():
+            setattr(self, attr_name, attr_value)
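For readers unfamiliar with the `__deepcopy__` protocol used above: `copy.deepcopy` calls it with a `memo` dict, and registering the new object in `memo` before copying children is what breaks the watch → datastore → watch reference cycle. Below is a standalone sketch of the same share-the-heavy-reference pattern, with hypothetical names, not the project's actual classes:

```python
# Sketch: a dict subclass whose deepcopy copies its own data but shares one
# heavy reference, mirroring the watch_base.__deepcopy__ approach above.
from copy import deepcopy

class Record(dict):
    def __init__(self, shared=None, **data):
        super().__init__(**data)
        self.shared = shared  # Stand-in for the large __datastore reference

    def __deepcopy__(self, memo):
        cls = self.__class__
        new_obj = cls.__new__(cls)   # Skip __init__, like the real code
        memo[id(self)] = new_obj     # Register FIRST to break reference cycles
        for key, value in self.items():
            new_obj[key] = deepcopy(value, memo)  # Dict data: truly copied
        new_obj.shared = self.shared              # Heavy reference: shared
        return new_obj

big = {'watching': {'uuid-1': 'lots of data'}}
rec = Record(shared=big, title='a', nested={'x': 1})
dup = deepcopy(rec)
assert dup.shared is big                   # Same object, no copy made
assert dup['nested'] is not rec['nested']  # Nested data independently copied
```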
@@ -23,7 +23,14 @@ class difference_detection_processor():
     def __init__(self, datastore, watch_uuid):
         self.datastore = datastore
         self.watch_uuid = watch_uuid
 
+        # Create a stable snapshot of the watch for processing
+        # Why deepcopy?
+        # 1. Prevents "dict changed during iteration" errors if watch is modified during processing
+        # 2. Preserves Watch object with properties (.link, .is_pdf, etc.) - can't use dict()
+        # 3. Safe now: Watch.__deepcopy__() shares datastore ref (no memory leak) but copies dict data
+        self.watch = deepcopy(self.datastore.data['watching'].get(watch_uuid))
+
         # Generic fetcher that should be extended (requests, playwright etc)
         self.fetcher = Fetcher()
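Point 2 in the comment above, that `dict()` cannot replace `deepcopy` here, comes down to `@property` attributes living on the class rather than in the dict data. A small standalone illustration with a hypothetical `Watchish` class:

```python
# Why processors snapshot with deepcopy instead of dict(): a plain dict copy
# keeps the data but loses the subclass's computed properties.
from copy import deepcopy

class Watchish(dict):
    @property
    def link(self):
        return self.get('url', '').strip()

w = Watchish(url='  https://example.com  ')
assert w.link == 'https://example.com'

as_dict = dict(w)                  # Data survives, but it's a plain dict now
assert not hasattr(as_dict, 'link')

snapshot = deepcopy(w)             # Still a Watchish: .link keeps working
assert snapshot.link == 'https://example.com'
```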
@@ -248,7 +248,7 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
 
         else:
             # Fresh install - create new datastore
-            logger.critical(f"No datastore found, creating new datastore at {self.datastore_path}")
+            logger.warning(f"No datastore found, creating new datastore at {self.datastore_path}")
 
         # Set schema version to latest (no updates needed)
         updates_available = self.get_updates_available()
@@ -541,7 +541,11 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
     # Clone a watch by UUID
     def clone(self, uuid):
         url = self.data['watching'][uuid].get('url')
-        extras = deepcopy(self.data['watching'][uuid])
+        # No need to deepcopy here - add_watch() will deepcopy extras anyway (line 569)
+        # Just pass a dict copy (with lock for thread safety)
+        # NOTE: dict() is shallow copy but safe since add_watch() deepcopies it
+        with self.lock:
+            extras = dict(self.data['watching'][uuid])
         new_uuid = self.add_watch(url=url, extras=extras)
         watch = self.data['watching'][new_uuid]
         return new_uuid
@@ -872,10 +876,14 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
         # So we use the same model as a Watch
         with self.lock:
             from ..model import Tag
-            new_tag = Tag.model(datastore_path=self.datastore_path, default={
-                'title': title.strip(),
-                'date_created': int(time.time())
-            })
+            new_tag = Tag.model(
+                datastore_path=self.datastore_path,
+                __datastore=self.__data,
+                default={
+                    'title': title.strip(),
+                    'date_created': int(time.time())
+                }
+            )
 
             new_uuid = new_tag.get('uuid')
@@ -5,8 +5,10 @@
 
 import unittest
 import os
+import pickle
+from copy import deepcopy
 
-from changedetectionio.model import Watch
+from changedetectionio.model import Watch, Tag
 
 # mostly
 class TestDiffBuilder(unittest.TestCase):
@@ -68,5 +70,184 @@ class TestDiffBuilder(unittest.TestCase):
         p = watch.get_from_version_based_on_last_viewed
         assert p == "100", "Correct with only one history snapshot"
 
+    def test_watch_deepcopy_doesnt_copy_datastore(self):
+        """
+        CRITICAL: Ensure deepcopy(watch) shares __datastore instead of copying it.
+
+        Without this, deepcopy causes exponential memory growth:
+        - 100 watches × deepcopy each = 10,000 watch objects in memory (100²)
+        - Memory grows from 120MB → 2GB
+
+        This test prevents regressions in the __deepcopy__ implementation.
+        """
+        # Create a mock datastore with multiple watches
+        mock_datastore = {
+            'settings': {'application': {'history_snapshot_max_length': 10}},
+            'watching': {}
+        }
+
+        # Create 3 watches that all reference the same datastore
+        watches = []
+        for i in range(3):
+            watch = Watch.model(
+                __datastore=mock_datastore,
+                datastore_path='/tmp/test',
+                default={'url': f'https://example{i}.com', 'title': f'Watch {i}'}
+            )
+            mock_datastore['watching'][watch['uuid']] = watch
+            watches.append(watch)
+
+        # Test 1: Deepcopy shares the datastore reference (doesn't copy it)
+        watch_copy = deepcopy(watches[0])
+
+        self.assertIsNotNone(watch_copy._model__datastore,
+                             "__datastore should exist in copied watch")
+        self.assertIs(watch_copy._model__datastore, watches[0]._model__datastore,
+                      "__datastore should be SHARED (same object), not copied")
+        self.assertIs(watch_copy._model__datastore, mock_datastore,
+                      "__datastore should reference the original datastore")
+
+        # Test 2: Dict data is properly copied (not shared)
+        self.assertEqual(watch_copy['title'], 'Watch 0', "Dict data should be copied")
+        watch_copy['title'] = 'MODIFIED'
+        self.assertNotEqual(watches[0]['title'], 'MODIFIED',
+                            "Modifying copy should not affect original")
+
+        # Test 3: Verify no nested datastore copies in the watch dict
+        # The dict should only contain watch settings, not the datastore
+        watch_dict = dict(watch_copy)
+        self.assertNotIn('__datastore', watch_dict,
+                         "__datastore should not be in dict keys")
+        self.assertNotIn('_model__datastore', watch_dict,
+                         "_model__datastore should not be in dict keys")
+
+        # Test 4: Multiple deepcopies don't cause exponential memory growth
+        # If the datastore was copied, each copy would contain 3 watches,
+        # and those watches would contain the datastore, etc. (infinite recursion)
+        copies = []
+        for _ in range(5):
+            copies.append(deepcopy(watches[0]))
+
+        # All copies should share the same datastore
+        for copy in copies:
+            self.assertIs(copy._model__datastore, mock_datastore,
+                          "All copies should share the original datastore")
+
+    def test_watch_pickle_doesnt_serialize_datastore(self):
+        """
+        Ensure pickle/unpickle doesn't serialize __datastore.
+
+        This is important for multiprocessing and caching - we don't want
+        to serialize the entire datastore when pickling a watch.
+        """
+        mock_datastore = {
+            'settings': {'application': {}},
+            'watching': {}
+        }
+
+        watch = Watch.model(
+            __datastore=mock_datastore,
+            datastore_path='/tmp/test',
+            default={'url': 'https://example.com', 'title': 'Test Watch'}
+        )
+
+        # Pickle and unpickle
+        pickled = pickle.dumps(watch)
+        unpickled_watch = pickle.loads(pickled)
+
+        # Test 1: Watch data is preserved
+        self.assertEqual(unpickled_watch['url'], 'https://example.com',
+                         "Dict data should be preserved after pickle/unpickle")
+
+        # Test 2: __datastore is NOT serialized (attribute shouldn't exist after unpickle)
+        self.assertFalse(hasattr(unpickled_watch, '_model__datastore'),
+                         "__datastore attribute should not exist after unpickle (not serialized)")
+
+        # Test 3: Pickled data shouldn't contain the large datastore object
+        # If the datastore was serialized, the pickle size would be much larger
+        pickle_size = len(pickled)
+        # A single watch should be small (< 10KB), not include the entire datastore
+        self.assertLess(pickle_size, 10000,
+                        f"Pickled watch too large ({pickle_size} bytes) - might include datastore")
+
+    def test_tag_deepcopy_works(self):
+        """
+        Ensure Tag objects (which also inherit from watch_base) can be deepcopied.
+
+        Tags now have an optional __datastore for consistency with Watch objects.
+        """
+        mock_datastore = {
+            'settings': {'application': {}},
+            'watching': {}
+        }
+
+        # Test 1: Tag without datastore (backward compatibility)
+        tag_without_ds = Tag.model(
+            datastore_path='/tmp/test',
+            default={'title': 'Test Tag', 'overrides_watch': True}
+        )
+        tag_copy1 = deepcopy(tag_without_ds)
+        self.assertEqual(tag_copy1['title'], 'Test Tag', "Tag data should be copied")
+
+        # Test 2: Tag with datastore (new pattern for consistency)
+        tag_with_ds = Tag.model(
+            datastore_path='/tmp/test',
+            __datastore=mock_datastore,
+            default={'title': 'Test Tag With DS', 'overrides_watch': True}
+        )
+
+        # Deepcopy should work
+        tag_copy2 = deepcopy(tag_with_ds)
+
+        # Test 3: Dict data is copied
+        self.assertEqual(tag_copy2['title'], 'Test Tag With DS', "Tag data should be copied")
+
+        # Test 4: Modifications to the copy don't affect the original
+        tag_copy2['title'] = 'MODIFIED'
+        self.assertNotEqual(tag_with_ds['title'], 'MODIFIED',
+                            "Modifying copy should not affect original")
+
+        # Test 5: Tag with datastore shares it (doesn't copy it)
+        if hasattr(tag_with_ds, '_model__datastore'):
+            self.assertIs(tag_copy2._model__datastore, tag_with_ds._model__datastore,
+                          "Tag should share __datastore reference like Watch does")
+
+    def test_watch_copy_performance(self):
+        """
+        Verify that our __deepcopy__ implementation doesn't cause performance issues.
+
+        With the fix, deepcopy should be fast because we're sharing the datastore
+        instead of copying it.
+        """
+        import time
+
+        # Create a watch with a large datastore (many watches)
+        mock_datastore = {
+            'settings': {'application': {}},
+            'watching': {}
+        }
+
+        # Add 100 watches to the datastore
+        for i in range(100):
+            w = Watch.model(
+                __datastore=mock_datastore,
+                datastore_path='/tmp/test',
+                default={'url': f'https://example{i}.com'}
+            )
+            mock_datastore['watching'][w['uuid']] = w
+
+        # Time how long deepcopy takes
+        watch = list(mock_datastore['watching'].values())[0]
+
+        start = time.time()
+        for _ in range(10):
+            _ = deepcopy(watch)
+        elapsed = time.time() - start
+
+        # Should be fast (< 0.5 seconds for 10 copies)
+        # If the datastore was copied, it would take much longer
+        self.assertLess(elapsed, 0.5,
+                        f"Deepcopy too slow ({elapsed:.3f}s for 10 copies) - might be copying datastore")
+
 
 if __name__ == '__main__':
     unittest.main()
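The pickle tests above depend on the `__getstate__`/`__setstate__` pair added in `watch_base`: the heavy reference is left out of the pickled state, so the attribute simply does not exist after unpickling. A standalone sketch of that contract, under the assumption that a plain dict subclass is close enough to the real model:

```python
# Sketch of the pickle contract the tests check: drop the heavy reference at
# dump time, and leave it missing (for the caller to reattach) at load time.
import pickle

class Record(dict):
    def __init__(self, shared=None, **data):
        super().__init__(**data)
        self._shared = shared  # Stand-in for the large __datastore reference

    def __getstate__(self):
        return dict(self)  # Dict data only; self._shared is deliberately omitted

    def __setstate__(self, state):
        self.update(state)  # _shared is NOT restored - caller must reattach it

rec = Record(shared={'watching': {'lots': 'of data'}}, url='https://example.com')
restored = pickle.loads(pickle.dumps(rec))
assert restored['url'] == 'https://example.com'  # Data round-trips intact
assert not hasattr(restored, '_shared')          # Heavy reference was dropped
```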