Compare commits

...

1 Commit

Author SHA1 Message Date
dgtlmoon
987789425d Tags update fix (#3849)
2026-02-07 17:13:41 +01:00
6 changed files with 127 additions and 93 deletions

View File

@@ -605,7 +605,7 @@ def main():
return dict(right_sticky="v{}".format(datastore.data['version_tag']),
new_version_available=app.config['NEW_VERSION_AVAILABLE'],
has_password=datastore.data['settings']['application']['password'] != False,
socket_io_enabled=datastore.data['settings']['application']['ui'].get('socket_io_enabled', True),
socket_io_enabled=datastore.data['settings']['application'].get('ui', {}).get('socket_io_enabled', True),
all_paused=datastore.data['settings']['application'].get('all_paused', False),
all_muted=datastore.data['settings']['application'].get('all_muted', False)
)

View File

@@ -848,7 +848,7 @@ def changedetection_app(config=None, datastore_o=None):
app.register_blueprint(watchlist.construct_blueprint(datastore=datastore, update_q=update_q, queuedWatchMetaData=queuedWatchMetaData), url_prefix='')
# Initialize Socket.IO server conditionally based on settings
socket_io_enabled = datastore.data['settings']['application']['ui'].get('socket_io_enabled', True)
socket_io_enabled = datastore.data['settings']['application'].get('ui', {}).get('socket_io_enabled', True)
if socket_io_enabled and app.config.get('batch_mode'):
socket_io_enabled = False
if socket_io_enabled:
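Both hunks above make the same change: direct ['ui'] indexing becomes a chained .get('ui', {}) lookup, so datastores written before the 'ui' settings block existed no longer raise KeyError. A minimal standalone sketch of that pattern (the 'settings' dict below is a stand-in, not the project's real structure):

# Minimal sketch of the defensive lookup pattern introduced above.
settings = {'application': {}}  # legacy datastore written before the 'ui' block existed

# Direct indexing, settings['application']['ui']['socket_io_enabled'], would raise KeyError.
# Chained .get() calls fall back to defaults instead:
socket_io_enabled = settings['application'].get('ui', {}).get('socket_io_enabled', True)
assert socket_io_enabled is True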

View File

@@ -510,7 +510,12 @@ class watch_base(dict):
# Save to disk via subclass implementation
try:
# Determine entity type from module name (Watch.py -> watch, Tag.py -> tag)
from changedetectionio.model.persistence import _determine_entity_type
entity_type = _determine_entity_type(self.__class__)
filename = f"{entity_type}.json"
self._save_to_disk(data_dict, uuid)
logger.debug(f"Committed {self.__class__.__name__.lower()} {uuid}")
logger.debug(f"Committed {entity_type} {uuid} to {uuid}/{filename}")
except Exception as e:
logger.error(f"Failed to commit {uuid}: {e}")

View File

@@ -123,10 +123,17 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
self.__data['settings']['application'].update(settings_data['settings']['application'])
def _rehydrate_tags(self):
"""Rehydrate tag entities from stored data."""
"""Rehydrate tag entities from stored data into Tag objects with restock_diff processor."""
from ..model import Tag
for uuid, tag in self.__data['settings']['application']['tags'].items():
self.__data['settings']['application']['tags'][uuid] = self.rehydrate_entity(
uuid, tag, processor_override='restock_diff'
# Force processor to restock_diff for override functionality (technical debt)
tag['processor'] = 'restock_diff'
self.__data['settings']['application']['tags'][uuid] = Tag.model(
datastore_path=self.datastore_path,
__datastore=self.__data,
default=tag
)
logger.info(f"Tag: {uuid} {tag['title']}")
@@ -236,8 +243,32 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
if not legacy_data:
raise Exception("Failed to load legacy datastore from url-watches.json")
# Store the loaded data
self.__data = legacy_data
# Merge legacy data with base_config defaults (preserves new fields like 'ui')
# self.__data already has App.model() defaults from line 190
logger.info("Merging legacy data with base_config defaults...")
# Apply top-level fields from legacy data
if 'app_guid' in legacy_data:
self.__data['app_guid'] = legacy_data['app_guid']
if 'build_sha' in legacy_data:
self.__data['build_sha'] = legacy_data['build_sha']
if 'version_tag' in legacy_data:
self.__data['version_tag'] = legacy_data['version_tag']
# Apply watching data (complete replacement as these are user's watches)
if 'watching' in legacy_data:
self.__data['watching'] = legacy_data['watching']
# Merge settings sections (preserves base_config defaults for missing fields)
if 'settings' in legacy_data:
if 'headers' in legacy_data['settings']:
self.__data['settings']['headers'].update(legacy_data['settings']['headers'])
if 'requests' in legacy_data['settings']:
self.__data['settings']['requests'].update(legacy_data['settings']['requests'])
if 'application' in legacy_data['settings']:
# CRITICAL: Use .update() to merge, not replace
# This preserves new fields like 'ui' that exist in base_config
self.__data['settings']['application'].update(legacy_data['settings']['application'])
# CRITICAL: Rehydrate watches from dicts into Watch objects
# This ensures watches have their methods available during migration
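The CRITICAL comments in the hunk hinge on dict.update() merging key-by-key instead of replacing the whole section, so defaults added after the legacy file was written (such as the 'ui' block) survive. A small self-contained demonstration of that merge semantics:

# Why .update() rather than assignment: newer defaults must survive the merge.
defaults = {'ui': {'socket_io_enabled': True}, 'password': False}   # from base_config / App.model()
legacy   = {'password': 'secret'}                                   # saved before 'ui' existed

merged = dict(defaults)
merged.update(legacy)                       # merge key-by-key, don't replace wholesale

assert merged['ui'] == {'socket_io_enabled': True}   # new default preserved
assert merged['password'] == 'secret'                # legacy value wins where present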
@@ -340,20 +371,25 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
"""
Build settings data structure for saving.
Tags are excluded - they are stored in individual {uuid}/tag.json files.
This keeps changedetection.json small and allows atomic tag updates.
Tags behavior depends on schema version:
- Before update_28 (schema < 28): Tags saved in settings for migration
- After update_28 (schema >= 28): Tags excluded from settings (in individual files)
Returns:
dict: Settings data ready for serialization (without tags)
dict: Settings data ready for serialization
"""
import copy
# Deep copy settings to avoid modifying the original
settings_copy = copy.deepcopy(self.__data['settings'])
# Replace tags dict with empty dict (tags are in individual tag.json files)
# We keep the empty dict for backwards compatibility and clear structure
settings_copy['application']['tags'] = {}
# Only exclude tags if we've already migrated them to individual files (schema >= 28)
# This ensures update_28 can migrate tags from settings
schema_version = self.__data['settings']['application'].get('schema_version', 0)
if schema_version >= 28:
# Tags are in individual tag.json files, don't save to settings
settings_copy['application']['tags'] = {}
# else: keep tags in settings for update_28 migration
return {
'note': 'Settings file - watches are in {uuid}/watch.json, tags are in {uuid}/tag.json',
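The rewritten builder only strips tags out of the settings payload once the datastore has reached schema 28, so the update_28 migration can still find them there. A condensed sketch of that gate (function name and argument shape are illustrative):

import copy

def build_settings_data(settings):
    """Sketch of the gate above: tags stay in the settings file until update_28 has run."""
    settings_copy = copy.deepcopy(settings)
    schema_version = settings['application'].get('schema_version', 0)
    if schema_version >= 28:
        # Tags already live in individual {uuid}/tag.json files; keep changedetection.json small
        settings_copy['application']['tags'] = {}
    # else: keep tags in settings so update_28 can still migrate them
    return settings_copy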
@@ -403,9 +439,22 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
File backend implementation: reads individual tag.json files.
Tags loaded from files override any tags in settings (migration path).
"""
from ..model import Tag
def rehydrate_tag(uuid, entity_dict):
"""Rehydrate tag as Tag object with forced restock_diff processor."""
entity_dict['uuid'] = uuid
entity_dict['processor'] = 'restock_diff' # Force processor for override functionality
return Tag.model(
datastore_path=self.datastore_path,
__datastore=self.__data,
default=entity_dict
)
tags = load_all_tags(
self.datastore_path,
self.rehydrate_entity
rehydrate_tag
)
# Override settings tags with loaded tags
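Instead of passing the generic self.rehydrate_entity, _load_tags now hands load_all_tags a small closure that forces the processor and builds a Tag.model. A simplified sketch of that callback style (load_all_tags_sketch below is illustrative, not the project's real loader):

import glob
import json
import os

def load_all_tags_sketch(datastore_path, rehydrate):
    """Walk {uuid}/tag.json files and delegate dict -> object conversion to the callback."""
    tags = {}
    for tag_json in glob.glob(os.path.join(datastore_path, '*', 'tag.json')):
        uuid = os.path.basename(os.path.dirname(tag_json))
        with open(tag_json, 'r', encoding='utf-8') as f:
            tags[uuid] = rehydrate(uuid, json.load(f))
    return tags

# The caller decides what rehydration means, e.g. forcing the processor as above:
tags = load_all_tags_sketch('/tmp/datastore',
                            lambda uuid, d: {**d, 'uuid': uuid, 'processor': 'restock_diff'})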

View File

@@ -227,8 +227,7 @@ def load_watch_from_file(watch_json, uuid, rehydrate_entity_func):
rehydrate_entity_func: Function to convert dict to Watch object
Returns:
Tuple of (Watch object, raw_data_dict) or (None, None) if failed
The raw_data_dict is needed to compute the hash before rehydration
Watch object or None if failed
"""
try:
# Check file size before reading
@@ -241,7 +240,7 @@ def load_watch_from_file(watch_json, uuid, rehydrate_entity_func):
f"File: {watch_json}. This indicates a bug or data corruption. "
f"Watch will be skipped."
)
return None, None
return None
if HAS_ORJSON:
with open(watch_json, 'rb') as f:
@@ -250,10 +249,9 @@ def load_watch_from_file(watch_json, uuid, rehydrate_entity_func):
with open(watch_json, 'r', encoding='utf-8') as f:
watch_data = json.load(f)
# Return both the raw data and the rehydrated watch
# Raw data is needed to compute hash before rehydration changes anything
# Rehydrate and return watch object
watch_obj = rehydrate_entity_func(uuid, watch_data)
return watch_obj, watch_data
return watch_obj
except json.JSONDecodeError as e:
logger.critical(
@@ -261,7 +259,7 @@ def load_watch_from_file(watch_json, uuid, rehydrate_entity_func):
f"File: {watch_json}. Error: {e}. "
f"Watch will be skipped and may need manual recovery from backup."
)
return None, None
return None
except ValueError as e:
# orjson raises ValueError for invalid JSON
if "invalid json" in str(e).lower() or HAS_ORJSON:
@@ -270,15 +268,15 @@ def load_watch_from_file(watch_json, uuid, rehydrate_entity_func):
f"File: {watch_json}. Error: {e}. "
f"Watch will be skipped and may need manual recovery from backup."
)
return None, None
return None
# Re-raise if it's not a JSON parsing error
raise
except FileNotFoundError:
logger.error(f"Watch file not found: {watch_json} for watch {uuid}")
return None, None
return None
except Exception as e:
logger.error(f"Failed to load watch {uuid} from {watch_json}: {e}")
return None, None
return None
def load_all_watches(datastore_path, rehydrate_entity_func):
@@ -318,8 +316,8 @@ def load_all_watches(datastore_path, rehydrate_entity_func):
for watch_json in watch_files:
# Extract UUID from path: /datastore/{uuid}/watch.json
uuid_dir = os.path.basename(os.path.dirname(watch_json))
watch, raw_data = load_watch_from_file(watch_json, uuid_dir, rehydrate_entity_func)
if watch and raw_data:
watch = load_watch_from_file(watch_json, uuid_dir, rehydrate_entity_func)
if watch:
watching[uuid_dir] = watch
loaded += 1
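These two hunks narrow the loader's return contract: the raw-data half of the tuple, previously kept only for hash bookkeeping, is dropped, and callers now test a single Watch object. An illustrative before/after with stand-in functions (not the project's real loader):

def load_watch_old(path, uuid, rehydrate):
    data = {'url': 'https://example.com'}      # pretend this came from json.load(...)
    return rehydrate(uuid, data), data         # (Watch, raw dict) or (None, None)

def load_watch_new(path, uuid, rehydrate):
    data = {'url': 'https://example.com'}
    return rehydrate(uuid, data)               # Watch object or None

# Old caller unpacked and checked both halves:
watch, raw = load_watch_old('x/watch.json', 'x', lambda u, d: d)
keep_old = bool(watch and raw)

# New caller checks a single value:
watch = load_watch_new('x/watch.json', 'x', lambda u, d: d)
keep_new = bool(watch)

assert keep_old == keep_new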
@@ -375,8 +373,10 @@ def load_tag_from_file(tag_json, uuid, rehydrate_entity_func):
with open(tag_json, 'r', encoding='utf-8') as f:
tag_data = json.load(f)
tag_data['processor'] = 'restock_diff'
# Rehydrate tag (convert dict to Tag object)
tag_obj = rehydrate_entity_func(uuid, tag_data, processor_override='restock_diff')
# processor_override is set inside the rehydration function
tag_obj = rehydrate_entity_func(uuid, tag_data)
return tag_obj
except json.JSONDecodeError as e:

View File

@@ -154,10 +154,10 @@ class DatastoreUpdatesMixin:
2. For each update > current schema version:
- Create backup of datastore
- Run update method
- Update schema version
- Mark settings and watches dirty
- Update schema version and commit settings
- Commit all watches and tags
3. If any update fails, stop processing
4. Save all changes immediately
4. All changes saved via individual .commit() calls
"""
updates_available = self.get_updates_available()
@@ -206,39 +206,11 @@ class DatastoreUpdatesMixin:
# Don't run any more updates
return
else:
# Bump the version, important
# Bump the version
self.data['settings']['application']['schema_version'] = update_n
self.commit()
# CRITICAL: Save all watches so changes are persisted
# Most updates modify watches, and in the new individual watch.json structure,
# we need to ensure those changes are saved
logger.info(f"Saving all {len(self.data['watching'])} watches after update_{update_n} (so that it saves them to disk)")
for uuid in self.data['watching'].keys():
self.data['watching'][uuid].commit()
# CRITICAL: Save all tags so changes are persisted
# After update_27, tags have individual tag.json files
# For updates before update_27, this will fail silently (tags don't have commit() yet)
tags = self.data['settings']['application'].get('tags', {})
if tags and update_n >= 27:
logger.info(f"Saving all {len(tags)} tags after update_{update_n}")
for uuid in tags.keys():
try:
tags[uuid].commit()
except AttributeError:
# Tag doesn't have commit() method yet (pre-update_27)
pass
# Save changes immediately after each update (more resilient than batching)
logger.critical(f"Saving all changes after update_{update_n}")
try:
self._save_dirty_items()
logger.success(f"Update {update_n} changes saved successfully")
except Exception as e:
logger.error(f"Failed to save update_{update_n} changes: {e}")
# Don't raise - update already ran, but changes might not be persisted
# The update will try to run again on next startup
logger.success(f"Update {update_n} completed")
# Track which updates ran
updates_ran.append(update_n)
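After this hunk, the update loop no longer bulk-saves every watch and tag itself; it bumps the schema version, commits settings, and relies on individual .commit() calls for the rest. A condensed, illustrative sketch of that slimmer control flow (helper names follow the diff, everything else is an assumption):

def run_pending_updates(datastore):
    for update_n in datastore.get_updates_available():
        if update_n <= datastore.data['settings']['application']['schema_version']:
            continue
        try:
            getattr(datastore, f'update_{update_n}')()   # each update commits what it touches
        except Exception:
            return                                       # stop here; retried on next startup
        # Bump the schema version and commit settings straight away
        datastore.data['settings']['application']['schema_version'] = update_n
        datastore.commit()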
@@ -488,6 +460,14 @@ class DatastoreUpdatesMixin:
del self.data['watching'][uuid]['extract_title_as_title']
if self.data['settings']['application'].get('extract_title_as_title'):
# Ensure 'ui' key exists (defensive for edge cases where base_config merge didn't happen)
if 'ui' not in self.data['settings']['application']:
self.data['settings']['application']['ui'] = {
'use_page_title_in_list': True,
'open_diff_in_new_tab': True,
'socket_io_enabled': True,
'favicons_enabled': True
}
self.data['settings']['application']['ui']['use_page_title_in_list'] = self.data['settings']['application'].get('extract_title_as_title')
def update_21(self):
@@ -668,23 +648,6 @@ class DatastoreUpdatesMixin:
logger.critical("Reloading datastore from new format...")
self._load_state() # Includes load_watches
logger.success("Datastore reloaded from new format successfully")
# Verify all watches have hashes after migration
missing_hashes = [uuid for uuid in self.data['watching'].keys() if uuid not in self._watch_hashes]
if missing_hashes:
logger.error(f"WARNING: {len(missing_hashes)} watches missing hashes after migration: {missing_hashes[:5]}")
else:
logger.success(f"All {len(self.data['watching'])} watches have valid hashes after migration")
# Set schema version to latest available update
# This prevents re-running updates and re-marking all watches as dirty
updates_available = self.get_updates_available()
latest_schema = updates_available[-1] if updates_available else 26
self.data['settings']['application']['schema_version'] = latest_schema
self.commit()
logger.info(f"Set schema_version to {latest_schema} (migration complete, all watches already saved)")
logger.critical("=" * 80)
logger.critical("MIGRATION COMPLETED SUCCESSFULLY!")
logger.critical("=" * 80)
@@ -705,22 +668,22 @@ class DatastoreUpdatesMixin:
def update_26(self):
self.migrate_legacy_db_format()
def update_27(self):
def update_28(self):
"""
Migrate tags to individual tag.json files.
Tags are currently saved as part of changedetection.json (settings).
This migration moves them to individual {uuid}/tag.json files,
similar to how watches are stored.
Tags are currently saved only in changedetection.json (settings).
This migration ALSO saves them to individual {uuid}/tag.json files,
similar to how watches are stored (dual storage).
Benefits:
- Reduces changedetection.json size
- Allows atomic tag updates without rewriting entire settings
- Enables independent tag versioning/backup
- Maintains backwards compatibility (tags stay in settings too)
"""
logger.critical("=" * 80)
logger.critical("Running migration: Individual tag persistence (update_27)")
logger.critical("Moving tags from settings to individual tag.json files")
logger.critical("Running migration: Individual tag persistence (update_28)")
logger.critical("Creating individual tag.json files (tags remain in settings too)")
logger.critical("=" * 80)
tags = self.data['settings']['application'].get('tags', {})
@@ -735,17 +698,34 @@ class DatastoreUpdatesMixin:
saved_count = 0
failed_count = 0
for uuid, tag in tags.items():
for uuid, tag_data in tags.items():
try:
# Save tag to its own file
tag.commit()
# Force save as tag.json (not watch.json) even if object is corrupted
from changedetectionio.store.file_saving_datastore import save_entity_atomic
import os
tag_dir = os.path.join(self.datastore_path, uuid)
os.makedirs(tag_dir, exist_ok=True)
# Convert to dict if it's an object
tag_dict = dict(tag_data) if hasattr(tag_data, '__iter__') else tag_data
# Save explicitly as tag.json
save_entity_atomic(
tag_dir,
uuid,
tag_dict,
filename='tag.json',
entity_type='tag',
max_size_mb=1
)
saved_count += 1
if saved_count % 10 == 0:
logger.info(f" Progress: {saved_count}/{tag_count} tags migrated...")
except Exception as e:
logger.error(f"Failed to save tag {uuid} ({tag.get('title', 'unknown')}): {e}")
logger.error(f"Failed to save tag {uuid} ({tag_data.get('title', 'unknown')}): {e}")
failed_count += 1
if failed_count > 0:
@@ -753,9 +733,9 @@ class DatastoreUpdatesMixin:
else:
logger.success(f"Migration complete: {saved_count} tags saved to individual tag.json files")
# Tags remain in settings for backwards compatibility
# On next load, _load_tags() will read from tag.json files and override settings
logger.info("Tags remain in settings for backwards compatibility")
logger.info("Future tag edits will save to tag.json files only")
# Tags remain in settings for backwards compatibility AND easy access
# On next load, _load_tags() will read from tag.json files and merge with settings
logger.info("Tags saved to both settings AND individual tag.json files")
logger.info("Future tag edits will update both locations (dual storage)")
logger.critical("=" * 80)
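update_28 writes each tag straight to {uuid}/tag.json through save_entity_atomic rather than relying on tag.commit(). That helper's internals are not part of this diff; atomic JSON persistence is conventionally done with a write-then-rename, roughly as in this generic sketch (not the project's actual implementation):

import json
import os
import tempfile

def save_json_atomic(directory, filename, data):
    """Generic write-then-rename pattern; the project's save_entity_atomic is not shown in this diff."""
    os.makedirs(directory, exist_ok=True)
    fd, tmp_path = tempfile.mkstemp(dir=directory, suffix='.tmp')
    try:
        with os.fdopen(fd, 'w', encoding='utf-8') as f:
            json.dump(data, f)
            f.flush()
            os.fsync(f.fileno())          # ensure bytes are on disk before the rename
        os.replace(tmp_path, os.path.join(directory, filename))   # atomic replace on POSIX
    except Exception:
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
        raise

# e.g. save_json_atomic('/datastore/abc-123', 'tag.json', {'title': 'Electronics', 'processor': 'restock_diff'})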