Compare commits

...

9 Commits

Author     SHA1         Message                         Date
dgtlmoon   9548f5bd8f   Also URL addition in upgrade    2026-02-11 16:03:23 +01:00
dgtlmoon   5718280518   Use detached sha instead        2026-02-11 15:56:56 +01:00
dgtlmoon   b24ae45860   extra test                      2026-02-11 15:53:01 +01:00
dgtlmoon   0e4e1cf65e   Correct test of init            2026-02-11 15:47:03 +01:00
dgtlmoon   d810dc38f4   deep fetch                      2026-02-11 15:37:25 +01:00
dgtlmoon   c1e9e012e3   upgrade path check              2026-02-11 15:34:18 +01:00
dgtlmoon   5c29f1cee8   Adding test step for upgrades   2026-02-11 15:33:07 +01:00
dgtlmoon   a0b8d8e3ca   Better to quit                  2026-02-11 15:17:19 +01:00
dgtlmoon   1942d42b06   Refactoring upgrade path        2026-02-11 15:13:23 +01:00
9 changed files with 269 additions and 258 deletions

View File

@@ -103,7 +103,7 @@ jobs:
          docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_watch_model'
          docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_jinja2_security'
          docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_semver'
          docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_html_to_text'

  # Basic pytest tests with ancillary services
  basic-tests:
@@ -516,3 +516,142 @@ jobs:
            exit 1
          fi
          docker rm sig-test

+  # Upgrade path test
+  upgrade-path-test:
+    runs-on: ubuntu-latest
+    needs: build
+    timeout-minutes: 25
+    env:
+      PYTHON_VERSION: ${{ inputs.python-version }}
+    steps:
+      - uses: actions/checkout@v6
+        with:
+          fetch-depth: 0  # Fetch all history and tags for upgrade testing
+
+      - name: Set up Python ${{ env.PYTHON_VERSION }}
+        uses: actions/setup-python@v6
+        with:
+          python-version: ${{ env.PYTHON_VERSION }}
+
+      - name: Check upgrade works without error
+        run: |
+          echo "=== Testing upgrade path from 0.49.1 to ${{ github.ref_name }} (${{ github.sha }}) ==="
+
+          # Checkout old version and create datastore
+          git checkout 0.49.1
+          python3 -m venv .venv
+          source .venv/bin/activate
+          pip install -r requirements.txt
+          pip install 'pyOpenSSL>=23.2.0'
+
+          echo "=== Running version 0.49.1 to create datastore ==="
+          python3 ./changedetection.py -C -d /tmp/data &
+          APP_PID=$!
+
+          # Wait for app to be ready
+          echo "Waiting for 0.49.1 to be ready..."
+          sleep 6
+
+          # Extract API key from datastore (0.49.1 uses url-watches.json)
+          API_KEY=$(jq -r '.settings.application.api_access_token // empty' /tmp/data/url-watches.json)
+          echo "API Key: ${API_KEY:0:8}..."
+
+          # Create a watch with tag "github-group-test" via API
+          echo "Creating test watch with tag via API..."
+          curl -X POST "http://127.0.0.1:5000/api/v1/watch" \
+            -H "x-api-key: ${API_KEY}" \
+            -H "Content-Type: application/json" \
+            --show-error --fail \
+            --retry 6 --retry-delay 1 --retry-connrefused \
+            -d '{
+              "url": "https://example.com/upgrade-test",
+              "tag": "github-group-test"
+            }'
+          echo "✓ Created watch with tag 'github-group-test'"
+
+          # Create a specific test URL watch
+          echo "Creating test URL watch via API..."
+          curl -X POST "http://127.0.0.1:5000/api/v1/watch" \
+            -H "x-api-key: ${API_KEY}" \
+            -H "Content-Type: application/json" \
+            --show-error --fail \
+            -d '{
+              "url": "http://localhost/test.txt"
+            }'
+          echo "✓ Created watch for 'http://localhost/test.txt' in version 0.49.1"
+
+          # Stop the old version gracefully
+          kill $APP_PID
+          wait $APP_PID || true
+          echo "✓ Version 0.49.1 stopped"
+
+          # Upgrade to current version (use commit SHA since we're in detached HEAD)
+          echo "Upgrading to commit ${{ github.sha }}"
+          git checkout ${{ github.sha }}
+          pip install -r requirements.txt
+
+          echo "=== Running current version (commit ${{ github.sha }}) with old datastore (testing mode) ==="
+          TESTING_SHUTDOWN_AFTER_DATASTORE_LOAD=1 python3 ./changedetection.py -d /tmp/data > /tmp/upgrade-test.log 2>&1
+          echo "=== Upgrade test output ==="
+          cat /tmp/upgrade-test.log
+          echo "✓ Datastore upgraded successfully"
+
+          # Now start the current version normally to verify the tag survived
+          echo "=== Starting current version to verify tag exists after upgrade ==="
+          timeout 20 python3 ./changedetection.py -d /tmp/data > /tmp/ui-test.log 2>&1 &
+          APP_PID=$!
+
+          # Wait for app to be ready and fetch UI
+          echo "Waiting for current version to be ready..."
+          sleep 5
+          curl --retry 6 --retry-delay 1 --retry-connrefused --silent http://127.0.0.1:5000 > /tmp/ui-output.html
+
+          # Verify tag exists in UI
+          if grep -q "github-group-test" /tmp/ui-output.html; then
+            echo "✓ Tag 'github-group-test' found in UI after upgrade"
+          else
+            echo "ERROR: Tag 'github-group-test' not found in UI after upgrade"
+            echo "=== UI Output ==="
+            cat /tmp/ui-output.html
+            echo "=== App Log ==="
+            cat /tmp/ui-test.log
+            kill $APP_PID || true
+            exit 1
+          fi
+
+          # Verify test URL exists in UI
+          if grep -q "http://localhost/test.txt" /tmp/ui-output.html; then
+            echo "✓ Watch URL 'http://localhost/test.txt' found in UI after upgrade"
+          else
+            echo "ERROR: Watch URL 'http://localhost/test.txt' not found in UI after upgrade"
+            echo "=== UI Output ==="
+            cat /tmp/ui-output.html
+            echo "=== App Log ==="
+            cat /tmp/ui-test.log
+            kill $APP_PID || true
+            exit 1
+          fi
+
+          # Cleanup
+          kill $APP_PID || true
+          wait $APP_PID || true
+
+          echo ""
+          echo "✓✓✓ Upgrade test passed: 0.49.1 → ${{ github.ref_name }} ✓✓✓"
+          echo "  - Commit: ${{ github.sha }}"
+          echo "  - Datastore migrated successfully"
+          echo "  - Tag 'github-group-test' survived upgrade"
+          echo "  - Watch URL 'http://localhost/test.txt' survived upgrade"
+          echo "✓ Upgrade test passed: 0.49.1 → ${{ github.ref_name }}"
+
+      - name: Upload upgrade test logs
+        if: always()
+        uses: actions/upload-artifact@v6
+        with:
+          name: upgrade-test-logs-py${{ env.PYTHON_VERSION }}
+          path: /tmp/upgrade-test.log
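For debugging the same flow outside CI, the watch-creation call the workflow makes with curl can be reproduced from Python. A minimal sketch, assuming a 0.49.1 instance is already listening on 127.0.0.1:5000 with its datastore at /tmp/data (endpoint, header, and payload are taken from the workflow above; the file paths are the workflow's, illustrative for local use):

    import json
    import requests

    # Read the API key the same way the workflow's jq step does
    with open("/tmp/data/url-watches.json") as f:
        api_key = json.load(f)["settings"]["application"]["api_access_token"]

    # Mirror the workflow's curl call: create a watch carrying the test tag
    resp = requests.post(
        "http://127.0.0.1:5000/api/v1/watch",
        headers={"x-api-key": api_key, "Content-Type": "application/json"},
        json={"url": "https://example.com/upgrade-test", "tag": "github-group-test"},
        timeout=10,
    )
    resp.raise_for_status()
    print("created watch:", resp.text)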

View File

@@ -371,7 +371,15 @@ def main():
        # Dont' start if the JSON DB looks corrupt
        logger.critical(f"ERROR: JSON DB or Proxy List JSON at '{app_config['datastore_path']}' appears to be corrupt, aborting.")
        logger.critical(str(e))
-        return
+        sys.exit(1)
+
+    # Testing mode: Exit cleanly after datastore initialization (for CI/CD upgrade tests)
+    if os.environ.get('TESTING_SHUTDOWN_AFTER_DATASTORE_LOAD'):
+        logger.success(f"TESTING MODE: Datastore loaded successfully from {app_config['datastore_path']}")
+        logger.success(f"TESTING MODE: Schema version: {datastore.data['settings']['application'].get('schema_version', 'unknown')}")
+        logger.success(f"TESTING MODE: Loaded {len(datastore.data['watching'])} watches")
+        logger.success("TESTING MODE: Exiting cleanly (TESTING_SHUTDOWN_AFTER_DATASTORE_LOAD is set)")
+        sys.exit(0)

    # Apply all_paused setting if specified via CLI
    if all_paused is not None:
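The workflow's "testing mode" step relies on this exit-code contract: load the datastore, run any pending schema updates, exit 0; exit non-zero (e.g. the new sys.exit(1) on a corrupt JSON DB) fails the check. A local equivalent of that CI step, sketched with subprocess (the datastore path is illustrative):

    import os
    import subprocess

    # The process should load the datastore, migrate if needed, and exit 0
    env = dict(os.environ, TESTING_SHUTDOWN_AFTER_DATASTORE_LOAD="1")
    proc = subprocess.run(
        ["python3", "./changedetection.py", "-d", "/tmp/data"],
        env=env,
        capture_output=True,
        text=True,
        timeout=120,
    )
    print(proc.stdout)
    assert proc.returncode == 0, f"upgrade/load failed with exit code {proc.returncode}"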

View File

@@ -20,11 +20,9 @@ See: Watch.py model docstring for full Pydantic architecture explanation
See: processors/restock_diff/processor.py:184-192 for current manual implementation
"""
-import os

from changedetectionio.model import watch_base
from changedetectionio.model.persistence import EntityPersistenceMixin


class model(EntityPersistenceMixin, watch_base):
    """
    Tag domain model - groups watches and can override their settings.

View File

@@ -2,7 +2,7 @@ import os
import uuid

from changedetectionio import strtobool
-from .persistence import EntityPersistenceMixin
+from .persistence import EntityPersistenceMixin, _determine_entity_type

__all__ = ['EntityPersistenceMixin', 'watch_base']

@@ -511,10 +511,8 @@ class watch_base(dict):
        # Save to disk via subclass implementation
        try:
            # Determine entity type from module name (Watch.py -> watch, Tag.py -> tag)
-            from changedetectionio.model.persistence import _determine_entity_type
            entity_type = _determine_entity_type(self.__class__)
            filename = f"{entity_type}.json"

            self._save_to_disk(data_dict, uuid)
            logger.debug(f"Committed {entity_type} {uuid} to {uuid}/{filename}")
        except Exception as e:
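The comment above pins down the contract: the persisted filename is derived from the class's defining module, so the same commit() path writes watch.json for Watch and tag.json for Tag. The real helper lives in changedetectionio/model/persistence.py; this is only a plausible sketch of that behaviour, not the actual implementation:

    def _determine_entity_type_sketch(cls) -> str:
        # e.g. changedetectionio.model.Watch -> "watch", changedetectionio.model.Tag -> "tag"
        return cls.__module__.rsplit(".", 1)[-1].lower()

    class _Demo:
        pass

    _Demo.__module__ = "changedetectionio.model.Tag"  # simulate a class defined in Tag.py
    assert _determine_entity_type_sketch(_Demo) == "tag"

Making the import module-level (rather than inside the try block) means a broken persistence module fails loudly at import time instead of being swallowed by the except handler.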

View File

@@ -33,9 +33,8 @@ except ImportError:
from ..processors import get_custom_watch_obj_for_processor

# Import the base class and helpers
-from .file_saving_datastore import FileSavingDataStore, load_all_watches, load_all_tags, save_watch_atomic, save_tag_atomic, save_json_atomic
+from .file_saving_datastore import FileSavingDataStore, load_all_watches, load_all_tags, save_json_atomic
from .updates import DatastoreUpdatesMixin
-from .legacy_loader import has_legacy_datastore

# Because the server will run as a daemon and wont know the URL for notification links when firing off a notification
BASE_URL_NOT_SET_TEXT = '("Base URL" not set - see settings - notifications)'
@@ -78,7 +77,7 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
logger.info(f"Backing up changedetection.json due to new version to '{db_path_version_backup}'.") logger.info(f"Backing up changedetection.json due to new version to '{db_path_version_backup}'.")
copyfile(db_path, db_path_version_backup) copyfile(db_path, db_path_version_backup)
def _load_settings(self): def _load_settings(self, filename="changedetection.json"):
""" """
Load settings from storage. Load settings from storage.
@@ -87,7 +86,7 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
        Returns:
            dict: Settings data loaded from storage
        """
-        changedetection_json = os.path.join(self.datastore_path, "changedetection.json")
+        changedetection_json = os.path.join(self.datastore_path, filename)
        logger.info(f"Loading settings from {changedetection_json}")
@@ -122,6 +121,11 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
            if 'application' in settings_data['settings']:
                self.__data['settings']['application'].update(settings_data['settings']['application'])

+        # More or less for the old format which had this data in the one url-watches.json
+        # cant hurt to leave it here,
+        if 'watching' in settings_data:
+            self.__data['watching'].update(settings_data['watching'])

    def _rehydrate_tags(self):
        """Rehydrate tag entities from stored data into Tag objects with restock_diff processor."""
        from ..model import Tag
@@ -146,23 +150,28 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
logger.info(f"Rehydrating {watch_count} watches...") logger.info(f"Rehydrating {watch_count} watches...")
watching_rehydrated = {} watching_rehydrated = {}
for uuid, watch_dict in self.__data.get('watching', {}).items(): for uuid, watch_dict in self.__data.get('watching', {}).items():
watching_rehydrated[uuid] = self.rehydrate_entity(uuid, watch_dict) if isinstance(watch_dict, dict):
watching_rehydrated[uuid] = self.rehydrate_entity(uuid, watch_dict)
else:
logger.error(f"Watch UUID {uuid} already rehydrated")
self.__data['watching'] = watching_rehydrated self.__data['watching'] = watching_rehydrated
logger.success(f"Rehydrated {watch_count} watches into Watch objects") logger.success(f"Rehydrated {watch_count} watches into Watch objects")
def _load_state(self): def _load_state(self, main_settings_filename="changedetection.json"):
""" """
Load complete datastore state from storage. Load complete datastore state from storage.
Orchestrates loading of settings, watches, and tags using polymorphic methods. Orchestrates loading of settings, watches, and tags using polymorphic methods.
""" """
# Load settings # Load settings
settings_data = self._load_settings() settings_data = self._load_settings(filename=main_settings_filename)
self._apply_settings(settings_data) self._apply_settings(settings_data)
# Load watches (polymorphic - parent class method) # Load watches, scan them from the disk
self._load_watches() self._load_watches()
self._rehydrate_watches()
# Load tags from individual tag.json files # Load tags from individual tag.json files
# These will override any tags in settings (migration path) # These will override any tags in settings (migration path)
@@ -200,112 +209,73 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
        # Check if datastore already exists
        changedetection_json = os.path.join(self.datastore_path, "changedetection.json")
+        changedetection_json_old_schema = os.path.join(self.datastore_path, "url-watches.json")

        if os.path.exists(changedetection_json):
-            # Load existing datastore (changedetection.json + watch.json files)
-            logger.info("Loading existing datastore")
-            try:
-                self._load_state()
-            except Exception as e:
-                logger.critical(f"Failed to load datastore: {e}")
-                raise
            # Run schema updates if needed
            # Pass current schema version from loaded datastore (defaults to 0 if not set)
+            # Load existing datastore (changedetection.json + watch.json files)
+            logger.info("Loading existing datastore")
+            self._load_state()
+            current_schema = self.data['settings']['application'].get('schema_version', 0)
+            self.run_updates(current_schema_version=current_schema)
+
+        # Legacy datastore detected - trigger migration, even works if the schema is much before the migration step.
+        elif os.path.exists(changedetection_json_old_schema):
+            logger.critical(f"Legacy datastore detected at {changedetection_json_old_schema}, loading and running updates")
+            self._load_state(main_settings_filename="url-watches.json")
+            # update 26 will load the whole old config from disk to __data
            current_schema = self.__data['settings']['application'].get('schema_version', 0)
            self.run_updates(current_schema_version=current_schema)
+            # Probably tags were also shifted to disk and many other changes, so best to reload here.
+            self._load_state()
        else:
            # No datastore yet - check if this is a fresh install or legacy migration
-            # Generate app_guid FIRST (required for all operations)
-            if "pytest" in sys.modules or "PYTEST_CURRENT_TEST" in os.environ:
-                self.__data['app_guid'] = "test-" + str(uuid_builder.uuid4())
-            else:
-                self.__data['app_guid'] = str(uuid_builder.uuid4())
-
-            # Generate RSS access token
-            self.__data['settings']['application']['rss_access_token'] = secrets.token_hex(16)
-
-            # Generate API access token
-            self.__data['settings']['application']['api_access_token'] = secrets.token_hex(16)
-
-            # Check if legacy datastore exists (url-watches.json)
-            if has_legacy_datastore(self.datastore_path):
-                # Legacy datastore detected - trigger migration
-                logger.critical(f"Legacy datastore detected at {self.datastore_path}/url-watches.json")
-                logger.critical("Migration will be triggered via update_26")
-
-                # Load the legacy datastore
-                from .legacy_loader import load_legacy_format
-                legacy_path = os.path.join(self.datastore_path, "url-watches.json")
-                legacy_data = load_legacy_format(legacy_path)
-                if not legacy_data:
-                    raise Exception("Failed to load legacy datastore from url-watches.json")
-
-                # Merge legacy data with base_config defaults (preserves new fields like 'ui')
-                # self.__data already has App.model() defaults from line 190
-                logger.info("Merging legacy data with base_config defaults...")
-
-                # Apply top-level fields from legacy data
-                if 'app_guid' in legacy_data:
-                    self.__data['app_guid'] = legacy_data['app_guid']
-                if 'build_sha' in legacy_data:
-                    self.__data['build_sha'] = legacy_data['build_sha']
-                if 'version_tag' in legacy_data:
-                    self.__data['version_tag'] = legacy_data['version_tag']
-
-                # Apply watching data (complete replacement as these are user's watches)
-                if 'watching' in legacy_data:
-                    self.__data['watching'] = legacy_data['watching']
-
-                # Merge settings sections (preserves base_config defaults for missing fields)
-                if 'settings' in legacy_data:
-                    if 'headers' in legacy_data['settings']:
-                        self.__data['settings']['headers'].update(legacy_data['settings']['headers'])
-                    if 'requests' in legacy_data['settings']:
-                        self.__data['settings']['requests'].update(legacy_data['settings']['requests'])
-                    if 'application' in legacy_data['settings']:
-                        # CRITICAL: Use .update() to merge, not replace
-                        # This preserves new fields like 'ui' that exist in base_config
-                        self.__data['settings']['application'].update(legacy_data['settings']['application'])
-
-                # CRITICAL: Rehydrate watches from dicts into Watch objects
-                # This ensures watches have their methods available during migration
-                self._rehydrate_watches()
-
-                # update_26 will save watches to individual files and create changedetection.json
-                # Next startup will load from new format normally
-                self.run_updates()
-            else:
-                # Fresh install - create new datastore
-                logger.warning(f"No datastore found, creating new datastore at {self.datastore_path}")
-
-                # Set schema version to latest (no updates needed)
-                updates_available = self.get_updates_available()
-                self.__data['settings']['application']['schema_version'] = updates_available.pop() if updates_available else 26
-
-                # Add default watches if requested
-                if include_default_watches:
-                    self.add_watch(
-                        url='https://news.ycombinator.com/',
-                        tag='Tech news',
-                        extras={'fetch_backend': 'html_requests'}
-                    )
-                    self.add_watch(
-                        url='https://changedetection.io/CHANGELOG.txt',
-                        tag='changedetection.io',
-                        extras={'fetch_backend': 'html_requests'}
-                    )
-
-                # Create changedetection.json immediately
-                try:
-                    self._save_settings()
-                    logger.info("Created changedetection.json for new datastore")
-                except Exception as e:
-                    logger.error(f"Failed to create initial changedetection.json: {e}")
+            self.init_fresh_install(include_default_watches=include_default_watches,
+                                    version_tag=version_tag)
+
+    def init_fresh_install(self, include_default_watches, version_tag):
+        # Generate app_guid FIRST (required for all operations)
+        if "pytest" in sys.modules or "PYTEST_CURRENT_TEST" in os.environ:
+            self.__data['app_guid'] = "test-" + str(uuid_builder.uuid4())
+        else:
+            self.__data['app_guid'] = str(uuid_builder.uuid4())
+
+        # Generate RSS access token
+        self.__data['settings']['application']['rss_access_token'] = secrets.token_hex(16)
+
+        # Generate API access token
+        self.__data['settings']['application']['api_access_token'] = secrets.token_hex(16)
+
+        logger.warning(f"No datastore found, creating new datastore at {self.datastore_path}")
+
+        # Set schema version to latest (no updates needed)
+        latest_update_available = self.get_updates_available().pop()
+        logger.info(f"Marking fresh install to schema version {latest_update_available}")
+        self.__data['settings']['application']['schema_version'] = latest_update_available
+
+        # Add default watches if requested
+        if include_default_watches:
+            self.add_watch(
+                url='https://news.ycombinator.com/',
+                tag='Tech news',
+                extras={'fetch_backend': 'html_requests'}
+            )
+            self.add_watch(
+                url='https://changedetection.io/CHANGELOG.txt',
+                tag='changedetection.io',
+                extras={'fetch_backend': 'html_requests'}
+            )
+
+        # Create changedetection.json immediately
+        try:
+            self._save_settings()
+            logger.info("Created changedetection.json for new datastore")
+        except Exception as e:
+            logger.error(f"Failed to create initial changedetection.json: {e}")

        # Set version tag
        self.__data['version_tag'] = version_tag
@@ -383,17 +353,9 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
        # Deep copy settings to avoid modifying the original
        settings_copy = copy.deepcopy(self.__data['settings'])

-        # Only exclude tags if we've already migrated them to individual files (schema >= 28)
-        # This ensures update_28 can migrate tags from settings
-        schema_version = self.__data['settings']['application'].get('schema_version', 0)
-        if schema_version >= 28:
-            # Tags are in individual tag.json files, don't save to settings
-            settings_copy['application']['tags'] = {}
-        # else: keep tags in settings for update_28 migration

        return {
            'note': 'Settings file - watches are in {uuid}/watch.json, tags are in {uuid}/tag.json',
-            'app_guid': self.__data['app_guid'],
+            'app_guid': self.__data.get('app_guid'),
            'settings': settings_copy,
            'build_sha': self.__data.get('build_sha'),
            'version_tag': self.__data.get('version_tag')
@@ -422,15 +384,14 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
        Implementation of abstract method from FileSavingDataStore.
        Delegates to helper function and stores results in internal data structure.
        """
-        watching = load_all_watches(
-            self.datastore_path,
-            self.rehydrate_entity
-        )
        # Store loaded data
-        self.__data['watching'] = watching
-        logger.debug(f"Loaded {len(watching)} watches")
+        # @note this will also work for the old legacy format because self.__data['watching'] should already have them loaded by this point.
+        self.__data['watching'].update(load_all_watches(
+            self.datastore_path,
+            self.rehydrate_entity
+        ))
+        logger.debug(f"Loaded {len(self.__data['watching'])} watches")

    def _load_tags(self):
        """

View File

@@ -207,15 +207,6 @@ def save_watch_atomic(watch_dir, uuid, watch_dict):
    save_entity_atomic(watch_dir, uuid, watch_dict, "watch.json", "watch", max_size_mb=10)

-def save_tag_atomic(tag_dir, uuid, tag_dict):
-    """
-    Save a tag to disk using atomic write pattern.
-
-    Convenience wrapper around save_entity_atomic for tags.
-    Kept for backwards compatibility.
-    """
-    save_entity_atomic(tag_dir, uuid, tag_dict, "tag.json", "tag", max_size_mb=1)

def load_watch_from_file(watch_json, uuid, rehydrate_entity_func):
    """

View File

@@ -1,66 +0,0 @@
"""
Legacy format loader for url-watches.json.
Provides functions to detect and load from the legacy monolithic JSON format.
Used during migration (update_26) to transition to individual watch.json files.
"""
import os
import json
from loguru import logger
# Try to import orjson for faster JSON serialization
try:
import orjson
HAS_ORJSON = True
except ImportError:
HAS_ORJSON = False
def has_legacy_datastore(datastore_path):
"""
Check if a legacy url-watches.json file exists.
This is used by update_26 to determine if migration is needed.
Args:
datastore_path: Path to datastore directory
Returns:
bool: True if url-watches.json exists
"""
url_watches_json = os.path.join(datastore_path, "url-watches.json")
return os.path.exists(url_watches_json)
def load_legacy_format(json_store_path):
"""
Load datastore from legacy url-watches.json format.
Args:
json_store_path: Full path to url-watches.json file
Returns:
dict: Loaded datastore data with 'watching', 'settings', etc.
None: If file doesn't exist or loading failed
"""
logger.info(f"Loading from legacy format: {json_store_path}")
if not os.path.isfile(json_store_path):
logger.warning(f"Legacy file not found: {json_store_path}")
return None
try:
if HAS_ORJSON:
with open(json_store_path, 'rb') as f:
data = orjson.loads(f.read())
else:
with open(json_store_path, 'r', encoding='utf-8') as f:
data = json.load(f)
logger.info(f"Loaded {len(data.get('watching', {}))} watches from legacy format")
return data
except Exception as e:
logger.error(f"Failed to load legacy format: {e}")
return None

View File

@@ -16,12 +16,18 @@ import time
from loguru import logger
from copy import deepcopy

+# Try to import orjson for faster JSON serialization
+try:
+    import orjson
+    HAS_ORJSON = True
+except ImportError:
+    HAS_ORJSON = False

from ..html_tools import TRANSLATE_WHITESPACE_TABLE
from ..processors.restock_diff import Restock
from ..blueprint.rss import RSS_CONTENT_FORMAT_DEFAULT
from ..model import USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH
-from .file_saving_datastore import save_watch_atomic


def create_backup_tarball(datastore_path, update_number):
    """
@@ -97,7 +103,7 @@ def create_backup_tarball(datastore_path, update_number):
                        tar.add(tag_json, arcname=f"{entry}/tag.json")
                        tag_count += 1

-        logger.success(f"Backup created: {backup_filename} ({watch_count} watches, {tag_count} tags)")
+        logger.success(f"Backup created: {backup_filename} ({watch_count} watches from disk, {tag_count} tags from disk)")
        return backup_path

    except Exception as e:
@@ -137,6 +143,7 @@ class DatastoreUpdatesMixin:
        return updates_available

    def run_updates(self, current_schema_version=None):
+        import sys
        """
        Run all pending schema updates sequentially.
@@ -160,6 +167,23 @@ class DatastoreUpdatesMixin:
        4. All changes saved via individual .commit() calls
        """
        updates_available = self.get_updates_available()

+        if self.data.get('watching'):
+            test_watch = self.data['watching'].get(next(iter(self.data.get('watching', {}))))
+            from ..model.Watch import model
+            if not isinstance(test_watch, model):
+                import sys
+                logger.critical("Cannot run updates! Watch structure must be re-hydrated back to a Watch model object!")
+                sys.exit(1)
+
+        if self.data['settings']['application'].get('tags',{}):
+            test_tag = self.data['settings']['application'].get('tags',{}).get(next(iter(self.data['settings']['application'].get('tags',{}))))
+            from ..model.Tag import model as tag_model
+            if not isinstance(test_tag, tag_model):
+                import sys
+                logger.critical("Cannot run updates! Watch tag/group structure must be re-hydrated back to a Tag model object!")
+                sys.exit(1)

        # Determine current schema version
        if current_schema_version is None:
@@ -201,10 +225,9 @@ class DatastoreUpdatesMixin:
            try:
                update_method = getattr(self, f"update_{update_n}")()
            except Exception as e:
-                logger.error(f"Error while trying update_{update_n}")
-                logger.error(e)
-                # Don't run any more updates
-                return
+                logger.critical(f"Error while trying update_{update_n}")
+                logger.exception(e)
+                sys.exit(1)
            else:
                # Bump the version
                self.data['settings']['application']['schema_version'] = update_n
@@ -555,27 +578,6 @@ class DatastoreUpdatesMixin:
logger.critical("COPY-based migration: url-watches.json will remain intact for rollback") logger.critical("COPY-based migration: url-watches.json will remain intact for rollback")
logger.critical("=" * 80) logger.critical("=" * 80)
# Check if already migrated
changedetection_json = os.path.join(self.datastore_path, "changedetection.json")
if os.path.exists(changedetection_json):
logger.info("Migration already completed (changedetection.json exists), skipping")
return
# Check if we need to load legacy data
from .legacy_loader import has_legacy_datastore, load_legacy_format
if not has_legacy_datastore(self.datastore_path):
logger.info("No legacy datastore found, nothing to migrate")
return
# Load legacy data from url-watches.json
logger.critical("Loading legacy datastore from url-watches.json...")
legacy_path = os.path.join(self.datastore_path, "url-watches.json")
legacy_data = load_legacy_format(legacy_path)
if not legacy_data:
raise Exception("Failed to load legacy datastore from url-watches.json")
# Populate settings from legacy data # Populate settings from legacy data
logger.info("Populating settings from legacy data...") logger.info("Populating settings from legacy data...")
watch_count = len(self.data['watching']) watch_count = len(self.data['watching'])
@@ -587,9 +589,7 @@ class DatastoreUpdatesMixin:
        saved_count = 0
        for uuid, watch in self.data['watching'].items():
            try:
-                watch_dict = dict(watch)
-                watch_dir = os.path.join(self.datastore_path, uuid)
-                save_watch_atomic(watch_dir, uuid, watch_dict)
+                watch.commit()
                saved_count += 1

                if saved_count % 100 == 0:
@@ -635,18 +635,19 @@ class DatastoreUpdatesMixin:
        # Phase 4: Verify settings file exists
        logger.critical("Phase 4/4: Verifying changedetection.json exists...")
-        if not os.path.isfile(changedetection_json):
-            raise Exception(
-                "Migration failed: changedetection.json not found after save. "
-                "url-watches.json remains intact, safe to retry."
-            )
+        changedetection_json_new_schema = os.path.join(self.datastore_path, "changedetection.json")
+        if not os.path.isfile(changedetection_json_new_schema):
+            import sys
+            logger.critical("Migration failed, changedetection.json not found after update ran!")
+            sys.exit(1)
        logger.critical("Phase 4 complete: Verified changedetection.json exists")

        # Success! Now reload from new format
        logger.critical("Reloading datastore from new format...")
-        self._load_state() # Includes load_watches
+        # write it to disk, it will be saved without ['watching'] in the JSON db because we find it from disk glob
+        self._save_settings()
        logger.success("Datastore reloaded from new format successfully")
        logger.critical("=" * 80)
        logger.critical("MIGRATION COMPLETED SUCCESSFULLY!")
@@ -681,9 +682,11 @@ class DatastoreUpdatesMixin:
        - Enables independent tag versioning/backup
        - Maintains backwards compatibility (tags stay in settings too)
        """
+        # Force save as tag.json (not watch.json) even if object is corrupted
        logger.critical("=" * 80)
        logger.critical("Running migration: Individual tag persistence (update_28)")
-        logger.critical("Creating individual tag.json files (tags remain in settings too)")
+        logger.critical("Creating individual tag.json files")
        logger.critical("=" * 80)

        tags = self.data['settings']['application'].get('tags', {})
@@ -700,27 +703,8 @@ class DatastoreUpdatesMixin:
        for uuid, tag_data in tags.items():
            try:
-                # Force save as tag.json (not watch.json) even if object is corrupted
-                from changedetectionio.store.file_saving_datastore import save_entity_atomic
-                import os
-
-                tag_dir = os.path.join(self.datastore_path, uuid)
-                os.makedirs(tag_dir, exist_ok=True)
-
-                # Convert to dict if it's an object
-                tag_dict = dict(tag_data) if hasattr(tag_data, '__iter__') else tag_data
-
-                # Save explicitly as tag.json
-                save_entity_atomic(
-                    tag_dir,
-                    uuid,
-                    tag_dict,
-                    filename='tag.json',
-                    entity_type='tag',
-                    max_size_mb=1
-                )
+                tag_data.commit()
                saved_count += 1

                if saved_count % 10 == 0:
                    logger.info(f"  Progress: {saved_count}/{tag_count} tags migrated...")
@@ -737,5 +721,5 @@ class DatastoreUpdatesMixin:
        # On next load, _load_tags() will read from tag.json files and merge with settings
        logger.info("Tags saved to both settings AND individual tag.json files")
        logger.info("Future tag edits will update both locations (dual storage)")
-            logger.critical("=" * 80)
+        logger.critical("=" * 80)

View File

@@ -5,6 +5,8 @@ from flask import url_for
from .util import live_server_setup, wait_for_all_checks, extract_rss_token_from_UI, get_UUID_for_tag_name, extract_UUID_from_client, delete_all_watches
import os
+from ..store import ChangeDetectionStore


# def test_setup(client, live_server, measure_memory_usage, datastore_path):
#    live_server_setup(live_server) # Setup on conftest per function
@@ -487,7 +489,6 @@ def test_tag_json_persistence(client, live_server, measure_memory_usage, datasto
    - Tag deletion removes tag.json file
    """
    import json
-    from changedetectionio.store import ChangeDetectionStore

    datastore = client.application.config.get('DATASTORE')
@@ -569,9 +570,6 @@ def test_tag_json_migration_update_27(client, live_server, measure_memory_usage,
    This simulates a pre-update_27 datastore and verifies migration works.
    """
    import json
-    from changedetectionio.store import ChangeDetectionStore
-
-    datastore = client.application.config.get('DATASTORE')

    # 1. Create multiple tags
    tag_names = ['migration-tag-1', 'migration-tag-2', 'migration-tag-3']