Compare commits

...

15 Commits

Author SHA1 Message Date
dgtlmoon
9548f5bd8f Also URL addition in upgrade 2026-02-11 16:03:23 +01:00
dgtlmoon
5718280518 Use detached sha instead 2026-02-11 15:56:56 +01:00
dgtlmoon
b24ae45860 extra test 2026-02-11 15:53:01 +01:00
dgtlmoon
0e4e1cf65e Correct test of init 2026-02-11 15:47:03 +01:00
dgtlmoon
d810dc38f4 deep fetch 2026-02-11 15:37:25 +01:00
dgtlmoon
c1e9e012e3 upgrade path check 2026-02-11 15:34:18 +01:00
dgtlmoon
5c29f1cee8 Adding test step for upgrades 2026-02-11 15:33:07 +01:00
dgtlmoon
a0b8d8e3ca Better to quit 2026-02-11 15:17:19 +01:00
dgtlmoon
1942d42b06 Refactoring upgrade path 2026-02-11 15:13:23 +01:00
dgtlmoon
5726c5a0ac API - Import use background task to import large lists (#3858) 2026-02-11 08:15:58 +01:00
dgtlmoon
80f7decf4f API - Bumping docs 2026-02-11 07:44:45 +01:00
dgtlmoon
c66a29b011 API - Import - Ability to set any watch value as HTTP URL Query value, for example ?processor=restock_diff&time_between_check={'hours':24} Re #3845 (#3857) 2026-02-11 07:26:48 +01:00
dgtlmoon
a1a2e5c5bf API - Include missing tags in fetching watch information. #3854 (#3856) 2026-02-11 06:45:19 +01:00
dgtlmoon
6e90a0bbd1 UI - Bulk checkbox operations modal confirmation fix Re #3853 2026-02-11 06:29:59 +01:00
dgtlmoon
987789425d Tags update fix (#3849) 2026-02-07 17:13:41 +01:00
17 changed files with 830 additions and 344 deletions

View File

@@ -103,7 +103,7 @@ jobs:
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_watch_model'
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_jinja2_security'
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_semver'
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_html_to_text'
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_html_to_text'
# Basic pytest tests with ancillary services
basic-tests:
@@ -516,3 +516,142 @@ jobs:
exit 1
fi
docker rm sig-test
# Upgrade path test
upgrade-path-test:
runs-on: ubuntu-latest
needs: build
timeout-minutes: 25
env:
PYTHON_VERSION: ${{ inputs.python-version }}
steps:
- uses: actions/checkout@v6
with:
fetch-depth: 0 # Fetch all history and tags for upgrade testing
- name: Set up Python ${{ env.PYTHON_VERSION }}
uses: actions/setup-python@v6
with:
python-version: ${{ env.PYTHON_VERSION }}
- name: Check upgrade works without error
run: |
echo "=== Testing upgrade path from 0.49.1 to ${{ github.ref_name }} (${{ github.sha }}) ==="
# Checkout old version and create datastore
git checkout 0.49.1
python3 -m venv .venv
source .venv/bin/activate
pip install -r requirements.txt
pip install 'pyOpenSSL>=23.2.0'
echo "=== Running version 0.49.1 to create datastore ==="
python3 ./changedetection.py -C -d /tmp/data &
APP_PID=$!
# Wait for app to be ready
echo "Waiting for 0.49.1 to be ready..."
sleep 6
# Extract API key from datastore (0.49.1 uses url-watches.json)
API_KEY=$(jq -r '.settings.application.api_access_token // empty' /tmp/data/url-watches.json)
echo "API Key: ${API_KEY:0:8}..."
# Create a watch with tag "github-group-test" via API
echo "Creating test watch with tag via API..."
curl -X POST "http://127.0.0.1:5000/api/v1/watch" \
-H "x-api-key: ${API_KEY}" \
-H "Content-Type: application/json" \
--show-error --fail \
--retry 6 --retry-delay 1 --retry-connrefused \
-d '{
"url": "https://example.com/upgrade-test",
"tag": "github-group-test"
}'
echo "✓ Created watch with tag 'github-group-test'"
# Create a specific test URL watch
echo "Creating test URL watch via API..."
curl -X POST "http://127.0.0.1:5000/api/v1/watch" \
-H "x-api-key: ${API_KEY}" \
-H "Content-Type: application/json" \
--show-error --fail \
-d '{
"url": "http://localhost/test.txt"
}'
echo "✓ Created watch for 'http://localhost/test.txt' in version 0.49.1"
# Stop the old version gracefully
kill $APP_PID
wait $APP_PID || true
echo "✓ Version 0.49.1 stopped"
# Upgrade to current version (use commit SHA since we're in detached HEAD)
echo "Upgrading to commit ${{ github.sha }}"
git checkout ${{ github.sha }}
pip install -r requirements.txt
echo "=== Running current version (commit ${{ github.sha }}) with old datastore (testing mode) ==="
TESTING_SHUTDOWN_AFTER_DATASTORE_LOAD=1 python3 ./changedetection.py -d /tmp/data > /tmp/upgrade-test.log 2>&1
echo "=== Upgrade test output ==="
cat /tmp/upgrade-test.log
echo "✓ Datastore upgraded successfully"
# Now start the current version normally to verify the tag survived
echo "=== Starting current version to verify tag exists after upgrade ==="
timeout 20 python3 ./changedetection.py -d /tmp/data > /tmp/ui-test.log 2>&1 &
APP_PID=$!
# Wait for app to be ready and fetch UI
echo "Waiting for current version to be ready..."
sleep 5
curl --retry 6 --retry-delay 1 --retry-connrefused --silent http://127.0.0.1:5000 > /tmp/ui-output.html
# Verify tag exists in UI
if grep -q "github-group-test" /tmp/ui-output.html; then
echo "✓ Tag 'github-group-test' found in UI after upgrade"
else
echo "ERROR: Tag 'github-group-test' not found in UI after upgrade"
echo "=== UI Output ==="
cat /tmp/ui-output.html
echo "=== App Log ==="
cat /tmp/ui-test.log
kill $APP_PID || true
exit 1
fi
# Verify test URL exists in UI
if grep -q "http://localhost/test.txt" /tmp/ui-output.html; then
echo "✓ Watch URL 'http://localhost/test.txt' found in UI after upgrade"
else
echo "ERROR: Watch URL 'http://localhost/test.txt' not found in UI after upgrade"
echo "=== UI Output ==="
cat /tmp/ui-output.html
echo "=== App Log ==="
cat /tmp/ui-test.log
kill $APP_PID || true
exit 1
fi
# Cleanup
kill $APP_PID || true
wait $APP_PID || true
echo ""
echo "✓✓✓ Upgrade test passed: 0.49.1 → ${{ github.ref_name }} ✓✓✓"
echo " - Commit: ${{ github.sha }}"
echo " - Datastore migrated successfully"
echo " - Tag 'github-group-test' survived upgrade"
echo " - Watch URL 'http://localhost/test.txt' survived upgrade"
echo "✓ Upgrade test passed: 0.49.1 → ${{ github.ref_name }}"
- name: Upload upgrade test logs
if: always()
uses: actions/upload-artifact@v6
with:
name: upgrade-test-logs-py${{ env.PYTHON_VERSION }}
path: /tmp/upgrade-test.log
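
The job above seeds the 0.49.1 datastore over the REST API with jq and curl before upgrading. Below is a minimal Python sketch of that same seeding step for local reproduction; it assumes a 0.49.1 instance is already running on 127.0.0.1:5000 with its datastore at /tmp/data, and the url-watches.json key layout is taken from the workflow above.

# Sketch of the datastore-seeding step from the workflow above, for local reproduction.
# Assumes a 0.49.1 instance is already listening on 127.0.0.1:5000, started with -d /tmp/data.
import json
import requests

# 0.49.1 keeps everything, including the API token, in url-watches.json
with open("/tmp/data/url-watches.json") as f:
    api_key = json.load(f)["settings"]["application"]["api_access_token"]

headers = {"x-api-key": api_key, "Content-Type": "application/json"}

# The same two watches the CI job creates before upgrading
for payload in ({"url": "https://example.com/upgrade-test", "tag": "github-group-test"},
                {"url": "http://localhost/test.txt"}):
    r = requests.post("http://127.0.0.1:5000/api/v1/watch", headers=headers, json=payload)
    r.raise_for_status()
    print("created watch for", payload["url"])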

View File

@@ -371,7 +371,15 @@ def main():
# Don't start if the JSON DB looks corrupt
logger.critical(f"ERROR: JSON DB or Proxy List JSON at '{app_config['datastore_path']}' appears to be corrupt, aborting.")
logger.critical(str(e))
return
sys.exit(1)
# Testing mode: Exit cleanly after datastore initialization (for CI/CD upgrade tests)
if os.environ.get('TESTING_SHUTDOWN_AFTER_DATASTORE_LOAD'):
logger.success(f"TESTING MODE: Datastore loaded successfully from {app_config['datastore_path']}")
logger.success(f"TESTING MODE: Schema version: {datastore.data['settings']['application'].get('schema_version', 'unknown')}")
logger.success(f"TESTING MODE: Loaded {len(datastore.data['watching'])} watches")
logger.success("TESTING MODE: Exiting cleanly (TESTING_SHUTDOWN_AFTER_DATASTORE_LOAD is set)")
sys.exit(0)
# Apply all_paused setting if specified via CLI
if all_paused is not None:
@@ -605,7 +613,7 @@ def main():
return dict(right_sticky="v{}".format(datastore.data['version_tag']),
new_version_available=app.config['NEW_VERSION_AVAILABLE'],
has_password=datastore.data['settings']['application']['password'] != False,
socket_io_enabled=datastore.data['settings']['application']['ui'].get('socket_io_enabled', True),
socket_io_enabled=datastore.data['settings']['application'].get('ui', {}).get('socket_io_enabled', True),
all_paused=datastore.data['settings']['application'].get('all_paused', False),
all_muted=datastore.data['settings']['application'].get('all_muted', False)
)
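
The TESTING_SHUTDOWN_AFTER_DATASTORE_LOAD branch added above loads the datastore, logs the schema version and watch count, then exits with status 0, which is what the CI upgrade job relies on. A small wrapper sketch showing the same check driven from Python (the datastore path and working directory are assumptions for illustration):

# Sketch of driving the testing-mode exit added above from a wrapper script.
import os
import subprocess

env = dict(os.environ, TESTING_SHUTDOWN_AFTER_DATASTORE_LOAD="1")
result = subprocess.run(
    ["python3", "./changedetection.py", "-d", "/tmp/data"],
    env=env, capture_output=True, text=True,
)
# A zero return code means the datastore loaded (and migrated) cleanly;
# anything else means the upgrade path is broken.
if result.returncode != 0:
    raise SystemExit(f"datastore load failed:\n{result.stdout}\n{result.stderr}")
print("datastore loaded and migrated cleanly")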

View File

@@ -2,8 +2,12 @@ from changedetectionio.strtobool import strtobool
from flask_restful import abort, Resource
from flask import request
from functools import wraps
from . import auth, validate_openapi_request
from . import auth, validate_openapi_request, schema_create_watch
from ..validate_url import is_safe_valid_url
import json
# Number of URLs above which import switches to background processing
IMPORT_SWITCH_TO_BACKGROUND_THRESHOLD = 20
def default_content_type(content_type='text/plain'):
@@ -19,6 +23,62 @@ def default_content_type(content_type='text/plain'):
return decorator
def convert_query_param_to_type(value, schema_property):
"""
Convert a query parameter string to the appropriate type based on schema definition.
Args:
value: String value from query parameter
schema_property: Schema property definition with 'type' or 'anyOf' field
Returns:
Converted value in the appropriate type
"""
# Handle anyOf schemas (extract the first type)
if 'anyOf' in schema_property:
# Use the first non-null type from anyOf
for option in schema_property['anyOf']:
if option.get('type') and option.get('type') != 'null':
prop_type = option.get('type')
break
else:
prop_type = None
else:
prop_type = schema_property.get('type')
# Handle array type (e.g., notification_urls)
if prop_type == 'array':
# Support both comma-separated and JSON array format
if value.startswith('['):
try:
return json.loads(value)
except json.JSONDecodeError:
return [v.strip() for v in value.split(',')]
return [v.strip() for v in value.split(',')]
# Handle object type (e.g., time_between_check, headers)
elif prop_type == 'object':
try:
return json.loads(value)
except json.JSONDecodeError:
raise ValueError(f"Invalid JSON object for field: {value}")
# Handle boolean type
elif prop_type == 'boolean':
return strtobool(value)
# Handle integer type
elif prop_type == 'integer':
return int(value)
# Handle number type (float)
elif prop_type == 'number':
return float(value)
# Default: return as string
return value
class Import(Resource):
def __init__(self, **kwargs):
# datastore is a black box dependency
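
A short usage sketch of the convert_query_param_to_type helper added above; the module path matches the import used by the tests later in this diff, but the schema_property dicts are invented for illustration rather than copied from the real Watch schema.

# Illustrative calls to convert_query_param_to_type; the schema_property dicts are made up.
from changedetectionio.api.Import import convert_query_param_to_type

print(convert_query_param_to_type("true", {"type": "boolean"}))                 # truthy, via strtobool
print(convert_query_param_to_type("mailto://a,mailto://b", {"type": "array"}))  # ['mailto://a', 'mailto://b']
print(convert_query_param_to_type('{"hours": 24}', {"type": "object"}))         # {'hours': 24}
print(convert_query_param_to_type("5", {"anyOf": [{"type": "null"},
                                                  {"type": "integer"}]}))       # 5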
@@ -28,40 +88,127 @@ class Import(Resource):
@default_content_type('text/plain') #3547 #3542
@validate_openapi_request('importWatches')
def post(self):
"""Import a list of watched URLs."""
"""Import a list of watched URLs with optional watch configuration."""
# Special parameters that are NOT watch configuration
special_params = {'tag', 'tag_uuids', 'dedupe', 'proxy'}
extras = {}
# Handle special 'proxy' parameter
if request.args.get('proxy'):
plist = self.datastore.proxy_list
if not request.args.get('proxy') in plist:
return "Invalid proxy choice, currently supported proxies are '{}'".format(', '.join(plist)), 400
proxy_list_str = ', '.join(plist) if plist else 'none configured'
return f"Invalid proxy choice, currently supported proxies are '{proxy_list_str}'", 400
else:
extras['proxy'] = request.args.get('proxy')
# Handle special 'dedupe' parameter
dedupe = strtobool(request.args.get('dedupe', 'true'))
# Handle special 'tag' and 'tag_uuids' parameters
tags = request.args.get('tag')
tag_uuids = request.args.get('tag_uuids')
if tag_uuids:
tag_uuids = tag_uuids.split(',')
# Extract ALL other query parameters as watch configuration
schema_properties = schema_create_watch.get('properties', {})
for param_name, param_value in request.args.items():
# Skip special parameters
if param_name in special_params:
continue
# Skip if not in schema (unknown parameter)
if param_name not in schema_properties:
return f"Unknown watch configuration parameter: {param_name}", 400
# Convert to appropriate type based on schema
try:
converted_value = convert_query_param_to_type(param_value, schema_properties[param_name])
extras[param_name] = converted_value
except (ValueError, json.JSONDecodeError) as e:
return f"Invalid value for parameter '{param_name}': {str(e)}", 400
# Validate processor if provided
if 'processor' in extras:
from changedetectionio.processors import available_processors
available = [p[0] for p in available_processors()]
if extras['processor'] not in available:
return f"Invalid processor '{extras['processor']}'. Available processors: {', '.join(available)}", 400
# Validate fetch_backend if provided
if 'fetch_backend' in extras:
from changedetectionio.content_fetchers import available_fetchers
available = [f[0] for f in available_fetchers()]
# Also allow 'system' and extra_browser_* patterns
is_valid = (
extras['fetch_backend'] == 'system' or
extras['fetch_backend'] in available or
extras['fetch_backend'].startswith('extra_browser_')
)
if not is_valid:
return f"Invalid fetch_backend '{extras['fetch_backend']}'. Available: system, {', '.join(available)}", 400
# Validate notification_urls if provided
if 'notification_urls' in extras:
from wtforms import ValidationError
from changedetectionio.api.Notifications import validate_notification_urls
try:
validate_notification_urls(extras['notification_urls'])
except ValidationError as e:
return f"Invalid notification_urls: {str(e)}", 400
urls = request.get_data().decode('utf8').splitlines()
added = []
# Clean and validate URLs upfront
urls_to_import = []
for url in urls:
url = url.strip()
if not len(url):
continue
# If hosts that only contain alphanumerics are allowed ("localhost" for example)
# Validate URL
if not is_safe_valid_url(url):
return f"Invalid or unsupported URL - {url}", 400
# Check for duplicates if dedupe is enabled
if dedupe and self.datastore.url_exists(url):
continue
new_uuid = self.datastore.add_watch(url=url, extras=extras, tag=tags, tag_uuids=tag_uuids)
added.append(new_uuid)
urls_to_import.append(url)
return added
# For small imports, process synchronously for immediate feedback
if len(urls_to_import) < IMPORT_SWITCH_TO_BACKGROUND_THRESHOLD:
added = []
for url in urls_to_import:
new_uuid = self.datastore.add_watch(url=url, extras=extras, tag=tags, tag_uuids=tag_uuids)
added.append(new_uuid)
return added, 200
# For large imports (>= 20), process in background thread
else:
import threading
from loguru import logger
def import_watches_background():
"""Background thread to import watches - discarded after completion."""
try:
added_count = 0
for url in urls_to_import:
try:
self.datastore.add_watch(url=url, extras=extras, tag=tags, tag_uuids=tag_uuids)
added_count += 1
except Exception as e:
logger.error(f"Error importing URL {url}: {e}")
logger.info(f"Background import complete: {added_count} watches created")
except Exception as e:
logger.error(f"Error in background import: {e}")
# Start background thread and return immediately
thread = threading.Thread(target=import_watches_background, daemon=True, name="ImportWatches-Background")
thread.start()
return {'status': f'Importing {len(urls_to_import)} URLs in background', 'count': len(urls_to_import)}, 202
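
From the client side, the rewritten endpoint returns HTTP 200 with a list of new watch UUIDs for small imports, and HTTP 202 with a status payload once the URL count reaches IMPORT_SWITCH_TO_BACKGROUND_THRESHOLD. A client sketch where host, port and API key are placeholders:

# Client-side sketch for the import endpoint above; host, port and API key are placeholders.
import requests

headers = {"x-api-key": "YOUR_API_KEY", "Content-Type": "text/plain"}
params = {"tag": "production", "processor": "restock_diff", "paused": "true"}
urls = "\n".join(f"https://example.com/product/{i}" for i in range(50))

res = requests.post("http://localhost:5000/api/v1/import",
                    headers=headers, params=params, data=urls)

if res.status_code == 200:
    # Small import: processed synchronously, body is the list of new watch UUIDs
    print("created:", res.json())
elif res.status_code == 202:
    # Large import: handed to a background thread, body reports only the count
    print("queued:", res.json())  # e.g. {'status': 'Importing 50 URLs in background', 'count': 50}
else:
    print("import rejected:", res.status_code, res.text)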

View File

@@ -481,6 +481,7 @@ class CreateWatch(Resource):
'last_error': watch['last_error'],
'link': watch.link,
'page_title': watch['page_title'],
'tags': [*tags], # Unpack dict keys to list (can't use list() since variable named 'list')
'title': watch['title'],
'url': watch['url'],
'viewed': watch.viewed

View File

@@ -848,7 +848,7 @@ def changedetection_app(config=None, datastore_o=None):
app.register_blueprint(watchlist.construct_blueprint(datastore=datastore, update_q=update_q, queuedWatchMetaData=queuedWatchMetaData), url_prefix='')
# Initialize Socket.IO server conditionally based on settings
socket_io_enabled = datastore.data['settings']['application']['ui'].get('socket_io_enabled', True)
socket_io_enabled = datastore.data['settings']['application'].get('ui', {}).get('socket_io_enabled', True)
if socket_io_enabled and app.config.get('batch_mode'):
socket_io_enabled = False
if socket_io_enabled:

View File

@@ -20,11 +20,9 @@ See: Watch.py model docstring for full Pydantic architecture explanation
See: processors/restock_diff/processor.py:184-192 for current manual implementation
"""
import os
from changedetectionio.model import watch_base
from changedetectionio.model.persistence import EntityPersistenceMixin
class model(EntityPersistenceMixin, watch_base):
"""
Tag domain model - groups watches and can override their settings.

View File

@@ -2,7 +2,7 @@ import os
import uuid
from changedetectionio import strtobool
from .persistence import EntityPersistenceMixin
from .persistence import EntityPersistenceMixin, _determine_entity_type
__all__ = ['EntityPersistenceMixin', 'watch_base']
@@ -510,7 +510,10 @@ class watch_base(dict):
# Save to disk via subclass implementation
try:
# Determine entity type from module name (Watch.py -> watch, Tag.py -> tag)
entity_type = _determine_entity_type(self.__class__)
filename = f"{entity_type}.json"
self._save_to_disk(data_dict, uuid)
logger.debug(f"Committed {self.__class__.__name__.lower()} {uuid}")
logger.debug(f"Committed {entity_type} {uuid} to {uuid}/{filename}")
except Exception as e:
logger.error(f"Failed to commit {uuid}: {e}")

View File

@@ -184,7 +184,8 @@ $(document).ready(function() {
}
// If it's a button in a form, submit the form
else if ($element.is('button')) {
$element.closest('form').submit();
// Use requestSubmit() to include the button's name/value in the form data
$element.closest('form')[0].requestSubmit($element[0]);
}
}
};

View File

@@ -33,9 +33,8 @@ except ImportError:
from ..processors import get_custom_watch_obj_for_processor
# Import the base class and helpers
from .file_saving_datastore import FileSavingDataStore, load_all_watches, load_all_tags, save_watch_atomic, save_tag_atomic, save_json_atomic
from .file_saving_datastore import FileSavingDataStore, load_all_watches, load_all_tags, save_json_atomic
from .updates import DatastoreUpdatesMixin
from .legacy_loader import has_legacy_datastore
# Because the server will run as a daemon and wont know the URL for notification links when firing off a notification
BASE_URL_NOT_SET_TEXT = '("Base URL" not set - see settings - notifications)'
@@ -78,7 +77,7 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
logger.info(f"Backing up changedetection.json due to new version to '{db_path_version_backup}'.")
copyfile(db_path, db_path_version_backup)
def _load_settings(self):
def _load_settings(self, filename="changedetection.json"):
"""
Load settings from storage.
@@ -87,7 +86,7 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
Returns:
dict: Settings data loaded from storage
"""
changedetection_json = os.path.join(self.datastore_path, "changedetection.json")
changedetection_json = os.path.join(self.datastore_path, filename)
logger.info(f"Loading settings from {changedetection_json}")
@@ -122,11 +121,23 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
if 'application' in settings_data['settings']:
self.__data['settings']['application'].update(settings_data['settings']['application'])
# More or less for the old format which had this data in the one url-watches.json
# can't hurt to leave it here,
if 'watching' in settings_data:
self.__data['watching'].update(settings_data['watching'])
def _rehydrate_tags(self):
"""Rehydrate tag entities from stored data."""
"""Rehydrate tag entities from stored data into Tag objects with restock_diff processor."""
from ..model import Tag
for uuid, tag in self.__data['settings']['application']['tags'].items():
self.__data['settings']['application']['tags'][uuid] = self.rehydrate_entity(
uuid, tag, processor_override='restock_diff'
# Force processor to restock_diff for override functionality (technical debt)
tag['processor'] = 'restock_diff'
self.__data['settings']['application']['tags'][uuid] = Tag.model(
datastore_path=self.datastore_path,
__datastore=self.__data,
default=tag
)
logger.info(f"Tag: {uuid} {tag['title']}")
@@ -139,23 +150,28 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
logger.info(f"Rehydrating {watch_count} watches...")
watching_rehydrated = {}
for uuid, watch_dict in self.__data.get('watching', {}).items():
watching_rehydrated[uuid] = self.rehydrate_entity(uuid, watch_dict)
if isinstance(watch_dict, dict):
watching_rehydrated[uuid] = self.rehydrate_entity(uuid, watch_dict)
else:
logger.error(f"Watch UUID {uuid} already rehydrated")
self.__data['watching'] = watching_rehydrated
logger.success(f"Rehydrated {watch_count} watches into Watch objects")
def _load_state(self):
def _load_state(self, main_settings_filename="changedetection.json"):
"""
Load complete datastore state from storage.
Orchestrates loading of settings, watches, and tags using polymorphic methods.
"""
# Load settings
settings_data = self._load_settings()
settings_data = self._load_settings(filename=main_settings_filename)
self._apply_settings(settings_data)
# Load watches (polymorphic - parent class method)
# Load watches, scan them from the disk
self._load_watches()
self._rehydrate_watches()
# Load tags from individual tag.json files
# These will override any tags in settings (migration path)
@@ -193,88 +209,73 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
# Check if datastore already exists
changedetection_json = os.path.join(self.datastore_path, "changedetection.json")
changedetection_json_old_schema = os.path.join(self.datastore_path, "url-watches.json")
if os.path.exists(changedetection_json):
# Load existing datastore (changedetection.json + watch.json files)
logger.info("Loading existing datastore")
try:
self._load_state()
except Exception as e:
logger.critical(f"Failed to load datastore: {e}")
raise
# Run schema updates if needed
# Pass current schema version from loaded datastore (defaults to 0 if not set)
# Load existing datastore (changedetection.json + watch.json files)
logger.info("Loading existing datastore")
self._load_state()
current_schema = self.data['settings']['application'].get('schema_version', 0)
self.run_updates(current_schema_version=current_schema)
# Legacy datastore detected - trigger migration, even works if the schema is much before the migration step.
elif os.path.exists(changedetection_json_old_schema):
logger.critical(f"Legacy datastore detected at {changedetection_json_old_schema}, loading and running updates")
self._load_state(main_settings_filename="url-watches.json")
# update 26 will load the whole old config from disk to __data
current_schema = self.__data['settings']['application'].get('schema_version', 0)
self.run_updates(current_schema_version=current_schema)
# Probably tags were also shifted to disk and many other changes, so best to reload here.
self._load_state()
else:
# No datastore yet - check if this is a fresh install or legacy migration
# Generate app_guid FIRST (required for all operations)
if "pytest" in sys.modules or "PYTEST_CURRENT_TEST" in os.environ:
self.__data['app_guid'] = "test-" + str(uuid_builder.uuid4())
else:
self.__data['app_guid'] = str(uuid_builder.uuid4())
self.init_fresh_install(include_default_watches=include_default_watches,
version_tag=version_tag)
# Generate RSS access token
self.__data['settings']['application']['rss_access_token'] = secrets.token_hex(16)
def init_fresh_install(self, include_default_watches, version_tag):
# Generate app_guid FIRST (required for all operations)
if "pytest" in sys.modules or "PYTEST_CURRENT_TEST" in os.environ:
self.__data['app_guid'] = "test-" + str(uuid_builder.uuid4())
else:
self.__data['app_guid'] = str(uuid_builder.uuid4())
# Generate API access token
self.__data['settings']['application']['api_access_token'] = secrets.token_hex(16)
# Generate RSS access token
self.__data['settings']['application']['rss_access_token'] = secrets.token_hex(16)
# Check if legacy datastore exists (url-watches.json)
if has_legacy_datastore(self.datastore_path):
# Legacy datastore detected - trigger migration
logger.critical(f"Legacy datastore detected at {self.datastore_path}/url-watches.json")
logger.critical("Migration will be triggered via update_26")
# Generate API access token
self.__data['settings']['application']['api_access_token'] = secrets.token_hex(16)
logger.warning(f"No datastore found, creating new datastore at {self.datastore_path}")
# Load the legacy datastore
from .legacy_loader import load_legacy_format
legacy_path = os.path.join(self.datastore_path, "url-watches.json")
legacy_data = load_legacy_format(legacy_path)
# Set schema version to latest (no updates needed)
latest_update_available = self.get_updates_available().pop()
logger.info(f"Marking fresh install to schema version {latest_update_available}")
self.__data['settings']['application']['schema_version'] = latest_update_available
if not legacy_data:
raise Exception("Failed to load legacy datastore from url-watches.json")
# Add default watches if requested
if include_default_watches:
self.add_watch(
url='https://news.ycombinator.com/',
tag='Tech news',
extras={'fetch_backend': 'html_requests'}
)
self.add_watch(
url='https://changedetection.io/CHANGELOG.txt',
tag='changedetection.io',
extras={'fetch_backend': 'html_requests'}
)
# Store the loaded data
self.__data = legacy_data
# CRITICAL: Rehydrate watches from dicts into Watch objects
# This ensures watches have their methods available during migration
self._rehydrate_watches()
# update_26 will save watches to individual files and create changedetection.json
# Next startup will load from new format normally
self.run_updates()
# Create changedetection.json immediately
try:
self._save_settings()
logger.info("Created changedetection.json for new datastore")
except Exception as e:
logger.error(f"Failed to create initial changedetection.json: {e}")
else:
# Fresh install - create new datastore
logger.warning(f"No datastore found, creating new datastore at {self.datastore_path}")
# Set schema version to latest (no updates needed)
updates_available = self.get_updates_available()
self.__data['settings']['application']['schema_version'] = updates_available.pop() if updates_available else 26
# Add default watches if requested
if include_default_watches:
self.add_watch(
url='https://news.ycombinator.com/',
tag='Tech news',
extras={'fetch_backend': 'html_requests'}
)
self.add_watch(
url='https://changedetection.io/CHANGELOG.txt',
tag='changedetection.io',
extras={'fetch_backend': 'html_requests'}
)
# Create changedetection.json immediately
try:
self._save_settings()
logger.info("Created changedetection.json for new datastore")
except Exception as e:
logger.error(f"Failed to create initial changedetection.json: {e}")
# Set version tag
self.__data['version_tag'] = version_tag
@@ -340,24 +341,21 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
"""
Build settings data structure for saving.
Tags are excluded - they are stored in individual {uuid}/tag.json files.
This keeps changedetection.json small and allows atomic tag updates.
Tags behavior depends on schema version:
- Before update_28 (schema < 28): Tags saved in settings for migration
- After update_28 (schema >= 28): Tags excluded from settings (in individual files)
Returns:
dict: Settings data ready for serialization (without tags)
dict: Settings data ready for serialization
"""
import copy
# Deep copy settings to avoid modifying the original
settings_copy = copy.deepcopy(self.__data['settings'])
# Replace tags dict with empty dict (tags are in individual tag.json files)
# We keep the empty dict for backwards compatibility and clear structure
settings_copy['application']['tags'] = {}
return {
'note': 'Settings file - watches are in {uuid}/watch.json, tags are in {uuid}/tag.json',
'app_guid': self.__data['app_guid'],
'app_guid': self.__data.get('app_guid'),
'settings': settings_copy,
'build_sha': self.__data.get('build_sha'),
'version_tag': self.__data.get('version_tag')
@@ -386,15 +384,14 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
Implementation of abstract method from FileSavingDataStore.
Delegates to helper function and stores results in internal data structure.
"""
watching = load_all_watches(
self.datastore_path,
self.rehydrate_entity
)
# Store loaded data
self.__data['watching'] = watching
logger.debug(f"Loaded {len(watching)} watches")
# @note this will also work for the old legacy format because self.__data['watching'] should already have them loaded by this point.
self.__data['watching'].update(load_all_watches(
self.datastore_path,
self.rehydrate_entity
))
logger.debug(f"Loaded {len(self.__data['watching'])} watches")
def _load_tags(self):
"""
@@ -403,9 +400,22 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
File backend implementation: reads individual tag.json files.
Tags loaded from files override any tags in settings (migration path).
"""
from ..model import Tag
def rehydrate_tag(uuid, entity_dict):
"""Rehydrate tag as Tag object with forced restock_diff processor."""
entity_dict['uuid'] = uuid
entity_dict['processor'] = 'restock_diff' # Force processor for override functionality
return Tag.model(
datastore_path=self.datastore_path,
__datastore=self.__data,
default=entity_dict
)
tags = load_all_tags(
self.datastore_path,
self.rehydrate_entity
rehydrate_tag
)
# Override settings tags with loaded tags

View File

@@ -207,15 +207,6 @@ def save_watch_atomic(watch_dir, uuid, watch_dict):
save_entity_atomic(watch_dir, uuid, watch_dict, "watch.json", "watch", max_size_mb=10)
def save_tag_atomic(tag_dir, uuid, tag_dict):
"""
Save a tag to disk using atomic write pattern.
Convenience wrapper around save_entity_atomic for tags.
Kept for backwards compatibility.
"""
save_entity_atomic(tag_dir, uuid, tag_dict, "tag.json", "tag", max_size_mb=1)
def load_watch_from_file(watch_json, uuid, rehydrate_entity_func):
"""
@@ -227,8 +218,7 @@ def load_watch_from_file(watch_json, uuid, rehydrate_entity_func):
rehydrate_entity_func: Function to convert dict to Watch object
Returns:
Tuple of (Watch object, raw_data_dict) or (None, None) if failed
The raw_data_dict is needed to compute the hash before rehydration
Watch object or None if failed
"""
try:
# Check file size before reading
@@ -241,7 +231,7 @@ def load_watch_from_file(watch_json, uuid, rehydrate_entity_func):
f"File: {watch_json}. This indicates a bug or data corruption. "
f"Watch will be skipped."
)
return None, None
return None
if HAS_ORJSON:
with open(watch_json, 'rb') as f:
@@ -250,10 +240,9 @@ def load_watch_from_file(watch_json, uuid, rehydrate_entity_func):
with open(watch_json, 'r', encoding='utf-8') as f:
watch_data = json.load(f)
# Return both the raw data and the rehydrated watch
# Raw data is needed to compute hash before rehydration changes anything
# Rehydrate and return watch object
watch_obj = rehydrate_entity_func(uuid, watch_data)
return watch_obj, watch_data
return watch_obj
except json.JSONDecodeError as e:
logger.critical(
@@ -261,7 +250,7 @@ def load_watch_from_file(watch_json, uuid, rehydrate_entity_func):
f"File: {watch_json}. Error: {e}. "
f"Watch will be skipped and may need manual recovery from backup."
)
return None, None
return None
except ValueError as e:
# orjson raises ValueError for invalid JSON
if "invalid json" in str(e).lower() or HAS_ORJSON:
@@ -270,15 +259,15 @@ def load_watch_from_file(watch_json, uuid, rehydrate_entity_func):
f"File: {watch_json}. Error: {e}. "
f"Watch will be skipped and may need manual recovery from backup."
)
return None, None
return None
# Re-raise if it's not a JSON parsing error
raise
except FileNotFoundError:
logger.error(f"Watch file not found: {watch_json} for watch {uuid}")
return None, None
return None
except Exception as e:
logger.error(f"Failed to load watch {uuid} from {watch_json}: {e}")
return None, None
return None
def load_all_watches(datastore_path, rehydrate_entity_func):
@@ -318,8 +307,8 @@ def load_all_watches(datastore_path, rehydrate_entity_func):
for watch_json in watch_files:
# Extract UUID from path: /datastore/{uuid}/watch.json
uuid_dir = os.path.basename(os.path.dirname(watch_json))
watch, raw_data = load_watch_from_file(watch_json, uuid_dir, rehydrate_entity_func)
if watch and raw_data:
watch = load_watch_from_file(watch_json, uuid_dir, rehydrate_entity_func)
if watch:
watching[uuid_dir] = watch
loaded += 1
@@ -375,8 +364,10 @@ def load_tag_from_file(tag_json, uuid, rehydrate_entity_func):
with open(tag_json, 'r', encoding='utf-8') as f:
tag_data = json.load(f)
tag_data['processor'] = 'restock_diff'
# Rehydrate tag (convert dict to Tag object)
tag_obj = rehydrate_entity_func(uuid, tag_data, processor_override='restock_diff')
# processor_override is set inside the rehydration function
tag_obj = rehydrate_entity_func(uuid, tag_data)
return tag_obj
except json.JSONDecodeError as e:

View File

@@ -1,66 +0,0 @@
"""
Legacy format loader for url-watches.json.
Provides functions to detect and load from the legacy monolithic JSON format.
Used during migration (update_26) to transition to individual watch.json files.
"""
import os
import json
from loguru import logger
# Try to import orjson for faster JSON serialization
try:
import orjson
HAS_ORJSON = True
except ImportError:
HAS_ORJSON = False
def has_legacy_datastore(datastore_path):
"""
Check if a legacy url-watches.json file exists.
This is used by update_26 to determine if migration is needed.
Args:
datastore_path: Path to datastore directory
Returns:
bool: True if url-watches.json exists
"""
url_watches_json = os.path.join(datastore_path, "url-watches.json")
return os.path.exists(url_watches_json)
def load_legacy_format(json_store_path):
"""
Load datastore from legacy url-watches.json format.
Args:
json_store_path: Full path to url-watches.json file
Returns:
dict: Loaded datastore data with 'watching', 'settings', etc.
None: If file doesn't exist or loading failed
"""
logger.info(f"Loading from legacy format: {json_store_path}")
if not os.path.isfile(json_store_path):
logger.warning(f"Legacy file not found: {json_store_path}")
return None
try:
if HAS_ORJSON:
with open(json_store_path, 'rb') as f:
data = orjson.loads(f.read())
else:
with open(json_store_path, 'r', encoding='utf-8') as f:
data = json.load(f)
logger.info(f"Loaded {len(data.get('watching', {}))} watches from legacy format")
return data
except Exception as e:
logger.error(f"Failed to load legacy format: {e}")
return None

View File

@@ -16,12 +16,18 @@ import time
from loguru import logger
from copy import deepcopy
# Try to import orjson for faster JSON serialization
try:
import orjson
HAS_ORJSON = True
except ImportError:
HAS_ORJSON = False
from ..html_tools import TRANSLATE_WHITESPACE_TABLE
from ..processors.restock_diff import Restock
from ..blueprint.rss import RSS_CONTENT_FORMAT_DEFAULT
from ..model import USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH
from .file_saving_datastore import save_watch_atomic
def create_backup_tarball(datastore_path, update_number):
"""
@@ -97,7 +103,7 @@ def create_backup_tarball(datastore_path, update_number):
tar.add(tag_json, arcname=f"{entry}/tag.json")
tag_count += 1
logger.success(f"Backup created: {backup_filename} ({watch_count} watches, {tag_count} tags)")
logger.success(f"Backup created: {backup_filename} ({watch_count} watches from disk, {tag_count} tags from disk)")
return backup_path
except Exception as e:
@@ -137,6 +143,7 @@ class DatastoreUpdatesMixin:
return updates_available
def run_updates(self, current_schema_version=None):
import sys
"""
Run all pending schema updates sequentially.
@@ -154,12 +161,29 @@ class DatastoreUpdatesMixin:
2. For each update > current schema version:
- Create backup of datastore
- Run update method
- Update schema version
- Mark settings and watches dirty
- Update schema version and commit settings
- Commit all watches and tags
3. If any update fails, stop processing
4. Save all changes immediately
4. All changes saved via individual .commit() calls
"""
updates_available = self.get_updates_available()
if self.data.get('watching'):
test_watch = self.data['watching'].get(next(iter(self.data.get('watching', {}))))
from ..model.Watch import model
if not isinstance(test_watch, model):
import sys
logger.critical("Cannot run updates! Watch structure must be re-hydrated back to a Watch model object!")
sys.exit(1)
if self.data['settings']['application'].get('tags',{}):
test_tag = self.data['settings']['application'].get('tags',{}).get(next(iter(self.data['settings']['application'].get('tags',{}))))
from ..model.Tag import model as tag_model
if not isinstance(test_tag, tag_model):
import sys
logger.critical("Cannot run updates! Watch tag/group structure must be re-hydrated back to a Tag model object!")
sys.exit(1)
# Determine current schema version
if current_schema_version is None:
@@ -201,44 +225,15 @@ class DatastoreUpdatesMixin:
try:
update_method = getattr(self, f"update_{update_n}")()
except Exception as e:
logger.error(f"Error while trying update_{update_n}")
logger.error(e)
# Don't run any more updates
return
logger.critical(f"Error while trying update_{update_n}")
logger.exception(e)
sys.exit(1)
else:
# Bump the version, important
# Bump the version
self.data['settings']['application']['schema_version'] = update_n
self.commit()
# CRITICAL: Save all watches so changes are persisted
# Most updates modify watches, and in the new individual watch.json structure,
# we need to ensure those changes are saved
logger.info(f"Saving all {len(self.data['watching'])} watches after update_{update_n} (so that it saves them to disk)")
for uuid in self.data['watching'].keys():
self.data['watching'][uuid].commit()
# CRITICAL: Save all tags so changes are persisted
# After update_27, tags have individual tag.json files
# For updates before update_27, this will fail silently (tags don't have commit() yet)
tags = self.data['settings']['application'].get('tags', {})
if tags and update_n >= 27:
logger.info(f"Saving all {len(tags)} tags after update_{update_n}")
for uuid in tags.keys():
try:
tags[uuid].commit()
except AttributeError:
# Tag doesn't have commit() method yet (pre-update_27)
pass
# Save changes immediately after each update (more resilient than batching)
logger.critical(f"Saving all changes after update_{update_n}")
try:
self._save_dirty_items()
logger.success(f"Update {update_n} changes saved successfully")
except Exception as e:
logger.error(f"Failed to save update_{update_n} changes: {e}")
# Don't raise - update already ran, but changes might not be persisted
# The update will try to run again on next startup
logger.success(f"Update {update_n} completed")
# Track which updates ran
updates_ran.append(update_n)
@@ -488,6 +483,14 @@ class DatastoreUpdatesMixin:
del self.data['watching'][uuid]['extract_title_as_title']
if self.data['settings']['application'].get('extract_title_as_title'):
# Ensure 'ui' key exists (defensive for edge cases where base_config merge didn't happen)
if 'ui' not in self.data['settings']['application']:
self.data['settings']['application']['ui'] = {
'use_page_title_in_list': True,
'open_diff_in_new_tab': True,
'socket_io_enabled': True,
'favicons_enabled': True
}
self.data['settings']['application']['ui']['use_page_title_in_list'] = self.data['settings']['application'].get('extract_title_as_title')
def update_21(self):
@@ -575,27 +578,6 @@ class DatastoreUpdatesMixin:
logger.critical("COPY-based migration: url-watches.json will remain intact for rollback")
logger.critical("=" * 80)
# Check if already migrated
changedetection_json = os.path.join(self.datastore_path, "changedetection.json")
if os.path.exists(changedetection_json):
logger.info("Migration already completed (changedetection.json exists), skipping")
return
# Check if we need to load legacy data
from .legacy_loader import has_legacy_datastore, load_legacy_format
if not has_legacy_datastore(self.datastore_path):
logger.info("No legacy datastore found, nothing to migrate")
return
# Load legacy data from url-watches.json
logger.critical("Loading legacy datastore from url-watches.json...")
legacy_path = os.path.join(self.datastore_path, "url-watches.json")
legacy_data = load_legacy_format(legacy_path)
if not legacy_data:
raise Exception("Failed to load legacy datastore from url-watches.json")
# Populate settings from legacy data
logger.info("Populating settings from legacy data...")
watch_count = len(self.data['watching'])
@@ -607,9 +589,7 @@ class DatastoreUpdatesMixin:
saved_count = 0
for uuid, watch in self.data['watching'].items():
try:
watch_dict = dict(watch)
watch_dir = os.path.join(self.datastore_path, uuid)
save_watch_atomic(watch_dir, uuid, watch_dict)
watch.commit()
saved_count += 1
if saved_count % 100 == 0:
@@ -655,36 +635,20 @@ class DatastoreUpdatesMixin:
# Phase 4: Verify settings file exists
logger.critical("Phase 4/4: Verifying changedetection.json exists...")
changedetection_json_new_schema=os.path.join(self.datastore_path, "changedetection.json")
if not os.path.isfile(changedetection_json_new_schema):
import sys
logger.critical("Migration failed, changedetection.json not found after update ran!")
sys.exit(1)
if not os.path.isfile(changedetection_json):
raise Exception(
"Migration failed: changedetection.json not found after save. "
"url-watches.json remains intact, safe to retry."
)
logger.critical("Phase 4 complete: Verified changedetection.json exists")
# Success! Now reload from new format
logger.critical("Reloading datastore from new format...")
self._load_state() # Includes load_watches
# write it to disk, it will be saved without ['watching'] in the JSON db because we find it from disk glob
self._save_settings()
logger.success("Datastore reloaded from new format successfully")
# Verify all watches have hashes after migration
missing_hashes = [uuid for uuid in self.data['watching'].keys() if uuid not in self._watch_hashes]
if missing_hashes:
logger.error(f"WARNING: {len(missing_hashes)} watches missing hashes after migration: {missing_hashes[:5]}")
else:
logger.success(f"All {len(self.data['watching'])} watches have valid hashes after migration")
# Set schema version to latest available update
# This prevents re-running updates and re-marking all watches as dirty
updates_available = self.get_updates_available()
latest_schema = updates_available[-1] if updates_available else 26
self.data['settings']['application']['schema_version'] = latest_schema
self.commit()
logger.info(f"Set schema_version to {latest_schema} (migration complete, all watches already saved)")
logger.critical("=" * 80)
logger.critical("MIGRATION COMPLETED SUCCESSFULLY!")
logger.critical("=" * 80)
@@ -705,22 +669,24 @@ class DatastoreUpdatesMixin:
def update_26(self):
self.migrate_legacy_db_format()
def update_27(self):
def update_28(self):
"""
Migrate tags to individual tag.json files.
Tags are currently saved as part of changedetection.json (settings).
This migration moves them to individual {uuid}/tag.json files,
similar to how watches are stored.
Tags are currently saved only in changedetection.json (settings).
This migration ALSO saves them to individual {uuid}/tag.json files,
similar to how watches are stored (dual storage).
Benefits:
- Reduces changedetection.json size
- Allows atomic tag updates without rewriting entire settings
- Enables independent tag versioning/backup
- Maintains backwards compatibility (tags stay in settings too)
"""
# Force save as tag.json (not watch.json) even if object is corrupted
logger.critical("=" * 80)
logger.critical("Running migration: Individual tag persistence (update_27)")
logger.critical("Moving tags from settings to individual tag.json files")
logger.critical("Running migration: Individual tag persistence (update_28)")
logger.critical("Creating individual tag.json files")
logger.critical("=" * 80)
tags = self.data['settings']['application'].get('tags', {})
@@ -735,17 +701,15 @@ class DatastoreUpdatesMixin:
saved_count = 0
failed_count = 0
for uuid, tag in tags.items():
for uuid, tag_data in tags.items():
try:
# Save tag to its own file
tag.commit()
tag_data.commit()
saved_count += 1
if saved_count % 10 == 0:
logger.info(f" Progress: {saved_count}/{tag_count} tags migrated...")
except Exception as e:
logger.error(f"Failed to save tag {uuid} ({tag.get('title', 'unknown')}): {e}")
logger.error(f"Failed to save tag {uuid} ({tag_data.get('title', 'unknown')}): {e}")
failed_count += 1
if failed_count > 0:
@@ -753,9 +717,9 @@ class DatastoreUpdatesMixin:
else:
logger.success(f"Migration complete: {saved_count} tags saved to individual tag.json files")
# Tags remain in settings for backwards compatibility
# On next load, _load_tags() will read from tag.json files and override settings
logger.info("Tags remain in settings for backwards compatibility")
logger.info("Future tag edits will save to tag.json files only")
# Tags remain in settings for backwards compatibility AND easy access
# On next load, _load_tags() will read from tag.json files and merge with settings
logger.info("Tags saved to both settings AND individual tag.json files")
logger.info("Future tag edits will update both locations (dual storage)")
logger.critical("=" * 80)
logger.critical("=" * 80)

View File

@@ -489,6 +489,7 @@ def test_api_import(client, live_server, measure_memory_usage, datastore_path):
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
# Test 1: Basic import with tag
res = client.post(
url_for("import") + "?tag=import-test",
data='https://website1.com\r\nhttps://website2.com',
@@ -507,6 +508,209 @@ def test_api_import(client, live_server, measure_memory_usage, datastore_path):
res = client.get(url_for('tags.tags_overview_page'))
assert b'import-test' in res.data
# Test 2: Import with watch configuration fields (issue #3845)
# Test string field (include_filters), boolean (paused), and processor
import urllib.parse
params = urllib.parse.urlencode({
'tag': 'config-test',
'include_filters': 'div.content',
'paused': 'true',
'processor': 'text_json_diff',
'title': 'Imported with Config'
})
res = client.post(
url_for("import") + "?" + params,
data='https://website3.com',
headers={'x-api-key': api_key},
follow_redirects=True
)
assert res.status_code == 200
assert len(res.json) == 1
uuid = res.json[0]
# Verify the configuration was applied
watch = live_server.app.config['DATASTORE'].data['watching'][uuid]
assert watch['include_filters'] == ['div.content'], "include_filters should be set as array"
assert watch['paused'] == True, "paused should be True"
assert watch['processor'] == 'text_json_diff', "processor should be set"
assert watch['title'] == 'Imported with Config', "title should be set"
# Test 3: Import with array field (notification_urls) - using valid Apprise format
params = urllib.parse.urlencode({
'tag': 'notification-test',
'notification_urls': 'mailto://test@example.com,mailto://admin@example.com'
})
res = client.post(
url_for("import") + "?" + params,
data='https://website4.com',
headers={'x-api-key': api_key},
follow_redirects=True
)
assert res.status_code == 200
uuid = res.json[0]
watch = live_server.app.config['DATASTORE'].data['watching'][uuid]
assert 'mailto://test@example.com' in watch['notification_urls'], "notification_urls should contain first email"
assert 'mailto://admin@example.com' in watch['notification_urls'], "notification_urls should contain second email"
# Test 4: Import with object field (time_between_check)
import json
time_config = json.dumps({"hours": 2, "minutes": 30})
params = urllib.parse.urlencode({
'tag': 'schedule-test',
'time_between_check': time_config
})
res = client.post(
url_for("import") + "?" + params,
data='https://website5.com',
headers={'x-api-key': api_key},
follow_redirects=True
)
assert res.status_code == 200
uuid = res.json[0]
watch = live_server.app.config['DATASTORE'].data['watching'][uuid]
assert watch['time_between_check']['hours'] == 2, "time_between_check hours should be 2"
assert watch['time_between_check']['minutes'] == 30, "time_between_check minutes should be 30"
# Test 5: Import with invalid processor (should fail)
res = client.post(
url_for("import") + "?processor=invalid_processor",
data='https://website6.com',
headers={'x-api-key': api_key},
follow_redirects=True
)
assert res.status_code == 400, "Should reject invalid processor"
assert b"Invalid processor" in res.data, "Error message should mention invalid processor"
# Test 6: Import with invalid field (should fail)
res = client.post(
url_for("import") + "?unknown_field=value",
data='https://website7.com',
headers={'x-api-key': api_key},
follow_redirects=True
)
assert res.status_code == 400, "Should reject unknown field"
assert b"Unknown watch configuration parameter" in res.data, "Error message should mention unknown parameter"
def test_api_import_small_synchronous(client, live_server, measure_memory_usage, datastore_path):
"""Test that small imports (< threshold) are processed synchronously"""
from changedetectionio.api.Import import IMPORT_SWITCH_TO_BACKGROUND_THRESHOLD
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
# Use local test endpoint to avoid network delays
test_url_base = url_for('test_endpoint', _external=True)
# Create URLs: threshold - 1 to stay under limit
num_urls = min(5, IMPORT_SWITCH_TO_BACKGROUND_THRESHOLD - 1) # Use small number for faster test
urls = '\n'.join([f'{test_url_base}?id=small-{i}' for i in range(num_urls)])
# Import small batch
res = client.post(
url_for("import") + "?tag=small-test",
data=urls,
headers={'x-api-key': api_key},
follow_redirects=True
)
# Should return 200 OK with UUID list (synchronous)
assert res.status_code == 200, f"Should return 200 for small imports, got {res.status_code}"
assert isinstance(res.json, list), "Response should be a list of UUIDs"
assert len(res.json) == num_urls, f"Should return {num_urls} UUIDs, got {len(res.json)}"
# Verify all watches were created immediately
for uuid in res.json:
assert uuid in live_server.app.config['DATASTORE'].data['watching'], \
f"Watch {uuid} should exist immediately after synchronous import"
print(f"\n✓ Successfully created {num_urls} watches synchronously")
def test_api_import_large_background(client, live_server, measure_memory_usage, datastore_path):
"""Test that large imports (>= threshold) are processed in background thread"""
from changedetectionio.api.Import import IMPORT_SWITCH_TO_BACKGROUND_THRESHOLD
import time
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
# Use local test endpoint to avoid network delays
test_url_base = url_for('test_endpoint', _external=True)
# Create URLs: threshold + 10 to trigger background processing
num_urls = IMPORT_SWITCH_TO_BACKGROUND_THRESHOLD + 10
urls = '\n'.join([f'{test_url_base}?id=bulk-{i}' for i in range(num_urls)])
# Import large batch
res = client.post(
url_for("import") + "?tag=bulk-test",
data=urls,
headers={'x-api-key': api_key},
follow_redirects=True
)
# Should return 202 Accepted (background processing)
assert res.status_code == 202, f"Should return 202 for large imports, got {res.status_code}"
assert b"background" in res.data.lower(), "Response should mention background processing"
# Extract expected count from response
response_json = res.json
assert 'count' in response_json, "Response should include count"
assert response_json['count'] == num_urls, f"Count should be {num_urls}, got {response_json['count']}"
# Wait for background thread to complete (with timeout)
max_wait = 10 # seconds
wait_interval = 0.5
elapsed = 0
watches_created = 0
while elapsed < max_wait:
time.sleep(wait_interval)
elapsed += wait_interval
# Count how many watches have been created
watches_created = len([
uuid for uuid, watch in live_server.app.config['DATASTORE'].data['watching'].items()
if 'id=bulk-' in watch['url']
])
if watches_created == num_urls:
break
# Verify all watches were created
assert watches_created == num_urls, \
f"Expected {num_urls} watches to be created, but found {watches_created} after {elapsed}s"
# Verify watches have correct configuration
bulk_watches = [
watch for watch in live_server.app.config['DATASTORE'].data['watching'].values()
if 'id=bulk-' in watch['url']
]
assert len(bulk_watches) == num_urls, "All bulk watches should exist"
# Check that they have the correct tag
datastore = live_server.app.config['DATASTORE']
# Get UUIDs of bulk watches by filtering the datastore keys
bulk_watch_uuids = [
uuid for uuid, watch in live_server.app.config['DATASTORE'].data['watching'].items()
if 'id=bulk-' in watch['url']
]
for watch_uuid in bulk_watch_uuids:
tags = datastore.get_all_tags_for_watch(uuid=watch_uuid)
tag_names = [t['title'] for t in tags.values()]
assert 'bulk-test' in tag_names, f"Watch {watch_uuid} should have 'bulk-test' tag"
print(f"\n✓ Successfully created {num_urls} watches in background (took {elapsed}s)")
def test_api_conflict_UI_password(client, live_server, measure_memory_usage, datastore_path):

View File

@@ -18,7 +18,7 @@ def test_api_tags_listing(client, live_server, measure_memory_usage, datastore_p
url_for("tags"),
headers={'x-api-key': api_key}
)
assert res.text.strip() == "{}", "Should be empty list"
assert res.get_data(as_text=True).strip() == "{}", "Should be empty list"
assert res.status_code == 200
res = client.post(
@@ -36,7 +36,7 @@ def test_api_tags_listing(client, live_server, measure_memory_usage, datastore_p
headers={'x-api-key': api_key}
)
assert res.status_code == 200
assert new_tag_uuid in res.text
assert new_tag_uuid in res.get_data(as_text=True)
assert res.json[new_tag_uuid]['title'] == tag_title
assert res.json[new_tag_uuid]['notification_muted'] == False
@@ -118,6 +118,16 @@ def test_api_tags_listing(client, live_server, measure_memory_usage, datastore_p
assert res.status_code == 200
assert new_tag_uuid in res.json.get('tags', [])
# Test that tags are returned when listing ALL watches (issue #3854)
res = client.get(
url_for("createwatch"), # GET /api/v1/watch - list all watches
headers={'x-api-key': api_key}
)
assert res.status_code == 200
assert watch_uuid in res.json, "Watch should be in the list"
assert 'tags' in res.json[watch_uuid], "Tags field should be present in watch list"
assert new_tag_uuid in res.json[watch_uuid]['tags'], "Tag UUID should be in tags array"
# Check recheck by tag
before_check_time = live_server.app.config['DATASTORE'].data['watching'][watch_uuid].get('last_checked')
time.sleep(1)
@@ -148,7 +158,7 @@ def test_api_tags_listing(client, live_server, measure_memory_usage, datastore_p
headers={'x-api-key': api_key}
)
assert res.status_code == 200
assert new_tag_uuid not in res.text
assert new_tag_uuid not in res.get_data(as_text=True)
# Verify tag was removed from watch
res = client.get(

View File

@@ -5,6 +5,8 @@ from flask import url_for
from .util import live_server_setup, wait_for_all_checks, extract_rss_token_from_UI, get_UUID_for_tag_name, extract_UUID_from_client, delete_all_watches
import os
from ..store import ChangeDetectionStore
# def test_setup(client, live_server, measure_memory_usage, datastore_path):
# live_server_setup(live_server) # Setup on conftest per function
@@ -487,7 +489,6 @@ def test_tag_json_persistence(client, live_server, measure_memory_usage, datasto
- Tag deletion removes tag.json file
"""
import json
from changedetectionio.store import ChangeDetectionStore
datastore = client.application.config.get('DATASTORE')
@@ -569,9 +570,6 @@ def test_tag_json_migration_update_27(client, live_server, measure_memory_usage,
This simulates a pre-update_27 datastore and verifies migration works.
"""
import json
from changedetectionio.store import ChangeDetectionStore
datastore = client.application.config.get('DATASTORE')
# 1. Create multiple tags
tag_names = ['migration-tag-1', 'migration-tag-2', 'migration-tag-3']

View File

@@ -28,7 +28,7 @@ info:
For example: `x-api-key: YOUR_API_KEY`
version: 0.1.4
version: 0.1.5
contact:
name: ChangeDetection.io
url: https://github.com/dgtlmoon/changedetection.io
@@ -1503,46 +1503,92 @@ paths:
post:
operationId: importWatches
tags: [Import]
summary: Import watch URLs
description: Import a list of URLs to monitor. Accepts line-separated URLs in request body.
summary: Import watch URLs with configuration
description: |
Import a list of URLs to monitor with optional watch configuration. Accepts line-separated URLs in request body.
**Configuration via Query Parameters:**
You can pass ANY watch configuration field as query parameters to apply settings to all imported watches.
All parameters from the Watch schema are supported (processor, fetch_backend, notification_urls, etc.).
**Special Parameters:**
- `tag` / `tag_uuids` - Assign tags to imported watches
- `proxy` - Use specific proxy for imported watches
- `dedupe` - Skip duplicate URLs (default: true)
**Type Conversion:**
- Booleans: `true`, `false`, `1`, `0`, `yes`, `no`
- Arrays: Comma-separated or JSON format (`[item1,item2]`)
- Objects: JSON format (`{"key":"value"}`)
- Numbers: Parsed as int or float
x-code-samples:
- lang: 'curl'
source: |
# Basic import
curl -X POST "http://localhost:5000/api/v1/import" \
-H "x-api-key: YOUR_API_KEY" \
-H "Content-Type: text/plain" \
-d $'https://example.com\nhttps://example.org\nhttps://example.net'
# Import with processor and fetch backend
curl -X POST "http://localhost:5000/api/v1/import?processor=restock_diff&fetch_backend=html_webdriver" \
-H "x-api-key: YOUR_API_KEY" \
-H "Content-Type: text/plain" \
-d $'https://example.com\nhttps://example.org'
# Import with multiple settings
curl -X POST "http://localhost:5000/api/v1/import?processor=restock_diff&paused=true&tag=production" \
-H "x-api-key: YOUR_API_KEY" \
-H "Content-Type: text/plain" \
-d $'https://example.com'
- lang: 'Python'
source: |
import requests
headers = {
'x-api-key': 'YOUR_API_KEY',
'Content-Type': 'text/plain'
}
# Basic import
urls = 'https://example.com\nhttps://example.org\nhttps://example.net'
response = requests.post('http://localhost:5000/api/v1/import',
response = requests.post('http://localhost:5000/api/v1/import',
headers=headers, data=urls)
print(response.json())
# Import with configuration
params = {
'processor': 'restock_diff',
'fetch_backend': 'html_webdriver',
'paused': 'false',
'tag': 'production'
}
response = requests.post('http://localhost:5000/api/v1/import',
headers=headers, params=params, data=urls)
print(response.json())
parameters:
- name: tag_uuids
in: query
description: Tag UUID to apply to imported web page change monitors (watches)
description: Tag UUID(s) to apply to imported watches (comma-separated for multiple)
schema:
type: string
example: "550e8400-e29b-41d4-a716-446655440000"
- name: tag
in: query
description: Tag name to apply to imported web page change monitors (watches)
description: Tag name to apply to imported watches
schema:
type: string
example: "production"
- name: proxy
in: query
description: Proxy key to use for imported web page change monitors (watches)
description: Proxy key to use for imported watches
schema:
type: string
example: "proxy1"
- name: dedupe
in: query
description: Remove duplicate URLs (default true)
description: Skip duplicate URLs (default true)
schema:
type: boolean
default: true

File diff suppressed because one or more lines are too long