Compare commits


6 Commits

Author SHA1 Message Date
dgtlmoon b5b5e8d3e4 API - Import - Ability to set any watch value as HTTP URL Query value, for example ?processor=restock_diff&time_between_check={'hours':24} Re #3845 2026-02-11 07:07:02 +01:00
dgtlmoon 6e90a0bbd1 UI - Bulk checkbox operations modal confirmation fix Re #3853 2026-02-11 06:29:59 +01:00
dgtlmoon 987789425d Tags update fix (#3849) 2026-02-07 17:13:41 +01:00
dgtlmoon 892b645147 Refactor for Tags storage (#3848) 2026-02-07 13:13:02 +01:00
dgtlmoon 278da3fa9b Including uptime in UI settings/info 2026-02-07 03:50:49 +01:00
dgtlmoon c577bd700c Refactor watch saving backend, closes #3846 (#3847) 2026-02-07 03:41:35 +01:00
27 changed files with 1140 additions and 275 deletions

View File

@@ -182,7 +182,6 @@ def main():
from changedetectionio.flask_app import changedetection_app
datastore_path = None
do_cleanup = False
# Set a default logger level
logger_level = 'DEBUG'
include_default_watches = True
@@ -265,7 +264,7 @@ def main():
i += 1
try:
opts, args = getopt.getopt(cleaned_argv[1:], "6Ccsd:h:p:l:P:", "port")
opts, args = getopt.getopt(cleaned_argv[1:], "6Csd:h:p:l:P:", "port")
except getopt.GetoptError as e:
print_help()
print(f'Error: {e}')
@@ -293,10 +292,6 @@ def main():
if opt == '-d':
datastore_path = arg
# Cleanup (remove text files that arent in the index)
if opt == '-c':
do_cleanup = True
# Create the datadir if it doesnt exist
if opt == '-C':
create_datastore_dir = True
@@ -602,10 +597,6 @@ def main():
else:
logger.info("SIGUSR1 handler only registered on Linux, skipped.")
# Go into cleanup mode
if do_cleanup:
datastore.remove_unused_snapshots()
app.config['datastore_path'] = datastore_path
@@ -614,7 +605,7 @@ def main():
return dict(right_sticky="v{}".format(datastore.data['version_tag']),
new_version_available=app.config['NEW_VERSION_AVAILABLE'],
has_password=datastore.data['settings']['application']['password'] != False,
socket_io_enabled=datastore.data['settings']['application']['ui'].get('socket_io_enabled', True),
socket_io_enabled=datastore.data['settings']['application'].get('ui', {}).get('socket_io_enabled', True),
all_paused=datastore.data['settings']['application'].get('all_paused', False),
all_muted=datastore.data['settings']['application'].get('all_muted', False)
)

View File

@@ -2,8 +2,9 @@ from changedetectionio.strtobool import strtobool
from flask_restful import abort, Resource
from flask import request
from functools import wraps
from . import auth, validate_openapi_request
from . import auth, validate_openapi_request, schema_create_watch
from ..validate_url import is_safe_valid_url
import json
def default_content_type(content_type='text/plain'):
@@ -19,6 +20,62 @@ def default_content_type(content_type='text/plain'):
return decorator
def convert_query_param_to_type(value, schema_property):
"""
Convert a query parameter string to the appropriate type based on schema definition.
Args:
value: String value from query parameter
schema_property: Schema property definition with 'type' or 'anyOf' field
Returns:
Converted value in the appropriate type
"""
# Handle anyOf schemas (extract the first type)
if 'anyOf' in schema_property:
# Use the first non-null type from anyOf
for option in schema_property['anyOf']:
if option.get('type') and option.get('type') != 'null':
prop_type = option.get('type')
break
else:
prop_type = None
else:
prop_type = schema_property.get('type')
# Handle array type (e.g., notification_urls)
if prop_type == 'array':
# Support both comma-separated and JSON array format
if value.startswith('['):
try:
return json.loads(value)
except json.JSONDecodeError:
return [v.strip() for v in value.split(',')]
return [v.strip() for v in value.split(',')]
# Handle object type (e.g., time_between_check, headers)
elif prop_type == 'object':
try:
return json.loads(value)
except json.JSONDecodeError:
raise ValueError(f"Invalid JSON object for field: {value}")
# Handle boolean type
elif prop_type == 'boolean':
return strtobool(value)
# Handle integer type
elif prop_type == 'integer':
return int(value)
# Handle number type (float)
elif prop_type == 'number':
return float(value)
# Default: return as string
return value
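# Rough illustration of the conversions performed above (example values are
# assumptions, not taken from the actual OpenAPI schema):
#   convert_query_param_to_type("24", {'type': 'integer'})            -> 24
#   convert_query_param_to_type("true", {'type': 'boolean'})          -> truthy value via strtobool
#   convert_query_param_to_type("url1,url2", {'type': 'array'})       -> ['url1', 'url2']
#   convert_query_param_to_type('{"hours": 24}', {'type': 'object'})  -> {'hours': 24}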
class Import(Resource):
def __init__(self, **kwargs):
# datastore is a black box dependency
@@ -28,25 +85,79 @@ class Import(Resource):
@default_content_type('text/plain') #3547 #3542
@validate_openapi_request('importWatches')
def post(self):
"""Import a list of watched URLs."""
"""Import a list of watched URLs with optional watch configuration."""
# Special parameters that are NOT watch configuration
special_params = {'tag', 'tag_uuids', 'dedupe', 'proxy'}
extras = {}
# Handle special 'proxy' parameter
if request.args.get('proxy'):
plist = self.datastore.proxy_list
if not request.args.get('proxy') in plist:
return "Invalid proxy choice, currently supported proxies are '{}'".format(', '.join(plist)), 400
proxy_list_str = ', '.join(plist) if plist else 'none configured'
return f"Invalid proxy choice, currently supported proxies are '{proxy_list_str}'", 400
else:
extras['proxy'] = request.args.get('proxy')
# Handle special 'dedupe' parameter
dedupe = strtobool(request.args.get('dedupe', 'true'))
# Handle special 'tag' and 'tag_uuids' parameters
tags = request.args.get('tag')
tag_uuids = request.args.get('tag_uuids')
if tag_uuids:
tag_uuids = tag_uuids.split(',')
# Extract ALL other query parameters as watch configuration
schema_properties = schema_create_watch.get('properties', {})
for param_name, param_value in request.args.items():
# Skip special parameters
if param_name in special_params:
continue
# Skip if not in schema (unknown parameter)
if param_name not in schema_properties:
return f"Unknown watch configuration parameter: {param_name}", 400
# Convert to appropriate type based on schema
try:
converted_value = convert_query_param_to_type(param_value, schema_properties[param_name])
extras[param_name] = converted_value
except (ValueError, json.JSONDecodeError) as e:
return f"Invalid value for parameter '{param_name}': {str(e)}", 400
# Validate processor if provided
if 'processor' in extras:
from changedetectionio.processors import available_processors
available = [p[0] for p in available_processors()]
if extras['processor'] not in available:
return f"Invalid processor '{extras['processor']}'. Available processors: {', '.join(available)}", 400
# Validate fetch_backend if provided
if 'fetch_backend' in extras:
from changedetectionio.content_fetchers import available_fetchers
available = [f[0] for f in available_fetchers()]
# Also allow 'system' and extra_browser_* patterns
is_valid = (
extras['fetch_backend'] == 'system' or
extras['fetch_backend'] in available or
extras['fetch_backend'].startswith('extra_browser_')
)
if not is_valid:
return f"Invalid fetch_backend '{extras['fetch_backend']}'. Available: system, {', '.join(available)}", 400
# Validate notification_urls if provided
if 'notification_urls' in extras:
from wtforms import ValidationError
from changedetectionio.api.Notifications import validate_notification_urls
try:
validate_notification_urls(extras['notification_urls'])
except ValidationError as e:
return f"Invalid notification_urls: {str(e)}", 400
urls = request.get_data().decode('utf8').splitlines()
added = []
for url in urls:
@@ -54,13 +165,15 @@ class Import(Resource):
if not len(url):
continue
# If hosts that only contain alphanumerics are allowed ("localhost" for example)
# Validate URL
if not is_safe_valid_url(url):
return f"Invalid or unsupported URL - {url}", 400
# Check for duplicates if dedupe is enabled
if dedupe and self.datastore.url_exists(url):
continue
# Create watch with extras configuration
new_uuid = self.datastore.add_watch(url=url, extras=extras, tag=tags, tag_uuids=tag_uuids)
added.append(new_uuid)
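
A minimal usage sketch for the import endpoint above: watch configuration is passed as query parameters alongside the newline-separated URL body. The endpoint path, port, and x-api-key header below are assumptions based on the existing API layout rather than something shown in this diff; note that object values such as time_between_check must be valid JSON (double quotes).

import requests

# Hypothetical deployment details - adjust to your instance
api_url = "http://localhost:5000/api/v1/import"
headers = {"x-api-key": "YOUR_API_KEY", "Content-Type": "text/plain"}

# Any property from the watch schema can be supplied as a query parameter;
# string values are converted to the schema type (integer, boolean, array, object)
params = {
    "processor": "restock_diff",
    "time_between_check": '{"hours": 24}',
    "dedupe": "true",
}

# The request body is a plain-text list of URLs, one per line
body = "https://example.com/product-1\nhttps://example.com/product-2"

resp = requests.post(api_url, headers=headers, params=params, data=body)
print(resp.status_code, resp.text)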

View File

@@ -24,8 +24,7 @@ class Tag(Resource):
@validate_openapi_request('getTag')
def get(self, uuid):
"""Get data for a single tag/group, toggle notification muting, or recheck all."""
from copy import deepcopy
tag = deepcopy(self.datastore.data['settings']['application']['tags'].get(uuid))
tag = self.datastore.data['settings']['application']['tags'].get(uuid)
if not tag:
abort(404, message=f'No tag exists with the UUID of {uuid}')
@@ -62,12 +61,12 @@ class Tag(Resource):
return {'status': f'OK, queueing {len(watches_to_queue)} watches in background'}, 202
if request.args.get('muted', '') == 'muted':
self.datastore.data['settings']['application']['tags'][uuid]['notification_muted'] = True
self.datastore.commit()
tag['notification_muted'] = True
tag.commit()
return "OK", 200
elif request.args.get('muted', '') == 'unmuted':
self.datastore.data['settings']['application']['tags'][uuid]['notification_muted'] = False
self.datastore.commit()
tag['notification_muted'] = False
tag.commit()
return "OK", 200
return tag
@@ -81,7 +80,17 @@ class Tag(Resource):
# Delete the tag, and any tag reference
del self.datastore.data['settings']['application']['tags'][uuid]
self.datastore.commit()
# Delete tag.json file if it exists
import os
tag_dir = os.path.join(self.datastore.datastore_path, uuid)
tag_json = os.path.join(tag_dir, "tag.json")
if os.path.exists(tag_json):
try:
os.unlink(tag_json)
logger.info(f"Deleted tag.json for tag {uuid}")
except Exception as e:
logger.error(f"Failed to delete tag.json for tag {uuid}: {e}")
# Remove tag from all watches
for watch_uuid, watch in self.datastore.data['watching'].items():
@@ -111,7 +120,7 @@ class Tag(Resource):
return str(e), 400
tag.update(request.json)
self.datastore.commit()
tag.commit()
return "OK", 200

View File

@@ -374,10 +374,10 @@ class WatchFavicon(Resource):
favicon_filename = watch.get_favicon_filename()
if favicon_filename:
# Use cached MIME type detection
filepath = os.path.join(watch.watch_data_dir, favicon_filename)
filepath = os.path.join(watch.data_dir, favicon_filename)
mime = get_favicon_mime_type(filepath)
response = make_response(send_from_directory(watch.watch_data_dir, favicon_filename))
response = make_response(send_from_directory(watch.data_dir, favicon_filename))
response.headers['Content-type'] = mime
response.headers['Cache-Control'] = 'max-age=300, must-revalidate' # Cache for 5 minutes, then revalidate
return response

View File

@@ -47,7 +47,7 @@ def create_backup(datastore_path, watches: dict):
# Add any data in the watch data directory.
for uuid, w in watches.items():
for f in Path(w.watch_data_dir).glob('*'):
for f in Path(w.data_dir).glob('*'):
zipObj.write(f,
# Use the full path to access the file, but make the file 'relative' in the Zip.
arcname=os.path.join(f.parts[-2], f.parts[-1]),

View File

@@ -285,8 +285,8 @@ def construct_blueprint(datastore: ChangeDetectionStore):
watch = datastore.data['watching'].get(uuid)
filename = f"step_before-{step_n}.jpeg" if request.args.get('type', '') == 'before' else f"step_{step_n}.jpeg"
if step_n and watch and os.path.isfile(os.path.join(watch.watch_data_dir, filename)):
response = make_response(send_from_directory(directory=watch.watch_data_dir, path=filename))
if step_n and watch and os.path.isfile(os.path.join(watch.data_dir, filename)):
response = make_response(send_from_directory(directory=watch.data_dir, path=filename))
response.headers['Content-type'] = 'image/jpeg'
response.headers['Cache-Control'] = 'no-cache, no-store, must-revalidate'
response.headers['Pragma'] = 'no-cache'

View File

@@ -1,8 +1,9 @@
import os
from copy import deepcopy
from datetime import datetime
from datetime import datetime, timedelta
from zoneinfo import ZoneInfo, available_timezones
import secrets
import time
import flask_login
from flask import Blueprint, render_template, request, redirect, url_for, flash
from flask_babel import gettext
@@ -142,6 +143,9 @@ def construct_blueprint(datastore: ChangeDetectionStore):
active_plugins = get_active_plugins()
python_version = f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}"
# Calculate uptime in seconds
uptime_seconds = time.time() - datastore.start_time
# Get plugin settings tabs and instantiate forms
plugin_tabs = get_plugin_settings_tabs()
plugin_forms = {}
@@ -160,6 +164,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
active_plugins=active_plugins,
api_key=datastore.data['settings']['application'].get('api_access_token'),
python_version=python_version,
uptime_seconds=uptime_seconds,
available_timezones=sorted(available_timezones()),
emailprefix=os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False),
extra_notification_token_placeholder_info=datastore.get_unique_notification_token_placeholders_available(),

View File

@@ -394,6 +394,7 @@ nav
{% endfor %}
{% endif %}
<div class="tab-pane-inner" id="info">
<p><strong>{{ _('Uptime:') }}</strong> {{ uptime_seconds|format_duration }}</p>
<p><strong>{{ _('Python version:') }}</strong> {{ python_version }}</p>
<p><strong>{{ _('Plugins active:') }}</strong></p>
{% if active_plugins %}

View File

@@ -57,9 +57,10 @@ def construct_blueprint(datastore: ChangeDetectionStore):
@tags_blueprint.route("/mute/<string:uuid>", methods=['GET'])
@login_optionally_required
def mute(uuid):
if datastore.data['settings']['application']['tags'].get(uuid):
datastore.data['settings']['application']['tags'][uuid]['notification_muted'] = not datastore.data['settings']['application']['tags'][uuid]['notification_muted']
datastore.commit()
tag = datastore.data['settings']['application']['tags'].get(uuid)
if tag:
tag['notification_muted'] = not tag['notification_muted']
tag.commit()
return redirect(url_for('tags.tags_overview_page'))
@tags_blueprint.route("/delete/<string:uuid>", methods=['GET'])
@@ -69,6 +70,17 @@ def construct_blueprint(datastore: ChangeDetectionStore):
if datastore.data['settings']['application']['tags'].get(uuid):
del datastore.data['settings']['application']['tags'][uuid]
# Delete tag.json file if it exists
import os
tag_dir = os.path.join(datastore.datastore_path, uuid)
tag_json = os.path.join(tag_dir, "tag.json")
if os.path.exists(tag_json):
try:
os.unlink(tag_json)
logger.info(f"Deleted tag.json for tag {uuid}")
except Exception as e:
logger.error(f"Failed to delete tag.json for tag {uuid}: {e}")
# Remove tag from all watches in background thread to avoid blocking
def remove_tag_background(tag_uuid):
"""Background thread to remove tag from watches - discarded after completion."""
@@ -115,9 +127,19 @@ def construct_blueprint(datastore: ChangeDetectionStore):
@tags_blueprint.route("/delete_all", methods=['GET'])
@login_optionally_required
def delete_all():
# Delete all tag.json files
import os
for tag_uuid in list(datastore.data['settings']['application']['tags'].keys()):
tag_dir = os.path.join(datastore.datastore_path, tag_uuid)
tag_json = os.path.join(tag_dir, "tag.json")
if os.path.exists(tag_json):
try:
os.unlink(tag_json)
except Exception as e:
logger.error(f"Failed to delete tag.json for tag {tag_uuid}: {e}")
# Clear all tags from settings immediately
datastore.data['settings']['application']['tags'] = {}
datastore.commit()
# Clear tags from all watches in background thread to avoid blocking
def clear_all_tags_background():
@@ -207,10 +229,10 @@ def construct_blueprint(datastore: ChangeDetectionStore):
if uuid == 'first':
uuid = list(datastore.data['settings']['application']['tags'].keys()).pop()
default = datastore.data['settings']['application']['tags'].get(uuid)
tag = datastore.data['settings']['application']['tags'].get(uuid)
form = group_restock_settings_form(formdata=request.form if request.method == 'POST' else None,
data=default,
data=tag,
extra_notification_tokens=datastore.get_unique_notification_tokens_available()
)
# @todo subclass form so validation works
@@ -219,9 +241,9 @@ def construct_blueprint(datastore: ChangeDetectionStore):
# flash(','.join(l), 'error')
# return redirect(url_for('tags.form_tag_edit_submit', uuid=uuid))
datastore.data['settings']['application']['tags'][uuid].update(form.data)
datastore.data['settings']['application']['tags'][uuid]['processor'] = 'restock_diff'
datastore.commit()
tag.update(form.data)
tag['processor'] = 'restock_diff'
tag.commit()
flash(gettext("Updated"))
return redirect(url_for('tags.tags_overview_page'))

View File

@@ -337,9 +337,9 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
if uuid == 'first':
uuid = list(datastore.data['watching'].keys()).pop()
watch = datastore.data['watching'].get(uuid)
if watch and watch.history.keys() and os.path.isdir(watch.watch_data_dir):
if watch and watch.history.keys() and os.path.isdir(watch.data_dir):
latest_filename = list(watch.history.keys())[-1]
html_fname = os.path.join(watch.watch_data_dir, f"{latest_filename}.html.br")
html_fname = os.path.join(watch.data_dir, f"{latest_filename}.html.br")
with open(html_fname, 'rb') as f:
if html_fname.endswith('.br'):
# Read and decompress the Brotli file

View File

@@ -266,6 +266,47 @@ def _jinja2_filter_seconds_precise(timestamp):
return format(int(time.time()-timestamp), ',d')
@app.template_filter('format_duration')
def _jinja2_filter_format_duration(seconds):
"""Format a duration in seconds into human readable string like '5 days, 3 hours, 30 minutes'"""
from datetime import timedelta
if not seconds or seconds < 0:
return gettext('0 seconds')
td = timedelta(seconds=int(seconds))
# Calculate components
years = td.days // 365
remaining_days = td.days % 365
months = remaining_days // 30
remaining_days = remaining_days % 30
weeks = remaining_days // 7
days = remaining_days % 7
hours = td.seconds // 3600
minutes = (td.seconds % 3600) // 60
secs = td.seconds % 60
# Build parts list
parts = []
if years > 0:
parts.append(f"{years} {gettext('year') if years == 1 else gettext('years')}")
if months > 0:
parts.append(f"{months} {gettext('month') if months == 1 else gettext('months')}")
if weeks > 0:
parts.append(f"{weeks} {gettext('week') if weeks == 1 else gettext('weeks')}")
if days > 0:
parts.append(f"{days} {gettext('day') if days == 1 else gettext('days')}")
if hours > 0:
parts.append(f"{hours} {gettext('hour') if hours == 1 else gettext('hours')}")
if minutes > 0:
parts.append(f"{minutes} {gettext('minute') if minutes == 1 else gettext('minutes')}")
if secs > 0 or not parts:
parts.append(f"{secs} {gettext('second') if secs == 1 else gettext('seconds')}")
return ", ".join(parts)
@app.template_filter('fetcher_status_icons')
def _jinja2_filter_fetcher_status_icons(fetcher_name):
"""Get status icon HTML for a given fetcher.
@@ -703,10 +744,10 @@ def changedetection_app(config=None, datastore_o=None):
favicon_filename = watch.get_favicon_filename()
if favicon_filename:
# Use cached MIME type detection
filepath = os.path.join(watch.watch_data_dir, favicon_filename)
filepath = os.path.join(watch.data_dir, favicon_filename)
mime = get_favicon_mime_type(filepath)
response = make_response(send_from_directory(watch.watch_data_dir, favicon_filename))
response = make_response(send_from_directory(watch.data_dir, favicon_filename))
response.headers['Content-type'] = mime
response.headers['Cache-Control'] = 'max-age=300, must-revalidate' # Cache for 5 minutes, then revalidate
return response
@@ -807,7 +848,7 @@ def changedetection_app(config=None, datastore_o=None):
app.register_blueprint(watchlist.construct_blueprint(datastore=datastore, update_q=update_q, queuedWatchMetaData=queuedWatchMetaData), url_prefix='')
# Initialize Socket.IO server conditionally based on settings
socket_io_enabled = datastore.data['settings']['application']['ui'].get('socket_io_enabled', True)
socket_io_enabled = datastore.data['settings']['application'].get('ui', {}).get('socket_io_enabled', True)
if socket_io_enabled and app.config.get('batch_mode'):
socket_io_enabled = False
if socket_io_enabled:

View File

@@ -20,10 +20,12 @@ See: Watch.py model docstring for full Pydantic architecture explanation
See: processors/restock_diff/processor.py:184-192 for current manual implementation
"""
import os
from changedetectionio.model import watch_base
from changedetectionio.model.persistence import EntityPersistenceMixin
class model(watch_base):
class model(EntityPersistenceMixin, watch_base):
"""
Tag domain model - groups watches and can override their settings.
@@ -42,11 +44,7 @@ class model(watch_base):
"""
def __init__(self, *arg, **kw):
# Store datastore reference (optional for Tags, but good for consistency)
self.__datastore = kw.get('__datastore')
if kw.get('__datastore'):
del kw['__datastore']
# Parent class (watch_base) handles __datastore and __datastore_path
super(model, self).__init__(*arg, **kw)
self['overrides_watch'] = kw.get('default', {}).get('overrides_watch')
@@ -54,3 +52,7 @@ class model(watch_base):
if kw.get('default'):
self.update(kw['default'])
del kw['default']
# _save_to_disk() method provided by EntityPersistenceMixin
# commit() and _get_commit_data() methods inherited from watch_base
# Tag uses default _get_commit_data() (includes all keys)

View File

@@ -24,8 +24,6 @@ The dream architecture would use Pydantic for:
See class model docstring for detailed explanation and examples.
See: processors/restock_diff/processor.py:184-192 for manual resolution example
"""
import gc
from copy import copy
from blinker import signal
from changedetectionio.validate_url import is_safe_valid_url
@@ -33,6 +31,7 @@ from changedetectionio.validate_url import is_safe_valid_url
from changedetectionio.strtobool import strtobool
from changedetectionio.jinja2_custom import render as jinja_render
from . import watch_base
from .persistence import EntityPersistenceMixin
import os
import re
from pathlib import Path
@@ -129,7 +128,7 @@ def _brotli_save(contents, filepath, mode=None, fallback_uncompressed=False):
raise Exception(f"Brotli compression failed for {filepath}: {e}")
class model(watch_base):
class model(EntityPersistenceMixin, watch_base):
"""
Watch domain model for monitoring URL changes.
@@ -228,16 +227,11 @@ class model(watch_base):
jitter_seconds = 0
def __init__(self, *arg, **kw):
self.__datastore_path = kw.get('datastore_path')
if kw.get('datastore_path'):
del kw['datastore_path']
self.__datastore = kw.get('__datastore')
if not self.__datastore:
# Validate __datastore before calling parent (Watch requires it)
if not kw.get('__datastore'):
raise ValueError("Watch object requires '__datastore' reference - cannot access global settings without it")
if kw.get('__datastore'):
del kw['__datastore']
# Parent class (watch_base) handles __datastore and __datastore_path
super(model, self).__init__(*arg, **kw)
if kw.get('default'):
@@ -265,11 +259,6 @@ class model(watch_base):
def has_unviewed(self):
return int(self.newest_history_key) > int(self['last_viewed']) and self.__history_n >= 2
def ensure_data_dir_exists(self):
if not os.path.isdir(self.watch_data_dir):
logger.debug(f"> Creating data dir {self.watch_data_dir}")
os.mkdir(self.watch_data_dir)
@property
def link(self):
@@ -325,7 +314,8 @@ class model(watch_base):
# JSON Data, Screenshots, Textfiles (history index and snapshots), HTML in the future etc
# But preserve processor config files (they're configuration, not history data)
for item in pathlib.Path(str(self.watch_data_dir)).rglob("*.*"):
# Use glob not rglob here for safety.
for item in pathlib.Path(str(self.data_dir)).glob("*.*"):
# Skip processor config files
if item.name in processor_config_files:
continue
@@ -434,11 +424,11 @@ class model(watch_base):
tmp_history = {}
# In the case we are only using the watch for processing without history
if not self.watch_data_dir:
if not self.data_dir:
return []
# Read the history file as a dict
fname = os.path.join(self.watch_data_dir, self.history_index_filename)
fname = os.path.join(self.data_dir, self.history_index_filename)
if os.path.isfile(fname):
logger.debug(f"Reading watch history index for {self.get('uuid')}")
with open(fname, "r", encoding='utf-8') as f:
@@ -451,13 +441,13 @@ class model(watch_base):
# Cross-platform: check for any path separator (works on Windows and Unix)
if os.sep not in v and '/' not in v and '\\' not in v:
# Relative filename only, no path separators
v = os.path.join(self.watch_data_dir, v)
v = os.path.join(self.data_dir, v)
else:
# It's possible that they moved the datadir on older versions
# So the snapshot exists but is in a different path
# Cross-platform: use os.path.basename instead of split('/')
snapshot_fname = os.path.basename(v)
proposed_new_path = os.path.join(self.watch_data_dir, snapshot_fname)
proposed_new_path = os.path.join(self.data_dir, snapshot_fname)
if not os.path.exists(v) and os.path.exists(proposed_new_path):
v = proposed_new_path
@@ -474,7 +464,7 @@ class model(watch_base):
@property
def has_history(self):
fname = os.path.join(self.watch_data_dir, self.history_index_filename)
fname = os.path.join(self.data_dir, self.history_index_filename)
return os.path.isfile(fname)
@property
@@ -580,7 +570,7 @@ class model(watch_base):
def _write_atomic(self, dest, data, mode='wb'):
"""Write data atomically to dest using a temp file"""
import tempfile
with tempfile.NamedTemporaryFile(mode, delete=False, dir=self.watch_data_dir) as tmp:
with tempfile.NamedTemporaryFile(mode, delete=False, dir=self.data_dir) as tmp:
tmp.write(data)
tmp.flush()
os.fsync(tmp.fileno())
@@ -589,7 +579,7 @@ class model(watch_base):
def history_trim(self, newest_n_items):
from pathlib import Path
import gc
# Sort by timestamp (key)
sorted_items = sorted(self.history.items(), key=lambda x: int(x[0]))
@@ -606,7 +596,7 @@ class model(watch_base):
finally:
logger.debug(f"[{self.get('uuid')}] Deleted {item[1]} history snapshot")
try:
dest = os.path.join(self.watch_data_dir, self.history_index_filename)
dest = os.path.join(self.data_dir, self.history_index_filename)
output = "\r\n".join(
f"{k},{Path(v).name}"
for k, v in keep_part.items()
@@ -645,7 +635,7 @@ class model(watch_base):
ext = 'bin'
snapshot_fname = f"{snapshot_id}.{ext}"
dest = os.path.join(self.watch_data_dir, snapshot_fname)
dest = os.path.join(self.data_dir, snapshot_fname)
self._write_atomic(dest, contents)
logger.trace(f"Saved binary snapshot as {snapshot_fname} ({len(contents)} bytes)")
@@ -655,7 +645,7 @@ class model(watch_base):
# Compressed text
import brotli
snapshot_fname = f"{snapshot_id}.txt.br"
dest = os.path.join(self.watch_data_dir, snapshot_fname)
dest = os.path.join(self.data_dir, snapshot_fname)
if not os.path.exists(dest):
try:
@@ -666,16 +656,16 @@ class model(watch_base):
logger.error(f"{self.get('uuid')} - Brotli compression failed: {e}")
# Fallback to uncompressed
snapshot_fname = f"{snapshot_id}.txt"
dest = os.path.join(self.watch_data_dir, snapshot_fname)
dest = os.path.join(self.data_dir, snapshot_fname)
self._write_atomic(dest, contents.encode('utf-8'))
else:
# Plain text
snapshot_fname = f"{snapshot_id}.txt"
dest = os.path.join(self.watch_data_dir, snapshot_fname)
dest = os.path.join(self.data_dir, snapshot_fname)
self._write_atomic(dest, contents.encode('utf-8'))
# Append to history.txt atomically
index_fname = os.path.join(self.watch_data_dir, self.history_index_filename)
index_fname = os.path.join(self.data_dir, self.history_index_filename)
index_line = f"{timestamp},{snapshot_fname}\n"
with open(index_fname, 'a', encoding='utf-8') as f:
@@ -693,10 +683,7 @@ class model(watch_base):
# if self.history_snapshot_max_length: return self.history_snapshot_max_length
# if tag := self._get_override_tag(): return tag.history_snapshot_max_length
# return self._datastore.settings.history_snapshot_max_length
maxlen = (
self.get('history_snapshot_max_length')
or (self.__datastore and self.__datastore['settings']['application'].get('history_snapshot_max_length'))
)
maxlen = self.get('history_snapshot_max_length') or self.get_global_setting('application', 'history_snapshot_max_length')
if maxlen and self.__history_n and self.__history_n > maxlen:
self.history_trim(newest_n_items=maxlen)
@@ -753,7 +740,7 @@ class model(watch_base):
return not local_lines.issubset(existing_history)
def get_screenshot(self):
fname = os.path.join(self.watch_data_dir, "last-screenshot.png")
fname = os.path.join(self.data_dir, "last-screenshot.png")
if os.path.isfile(fname):
return fname
@@ -768,7 +755,7 @@ class model(watch_base):
if not favicon_fname:
return True
try:
fname = next(iter(glob.glob(os.path.join(self.watch_data_dir, "favicon.*"))), None)
fname = next(iter(glob.glob(os.path.join(self.data_dir, "favicon.*"))), None)
logger.trace(f"Favicon file maybe found at {fname}")
if os.path.isfile(fname):
file_age = int(time.time() - os.path.getmtime(fname))
@@ -801,7 +788,7 @@ class model(watch_base):
base = "favicon"
extension = "ico"
fname = os.path.join(self.watch_data_dir, f"favicon.{extension}")
fname = os.path.join(self.data_dir, f"favicon.{extension}")
try:
# validate=True makes sure the string only contains valid base64 chars
@@ -848,7 +835,7 @@ class model(watch_base):
import glob
# Search for all favicon.* files
files = glob.glob(os.path.join(self.watch_data_dir, "favicon.*"))
files = glob.glob(os.path.join(self.data_dir, "favicon.*"))
if not files:
result = None
@@ -875,7 +862,7 @@ class model(watch_base):
import os
import time
thumbnail_path = os.path.join(self.watch_data_dir, "thumbnail.jpeg")
thumbnail_path = os.path.join(self.data_dir, "thumbnail.jpeg")
top_trim = 500 # Pixels from top of screenshot to use
screenshot_path = self.get_screenshot()
@@ -926,7 +913,7 @@ class model(watch_base):
return None
def __get_file_ctime(self, filename):
fname = os.path.join(self.watch_data_dir, filename)
fname = os.path.join(self.data_dir, filename)
if os.path.isfile(fname):
return int(os.path.getmtime(fname))
return False
@@ -951,14 +938,9 @@ class model(watch_base):
def snapshot_error_screenshot_ctime(self):
return self.__get_file_ctime('last-error-screenshot.png')
@property
def watch_data_dir(self):
# The base dir of the watch data
return os.path.join(self.__datastore_path, self['uuid']) if self.__datastore_path else None
def get_error_text(self):
"""Return the text saved from a previous request that resulted in a non-200 error"""
fname = os.path.join(self.watch_data_dir, "last-error.txt")
fname = os.path.join(self.data_dir, "last-error.txt")
if os.path.isfile(fname):
with open(fname, 'r', encoding='utf-8') as f:
return f.read()
@@ -966,7 +948,7 @@ class model(watch_base):
def get_error_snapshot(self):
"""Return path to the screenshot that resulted in a non-200 error"""
fname = os.path.join(self.watch_data_dir, "last-error-screenshot.png")
fname = os.path.join(self.data_dir, "last-error-screenshot.png")
if os.path.isfile(fname):
return fname
return False
@@ -990,34 +972,17 @@ class model(watch_base):
def toggle_mute(self):
self['notification_muted'] ^= True
def commit(self):
def _get_commit_data(self):
"""
Save this watch immediately to disk using atomic write.
Prepare watch data for commit.
Replaces the old dirty-tracking system with immediate persistence.
Uses atomic write pattern (temp file + rename) for crash safety.
Fire-and-forget: Logs errors but does not raise exceptions.
Watch data remains in memory even if save fails, so next commit will retry.
Excludes processor_config_* keys (stored in separate files).
Normalizes browser_steps to empty list if no meaningful steps.
"""
from loguru import logger
if not self.__datastore:
logger.error(f"Cannot commit watch {self.get('uuid')} without datastore reference")
return
if not self.watch_data_dir:
logger.error(f"Cannot commit watch {self.get('uuid')} without datastore_path")
return
# Convert to dict for serialization, excluding processor config keys
# Processor configs are stored separately in processor-specific JSON files
# Use deepcopy to prevent mutations from affecting the original Watch object
import copy
# Acquire datastore lock to prevent concurrent modifications during copy
# Take a quick shallow snapshot under lock, then deep copy outside lock
lock = self.__datastore.lock if self.__datastore and hasattr(self.__datastore, 'lock') else None
# Get base snapshot with lock
lock = self._datastore.lock if self._datastore and hasattr(self._datastore, 'lock') else None
if lock:
with lock:
@@ -1025,20 +990,17 @@ class model(watch_base):
else:
snapshot = dict(self)
# Deep copy snapshot (slower, but done outside lock to minimize contention)
# Exclude processor config keys (stored separately)
watch_dict = {k: copy.deepcopy(v) for k, v in snapshot.items() if not k.startswith('processor_config_')}
# Normalize browser_steps: if no meaningful steps, save as empty list
if not self.has_browser_steps:
watch_dict['browser_steps'] = []
# Use existing atomic write helper
from changedetectionio.store.file_saving_datastore import save_watch_atomic
try:
save_watch_atomic(self.watch_data_dir, self.get('uuid'), watch_dict)
logger.debug(f"Committed watch {self.get('uuid')}")
except Exception as e:
logger.error(f"Failed to commit watch {self.get('uuid')}: {e}")
return watch_dict
# _save_to_disk() method provided by EntityPersistenceMixin
# commit() method inherited from watch_base
def extra_notification_token_values(self):
@@ -1070,7 +1032,7 @@ class model(watch_base):
if not csv_writer:
# A file on the disk can be transferred much faster via flask than a string reply
csv_output_filename = f"report-{self.get('uuid')}.csv"
f = open(os.path.join(self.watch_data_dir, csv_output_filename), 'w')
f = open(os.path.join(self.data_dir, csv_output_filename), 'w')
# @todo some headers in the future
#fieldnames = ['Epoch seconds', 'Date']
csv_writer = csv.writer(f,
@@ -1112,7 +1074,7 @@ class model(watch_base):
def save_error_text(self, contents):
self.ensure_data_dir_exists()
target_path = os.path.join(self.watch_data_dir, "last-error.txt")
target_path = os.path.join(self.data_dir, "last-error.txt")
with open(target_path, 'w', encoding='utf-8') as f:
f.write(contents)
@@ -1121,9 +1083,9 @@ class model(watch_base):
import zlib
if as_error:
target_path = os.path.join(str(self.watch_data_dir), "elements-error.deflate")
target_path = os.path.join(str(self.data_dir), "elements-error.deflate")
else:
target_path = os.path.join(str(self.watch_data_dir), "elements.deflate")
target_path = os.path.join(str(self.data_dir), "elements.deflate")
self.ensure_data_dir_exists()
@@ -1138,9 +1100,9 @@ class model(watch_base):
def save_screenshot(self, screenshot: bytes, as_error=False):
if as_error:
target_path = os.path.join(self.watch_data_dir, "last-error-screenshot.png")
target_path = os.path.join(self.data_dir, "last-error-screenshot.png")
else:
target_path = os.path.join(self.watch_data_dir, "last-screenshot.png")
target_path = os.path.join(self.data_dir, "last-screenshot.png")
self.ensure_data_dir_exists()
@@ -1151,7 +1113,7 @@ class model(watch_base):
def get_last_fetched_text_before_filters(self):
import brotli
filepath = os.path.join(self.watch_data_dir, 'last-fetched.br')
filepath = os.path.join(self.data_dir, 'last-fetched.br')
if not os.path.isfile(filepath) or os.path.getsize(filepath) == 0:
# If a previous attempt doesnt yet exist, just snarf the previous snapshot instead
@@ -1166,13 +1128,13 @@ class model(watch_base):
def save_last_text_fetched_before_filters(self, contents):
import brotli
filepath = os.path.join(self.watch_data_dir, 'last-fetched.br')
filepath = os.path.join(self.data_dir, 'last-fetched.br')
_brotli_save(contents, filepath, mode=brotli.MODE_TEXT, fallback_uncompressed=False)
def save_last_fetched_html(self, timestamp, contents):
self.ensure_data_dir_exists()
snapshot_fname = f"{timestamp}.html.br"
filepath = os.path.join(self.watch_data_dir, snapshot_fname)
filepath = os.path.join(self.data_dir, snapshot_fname)
_brotli_save(contents, filepath, mode=None, fallback_uncompressed=True)
self._prune_last_fetched_html_snapshots()
@@ -1180,7 +1142,7 @@ class model(watch_base):
import brotli
snapshot_fname = f"{timestamp}.html.br"
filepath = os.path.join(self.watch_data_dir, snapshot_fname)
filepath = os.path.join(self.data_dir, snapshot_fname)
if os.path.isfile(filepath):
with open(filepath, 'rb') as f:
return (brotli.decompress(f.read()).decode('utf-8'))
@@ -1195,7 +1157,7 @@ class model(watch_base):
for index, timestamp in enumerate(dates):
snapshot_fname = f"{timestamp}.html.br"
filepath = os.path.join(self.watch_data_dir, snapshot_fname)
filepath = os.path.join(self.data_dir, snapshot_fname)
# Keep only the first 2
if index > 1 and os.path.isfile(filepath):
@@ -1206,7 +1168,7 @@ class model(watch_base):
def get_browsersteps_available_screenshots(self):
"For knowing which screenshots are available to show the user in BrowserSteps UI"
available = []
for f in Path(self.watch_data_dir).glob('step_before-*.jpeg'):
for f in Path(self.data_dir).glob('step_before-*.jpeg'):
step_n=re.search(r'step_before-(\d+)', f.name)
if step_n:
available.append(step_n.group(1))

View File

@@ -2,9 +2,14 @@ import os
import uuid
from changedetectionio import strtobool
from .persistence import EntityPersistenceMixin
__all__ = ['EntityPersistenceMixin', 'watch_base']
USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH = 'System default'
CONDITIONS_MATCH_LOGIC_DEFAULT = 'ALL'
class watch_base(dict):
"""
Base watch domain model (inherits from dict for backward compatibility).
@@ -138,7 +143,7 @@ class watch_base(dict):
Instance Attributes (not serialized):
__datastore: Reference to parent DataStore (set externally after creation)
watch_data_dir: Filesystem path for this watch's data directory
data_dir: Filesystem path for this watch's data directory
Notes:
- Many fields default to None to distinguish "not set" from "set to default"
@@ -149,6 +154,17 @@ class watch_base(dict):
"""
def __init__(self, *arg, **kw):
# Store datastore reference (common to Watch and Tag)
# Use single underscore to avoid name mangling issues in subclasses
self._datastore = kw.get('__datastore')
if kw.get('__datastore'):
del kw['__datastore']
# Store datastore_path (common to Watch and Tag)
self._datastore_path = kw.get('datastore_path')
if kw.get('datastore_path'):
del kw['datastore_path']
self.update({
# Custom notification content
# Re #110, so then if this is set to None, we know to use the default value instead
@@ -318,8 +334,10 @@ class watch_base(dict):
attr_value = getattr(self, attr_name)
# Special handling: Share references to large objects instead of copying
# Examples: __datastore, __app_reference, __global_settings, etc.
if attr_name.endswith('__datastore') or attr_name.endswith('__app'):
# Examples: _datastore, __datastore, __app_reference, __global_settings, etc.
if (attr_name == '_datastore' or
attr_name.endswith('__datastore') or
attr_name.endswith('__app')):
# Share the reference (don't copy!) to prevent memory leaks
setattr(new_obj, attr_name, attr_value)
# Skip cache attributes - let them regenerate on demand
@@ -349,7 +367,8 @@ class watch_base(dict):
try:
attr_value = getattr(self, attr_name)
# Exclude large reference objects and caches from serialization
if not (attr_name.endswith('__datastore') or
if not (attr_name == '_datastore' or
attr_name.endswith('__datastore') or
attr_name.endswith('__app') or
'cache' in attr_name.lower() or
callable(attr_value)):
@@ -377,4 +396,126 @@ class watch_base(dict):
# Restore instance attributes
for attr_name, attr_value in metadata.items():
setattr(self, attr_name, attr_value)
@property
def data_dir(self):
"""
The base directory for this watch/tag data (property, computed from UUID).
Common property for both Watch and Tag objects.
Returns path like: /datastore/{uuid}/
"""
return os.path.join(self._datastore_path, self['uuid']) if self._datastore_path else None
def ensure_data_dir_exists(self):
"""
Create the data directory if it doesn't exist.
Common method for both Watch and Tag objects.
"""
from loguru import logger
if not os.path.isdir(self.data_dir):
logger.debug(f"> Creating data dir {self.data_dir}")
os.mkdir(self.data_dir)
def get_global_setting(self, *path):
"""
Get a setting from the global datastore configuration.
Args:
*path: Path to the setting (e.g., 'application', 'history_snapshot_max_length')
Returns:
The setting value, or None if not found
Example:
maxlen = self.get_global_setting('application', 'history_snapshot_max_length')
"""
if not self._datastore:
return None
try:
value = self._datastore['settings']
for key in path:
value = value[key]
return value
except (KeyError, TypeError):
return None
def _get_commit_data(self):
"""
Prepare data for commit (can be overridden by subclasses).
Returns:
dict: Data to serialize (filtered as needed by subclass)
"""
import copy
# Acquire datastore lock to prevent concurrent modifications during copy
lock = self._datastore.lock if self._datastore and hasattr(self._datastore, 'lock') else None
if lock:
with lock:
snapshot = dict(self)
else:
snapshot = dict(self)
# Deep copy snapshot (slower, but done outside lock to minimize contention)
# Subclasses can override to filter keys (e.g., Watch excludes processor_config_*)
return {k: copy.deepcopy(v) for k, v in snapshot.items()}
def _save_to_disk(self, data_dict, uuid):
"""
Save data to disk (must be implemented by subclasses).
Args:
data_dict: Dictionary to save
uuid: UUID for logging
Raises:
NotImplementedError: If subclass doesn't implement
"""
raise NotImplementedError("Subclass must implement _save_to_disk()")
def commit(self):
"""
Save this watch/tag immediately to disk using atomic write.
Common commit logic for Watch and Tag objects.
Subclasses override _get_commit_data() and _save_to_disk() for specifics.
Fire-and-forget: Logs errors but does not raise exceptions.
Data remains in memory even if save fails, so next commit will retry.
"""
from loguru import logger
if not self.data_dir:
entity_type = self.__class__.__name__
logger.error(f"Cannot commit {entity_type} {self.get('uuid')} without datastore_path")
return
uuid = self.get('uuid')
if not uuid:
entity_type = self.__class__.__name__
logger.error(f"Cannot commit {entity_type} without UUID")
return
# Get data from subclass (may filter keys)
try:
data_dict = self._get_commit_data()
except Exception as e:
logger.error(f"Failed to prepare commit data for {uuid}: {e}")
return
# Save to disk via subclass implementation
try:
# Determine entity type from module name (Watch.py -> watch, Tag.py -> tag)
from changedetectionio.model.persistence import _determine_entity_type
entity_type = _determine_entity_type(self.__class__)
filename = f"{entity_type}.json"
self._save_to_disk(data_dict, uuid)
logger.debug(f"Committed {entity_type} {uuid} to {uuid}/{filename}")
except Exception as e:
logger.error(f"Failed to commit {uuid}: {e}")

View File

@@ -0,0 +1,84 @@
"""
Entity persistence mixin for Watch and Tag models.
Provides file-based persistence using atomic writes.
"""
import functools
import inspect
@functools.lru_cache(maxsize=None)
def _determine_entity_type(cls):
"""
Determine entity type from class hierarchy (cached at class level).
Args:
cls: The class to inspect
Returns:
str: Entity type ('watch', 'tag', etc.)
Raises:
ValueError: If entity type cannot be determined
"""
for base_class in inspect.getmro(cls):
module_name = base_class.__module__
if module_name.startswith('changedetectionio.model.'):
# Get last part after dot: "changedetectionio.model.Watch" -> "watch"
return module_name.split('.')[-1].lower()
raise ValueError(
f"Cannot determine entity type for {cls.__module__}.{cls.__name__}. "
f"Entity must inherit from a class in changedetectionio.model (Watch or Tag)."
)
class EntityPersistenceMixin:
"""
Mixin providing file persistence for watch_base subclasses (Watch, Tag, etc.).
This mixin provides the _save_to_disk() method required by watch_base.commit().
It automatically determines the correct filename and size limits based on class hierarchy.
Usage:
class model(EntityPersistenceMixin, watch_base): # in Watch.py
pass
class model(EntityPersistenceMixin, watch_base): # in Tag.py
pass
"""
def _save_to_disk(self, data_dict, uuid):
"""
Save entity to disk using atomic write.
Implements the abstract method required by watch_base.commit().
Automatically determines filename and size limits from class hierarchy.
Args:
data_dict: Dictionary to save
uuid: UUID for logging
Raises:
ValueError: If entity type cannot be determined from class hierarchy
"""
# Import here to avoid circular dependency
from changedetectionio.store.file_saving_datastore import save_entity_atomic
# Determine entity type (cached at class level, not instance level)
entity_type = _determine_entity_type(self.__class__)
# Set filename and size limits based on entity type
filename = f'{entity_type}.json'
max_size_mb = 10 if entity_type == 'watch' else 1
# Save using generic function
save_entity_atomic(
self.data_dir,
uuid,
data_dict,
filename=filename,
entity_type=entity_type,
max_size_mb=max_size_mb
)

View File

@@ -193,12 +193,12 @@ class difference_detection_processor():
import os
watch = self.datastore.data['watching'].get(self.watch_uuid)
watch_data_dir = watch.watch_data_dir
data_dir = watch.data_dir
if not watch_data_dir:
if not data_dir:
return {}
filepath = os.path.join(watch_data_dir, filename)
filepath = os.path.join(data_dir, filename)
if not os.path.isfile(filepath):
return {}
@@ -223,16 +223,16 @@ class difference_detection_processor():
import os
watch = self.datastore.data['watching'].get(self.watch_uuid)
watch_data_dir = watch.watch_data_dir
data_dir = watch.data_dir
if not watch_data_dir:
logger.warning(f"Cannot save extra watch config {filename}: no watch_data_dir")
if not data_dir:
logger.warning(f"Cannot save extra watch config {filename}: no data_dir")
return
# Ensure directory exists
watch.ensure_data_dir_exists()
filepath = os.path.join(watch_data_dir, filename)
filepath = os.path.join(data_dir, filename)
try:
# If merge is enabled, read existing data first

View File

@@ -414,7 +414,7 @@ def render(watch, datastore, request, url_for, render_template, flash, redirect)
# Load historical data if available (for charts/visualization)
comparison_data = {}
comparison_config_path = os.path.join(watch.watch_data_dir, "visual_comparison_data.json")
comparison_config_path = os.path.join(watch.data_dir, "visual_comparison_data.json")
if os.path.isfile(comparison_config_path):
try:
with open(comparison_config_path, 'r') as f:

View File

@@ -90,7 +90,7 @@ def on_config_save(watch, processor_config, datastore):
processor_config['auto_track_region'] = False
# Remove old template file if exists
template_path = os.path.join(watch.watch_data_dir, CROPPED_IMAGE_TEMPLATE_FILENAME)
template_path = os.path.join(watch.data_dir, CROPPED_IMAGE_TEMPLATE_FILENAME)
if os.path.exists(template_path):
os.remove(template_path)
logger.debug(f"Removed old template file: {template_path}")

View File

@@ -55,7 +55,7 @@ def prepare_filter_prevew(datastore, watch_uuid, form_data):
tmp_watch = deepcopy(datastore.data['watching'].get(watch_uuid))
if tmp_watch and tmp_watch.history and os.path.isdir(tmp_watch.watch_data_dir):
if tmp_watch and tmp_watch.history and os.path.isdir(tmp_watch.data_dir):
# Splice in the temporary stuff from the form
form = forms.processor_text_json_diff_form(formdata=form_data if request.method == 'POST' else None,
data=form_data
@@ -68,7 +68,7 @@ def prepare_filter_prevew(datastore, watch_uuid, form_data):
blank_watch_no_filters['url'] = tmp_watch.get('url')
latest_filename = next(reversed(tmp_watch.history))
html_fname = os.path.join(tmp_watch.watch_data_dir, f"{latest_filename}.html.br")
html_fname = os.path.join(tmp_watch.data_dir, f"{latest_filename}.html.br")
with open(html_fname, 'rb') as f:
decompressed_data = brotli.decompress(f.read()).decode('utf-8') if html_fname.endswith('.br') else f.read().decode('utf-8')

View File

@@ -184,7 +184,8 @@ $(document).ready(function() {
}
// If it's a button in a form, submit the form
else if ($element.is('button')) {
$element.closest('form').submit();
// Use requestSubmit() to include the button's name/value in the form data
$element.closest('form')[0].requestSubmit($element[0]);
}
}
};

View File

@@ -33,7 +33,7 @@ except ImportError:
from ..processors import get_custom_watch_obj_for_processor
# Import the base class and helpers
from .file_saving_datastore import FileSavingDataStore, load_all_watches, save_watch_atomic, save_json_atomic
from .file_saving_datastore import FileSavingDataStore, load_all_watches, load_all_tags, save_watch_atomic, save_tag_atomic, save_json_atomic
from .updates import DatastoreUpdatesMixin
from .legacy_loader import has_legacy_datastore
@@ -123,10 +123,17 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
self.__data['settings']['application'].update(settings_data['settings']['application'])
def _rehydrate_tags(self):
"""Rehydrate tag entities from stored data."""
"""Rehydrate tag entities from stored data into Tag objects with restock_diff processor."""
from ..model import Tag
for uuid, tag in self.__data['settings']['application']['tags'].items():
self.__data['settings']['application']['tags'][uuid] = self.rehydrate_entity(
uuid, tag, processor_override='restock_diff'
# Force processor to restock_diff for override functionality (technical debt)
tag['processor'] = 'restock_diff'
self.__data['settings']['application']['tags'][uuid] = Tag.model(
datastore_path=self.datastore_path,
__datastore=self.__data,
default=tag
)
logger.info(f"Tag: {uuid} {tag['title']}")
@@ -148,7 +155,7 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
"""
Load complete datastore state from storage.
Orchestrates loading of settings and watches using polymorphic methods.
Orchestrates loading of settings, watches, and tags using polymorphic methods.
"""
# Load settings
settings_data = self._load_settings()
@@ -157,7 +164,11 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
# Load watches (polymorphic - parent class method)
self._load_watches()
# Rehydrate tags
# Load tags from individual tag.json files
# These will override any tags in settings (migration path)
self._load_tags()
# Rehydrate any remaining tags from settings (legacy/fallback)
self._rehydrate_tags()
def reload_state(self, datastore_path, include_default_watches, version_tag):
@@ -232,8 +243,32 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
if not legacy_data:
raise Exception("Failed to load legacy datastore from url-watches.json")
# Store the loaded data
self.__data = legacy_data
# Merge legacy data with base_config defaults (preserves new fields like 'ui')
# self.__data already has App.model() defaults from line 190
logger.info("Merging legacy data with base_config defaults...")
# Apply top-level fields from legacy data
if 'app_guid' in legacy_data:
self.__data['app_guid'] = legacy_data['app_guid']
if 'build_sha' in legacy_data:
self.__data['build_sha'] = legacy_data['build_sha']
if 'version_tag' in legacy_data:
self.__data['version_tag'] = legacy_data['version_tag']
# Apply watching data (complete replacement as these are user's watches)
if 'watching' in legacy_data:
self.__data['watching'] = legacy_data['watching']
# Merge settings sections (preserves base_config defaults for missing fields)
if 'settings' in legacy_data:
if 'headers' in legacy_data['settings']:
self.__data['settings']['headers'].update(legacy_data['settings']['headers'])
if 'requests' in legacy_data['settings']:
self.__data['settings']['requests'].update(legacy_data['settings']['requests'])
if 'application' in legacy_data['settings']:
# CRITICAL: Use .update() to merge, not replace
# This preserves new fields like 'ui' that exist in base_config
self.__data['settings']['application'].update(legacy_data['settings']['application'])
# CRITICAL: Rehydrate watches from dicts into Watch objects
# This ensures watches have their methods available during migration
@@ -336,13 +371,30 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
"""
Build settings data structure for saving.
Tags behavior depends on schema version:
- Before update_28 (schema < 28): Tags saved in settings for migration
- After update_28 (schema >= 28): Tags excluded from settings (in individual files)
Returns:
dict: Settings data ready for serialization
"""
import copy
# Deep copy settings to avoid modifying the original
settings_copy = copy.deepcopy(self.__data['settings'])
# Only exclude tags if we've already migrated them to individual files (schema >= 28)
# This ensures update_28 can migrate tags from settings
schema_version = self.__data['settings']['application'].get('schema_version', 0)
if schema_version >= 28:
# Tags are in individual tag.json files, don't save to settings
settings_copy['application']['tags'] = {}
# else: keep tags in settings for update_28 migration
return {
'note': 'Settings file - watches are stored in individual {uuid}/watch.json files',
'note': 'Settings file - watches are in {uuid}/watch.json, tags are in {uuid}/tag.json',
'app_guid': self.__data['app_guid'],
'settings': self.__data['settings'],
'settings': settings_copy,
'build_sha': self.__data.get('build_sha'),
'version_tag': self.__data.get('version_tag')
}
@@ -360,7 +412,7 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
"""
settings_data = self._build_settings_data()
changedetection_json = os.path.join(self.datastore_path, "changedetection.json")
save_json_atomic(changedetection_json, settings_data, label="settings", max_size_mb=10)
save_json_atomic(changedetection_json, settings_data, label="settings")
def _load_watches(self):
"""
@@ -380,6 +432,37 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
logger.debug(f"Loaded {len(watching)} watches")
def _load_tags(self):
"""
Load all tags from storage.
File backend implementation: reads individual tag.json files.
Tags loaded from files override any tags in settings (migration path).
"""
from ..model import Tag
def rehydrate_tag(uuid, entity_dict):
"""Rehydrate tag as Tag object with forced restock_diff processor."""
entity_dict['uuid'] = uuid
entity_dict['processor'] = 'restock_diff' # Force processor for override functionality
return Tag.model(
datastore_path=self.datastore_path,
__datastore=self.__data,
default=entity_dict
)
tags = load_all_tags(
self.datastore_path,
rehydrate_tag
)
# Override settings tags with loaded tags
# This ensures tag.json files take precedence over settings
if tags:
self.__data['settings']['application']['tags'].update(tags)
logger.info(f"Loaded {len(tags)} tags from individual tag.json files")
def _delete_watch(self, uuid):
"""
Delete a watch from storage.
@@ -706,25 +789,6 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
# Old sync_to_json and save_datastore methods removed - now handled by FileSavingDataStore parent class
# Go through the datastore path and remove any snapshots that are not mentioned in the index
# This usually is not used, but can be handy.
def remove_unused_snapshots(self):
logger.info("Removing snapshots from datastore that are not in the index..")
index = []
for uuid in self.data['watching']:
for id in self.data['watching'][uuid].history:
index.append(self.data['watching'][uuid].history[str(id)])
import pathlib
# Only in the sub-directories
for uuid in self.data['watching']:
for item in pathlib.Path(self.datastore_path).rglob(uuid + "/*.txt"):
if not str(item) in index:
logger.info(f"Removing {item}")
unlink(item)
@property
def proxy_list(self):
proxy_list = {}
@@ -816,7 +880,7 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
if watch:
# In /datastore/xyz-xyz/headers.txt
filepath = os.path.join(watch.watch_data_dir, 'headers.txt')
filepath = os.path.join(watch.data_dir, 'headers.txt')
try:
if os.path.isfile(filepath):
headers.update(parse_headers_from_text_file(filepath))
@@ -876,7 +940,8 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
self.__data['settings']['application']['tags'][new_uuid] = new_tag
self.commit()
# Save tag to its own tag.json file instead of settings
new_tag.commit()
return new_uuid
def get_all_tags_for_watch(self, uuid):

View File

@@ -175,23 +175,46 @@ def save_json_atomic(file_path, data_dict, label="file", max_size_mb=10):
raise e
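# The "atomic" in save_json_atomic() above usually means a write-temp-then-rename scheme; a minimal
# standalone sketch of that pattern (not the project's real implementation, just the general idea):
import json
import os
import tempfile

def _atomic_json_write_sketch(path, data):
    dir_name = os.path.dirname(path) or '.'
    fd, tmp_path = tempfile.mkstemp(dir=dir_name, suffix='.tmp')
    try:
        with os.fdopen(fd, 'w', encoding='utf-8') as f:
            json.dump(data, f)
            f.flush()
            os.fsync(f.fileno())        # ensure bytes reach disk before the rename
        os.replace(tmp_path, path)      # atomic: readers see the old or the new file, never half of one
    except Exception:
        if os.path.exists(tmp_path):
            os.unlink(tmp_path)
        raise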
def save_entity_atomic(entity_dir, uuid, entity_dict, filename, entity_type, max_size_mb):
"""
Save an entity (watch/tag) to disk using the atomic write pattern.
Generic function for saving any watch_base subclass (Watch, Tag, etc.).
Args:
entity_dir: Directory for this entity (e.g., /datastore/{uuid})
uuid: Entity UUID (for logging)
entity_dict: Dictionary representation of the entity
filename: JSON filename (e.g., 'watch.json', 'tag.json')
entity_type: Type label for logging (e.g., 'watch', 'tag')
max_size_mb: Maximum allowed file size in MB
Raises:
ValueError: If serialized data exceeds max_size_mb
OSError: If disk is full (ENOSPC) or other I/O error
"""
entity_json = os.path.join(entity_dir, filename)
save_json_atomic(entity_json, entity_dict, label=f"{entity_type} {uuid}", max_size_mb=max_size_mb)
def save_watch_atomic(watch_dir, uuid, watch_dict):
"""
Save a watch to disk using the atomic write pattern.
Convenience wrapper around save_json_atomic for watches.
Args:
watch_dir: Directory for this watch (e.g., /datastore/{uuid})
uuid: Watch UUID (for logging)
watch_dict: Dictionary representation of the watch
Raises:
ValueError: If serialized data exceeds 10MB (indicates bug or corruption)
OSError: If disk is full (ENOSPC) or other I/O error
Convenience wrapper around save_entity_atomic for watches.
Kept for backwards compatibility.
"""
watch_json = os.path.join(watch_dir, "watch.json")
save_json_atomic(watch_json, watch_dict, label=f"watch {uuid}", max_size_mb=10)
save_entity_atomic(watch_dir, uuid, watch_dict, "watch.json", "watch", max_size_mb=10)
def save_tag_atomic(tag_dir, uuid, tag_dict):
"""
Save a tag to disk using the atomic write pattern.
Convenience wrapper around save_entity_atomic for tags.
Kept for backwards compatibility.
"""
save_entity_atomic(tag_dir, uuid, tag_dict, "tag.json", "tag", max_size_mb=1)
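# A hedged usage sketch of the two wrappers above: both delegate to save_entity_atomic(),
# differing only in filename ('watch.json' vs 'tag.json') and size cap (10MB vs 1MB).
# Uses a throwaway temp directory and placeholder UUIDs so it can run anywhere.
import os
import tempfile

with tempfile.TemporaryDirectory() as datastore_path:
    watch_uuid, tag_uuid = 'watch-uuid-example', 'tag-uuid-example'   # placeholders, not real UUIDs
    for uuid in (watch_uuid, tag_uuid):
        os.makedirs(os.path.join(datastore_path, uuid), exist_ok=True)
    save_watch_atomic(os.path.join(datastore_path, watch_uuid), watch_uuid,
                      {'url': 'https://example.com', 'paused': False})
    save_tag_atomic(os.path.join(datastore_path, tag_uuid), tag_uuid,
                    {'title': 'price-watch', 'notification_muted': False})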
def load_watch_from_file(watch_json, uuid, rehydrate_entity_func):
@@ -204,8 +227,7 @@ def load_watch_from_file(watch_json, uuid, rehydrate_entity_func):
rehydrate_entity_func: Function to convert dict to Watch object
Returns:
Tuple of (Watch object, raw_data_dict) or (None, None) if failed
The raw_data_dict is needed to compute the hash before rehydration
Watch object or None if failed
"""
try:
# Check file size before reading
@@ -218,7 +240,7 @@ def load_watch_from_file(watch_json, uuid, rehydrate_entity_func):
f"File: {watch_json}. This indicates a bug or data corruption. "
f"Watch will be skipped."
)
return None, None
return None
if HAS_ORJSON:
with open(watch_json, 'rb') as f:
@@ -227,10 +249,9 @@ def load_watch_from_file(watch_json, uuid, rehydrate_entity_func):
with open(watch_json, 'r', encoding='utf-8') as f:
watch_data = json.load(f)
# Return both the raw data and the rehydrated watch
# Raw data is needed to compute hash before rehydration changes anything
# Rehydrate and return watch object
watch_obj = rehydrate_entity_func(uuid, watch_data)
return watch_obj, watch_data
return watch_obj
except json.JSONDecodeError as e:
logger.critical(
@@ -238,7 +259,7 @@ def load_watch_from_file(watch_json, uuid, rehydrate_entity_func):
f"File: {watch_json}. Error: {e}. "
f"Watch will be skipped and may need manual recovery from backup."
)
return None, None
return None
except ValueError as e:
# orjson raises ValueError for invalid JSON
if "invalid json" in str(e).lower() or HAS_ORJSON:
@@ -247,15 +268,15 @@ def load_watch_from_file(watch_json, uuid, rehydrate_entity_func):
f"File: {watch_json}. Error: {e}. "
f"Watch will be skipped and may need manual recovery from backup."
)
return None, None
return None
# Re-raise if it's not a JSON parsing error
raise
except FileNotFoundError:
logger.error(f"Watch file not found: {watch_json} for watch {uuid}")
return None, None
return None
except Exception as e:
logger.error(f"Failed to load watch {uuid} from {watch_json}: {e}")
return None, None
return None
def load_all_watches(datastore_path, rehydrate_entity_func):
@@ -295,8 +316,8 @@ def load_all_watches(datastore_path, rehydrate_entity_func):
for watch_json in watch_files:
# Extract UUID from path: /datastore/{uuid}/watch.json
uuid_dir = os.path.basename(os.path.dirname(watch_json))
watch, raw_data = load_watch_from_file(watch_json, uuid_dir, rehydrate_entity_func)
if watch and raw_data:
watch = load_watch_from_file(watch_json, uuid_dir, rehydrate_entity_func)
if watch:
watching[uuid_dir] = watch
loaded += 1
@@ -320,6 +341,122 @@ def load_all_watches(datastore_path, rehydrate_entity_func):
return watching
def load_tag_from_file(tag_json, uuid, rehydrate_entity_func):
"""
Load a tag from its JSON file.
Args:
tag_json: Path to the tag.json file
uuid: Tag UUID
rehydrate_entity_func: Function to convert dict to Tag object
Returns:
Tag object or None if failed
"""
try:
# Check file size before reading
file_size = os.path.getsize(tag_json)
MAX_TAG_SIZE = 1 * 1024 * 1024 # 1MB
if file_size > MAX_TAG_SIZE:
logger.critical(
f"CORRUPTED TAG DATA: Tag {uuid} file is unexpectedly large: "
f"{file_size / 1024 / 1024:.2f}MB (max: {MAX_TAG_SIZE / 1024 / 1024}MB). "
f"File: {tag_json}. This indicates a bug or data corruption. "
f"Tag will be skipped."
)
return None
if HAS_ORJSON:
with open(tag_json, 'rb') as f:
tag_data = orjson.loads(f.read())
else:
with open(tag_json, 'r', encoding='utf-8') as f:
tag_data = json.load(f)
tag_data['processor'] = 'restock_diff'
# Rehydrate tag (convert dict to Tag object)
# processor_override is set inside the rehydration function
tag_obj = rehydrate_entity_func(uuid, tag_data)
return tag_obj
except json.JSONDecodeError as e:
logger.critical(
f"CORRUPTED TAG DATA: Failed to parse JSON for tag {uuid}. "
f"File: {tag_json}. Error: {e}. "
f"Tag will be skipped and may need manual recovery from backup."
)
return None
except ValueError as e:
# orjson raises ValueError for invalid JSON
if "invalid json" in str(e).lower() or HAS_ORJSON:
logger.critical(
f"CORRUPTED TAG DATA: Failed to parse JSON for tag {uuid}. "
f"File: {tag_json}. Error: {e}. "
f"Tag will be skipped and may need manual recovery from backup."
)
return None
# Re-raise if it's not a JSON parsing error
raise
except FileNotFoundError:
logger.debug(f"Tag file not found: {tag_json} for tag {uuid}")
return None
except Exception as e:
logger.error(f"Failed to load tag {uuid} from {tag_json}: {e}")
return None
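# The same size-guarded load pattern, reduced to a standalone sketch that returns a plain dict
# (no Tag/Watch rehydration): refuse oversized files up front, then parse with orjson when
# available and fall back to stdlib json otherwise.
import json
import os

def _load_bounded_json_sketch(path, max_bytes=1 * 1024 * 1024):
    if os.path.getsize(path) > max_bytes:
        return None                                  # caller logs and skips, as above
    try:
        import orjson
    except ImportError:
        orjson = None
    if orjson is not None:
        with open(path, 'rb') as f:
            return orjson.loads(f.read())            # orjson expects bytes
    with open(path, 'r', encoding='utf-8') as f:
        return json.load(f)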
def load_all_tags(datastore_path, rehydrate_entity_func):
"""
Load all tags from individual tag.json files.
Tags are stored separately from settings in {uuid}/tag.json files.
Args:
datastore_path: Path to the datastore directory
rehydrate_entity_func: Function to convert dict to Tag object
Returns:
Dictionary of uuid -> Tag object
"""
logger.info("Loading tags from individual tag.json files...")
tags = {}
if not os.path.exists(datastore_path):
return tags
# Find all tag.json files using glob
tag_files = glob.glob(os.path.join(datastore_path, "*", "tag.json"))
total = len(tag_files)
if total == 0:
logger.debug("No tag.json files found")
return tags
logger.debug(f"Found {total} tag.json files")
loaded = 0
failed = 0
for tag_json in tag_files:
# Extract UUID from path: /datastore/{uuid}/tag.json
uuid_dir = os.path.basename(os.path.dirname(tag_json))
tag = load_tag_from_file(tag_json, uuid_dir, rehydrate_entity_func)
if tag:
tags[uuid_dir] = tag
loaded += 1
else:
# load_tag_from_file already logged the specific error
failed += 1
if failed > 0:
logger.warning(f"Loaded {loaded} tags, {failed} tags FAILED to load")
else:
logger.info(f"Loaded {loaded} tags from disk")
return tags
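# A hedged, self-contained usage sketch: build a throwaway datastore layout and load it back.
# Real callers pass a rehydrate function that returns Tag objects; a plain "return the dict"
# stand-in is enough here to show the uuid -> entity mapping load_all_tags() produces.
import json
import os
import tempfile

with tempfile.TemporaryDirectory() as datastore_path:
    tag_uuid = 'aaaa-bbbb-cccc'                       # placeholder UUID
    os.makedirs(os.path.join(datastore_path, tag_uuid))
    with open(os.path.join(datastore_path, tag_uuid, 'tag.json'), 'w', encoding='utf-8') as f:
        json.dump({'title': 'price-watch'}, f)

    tags = load_all_tags(datastore_path, lambda uuid, d: d)
    assert tags[tag_uuid]['title'] == 'price-watch'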
# ============================================================================
# FileSavingDataStore Class
# ============================================================================

View File

@@ -29,6 +29,7 @@ def create_backup_tarball(datastore_path, update_number):
Includes:
- All {uuid}/watch.json files
- All {uuid}/tag.json files
- changedetection.json (settings, if it exists)
- url-watches.json (legacy format, if it exists)
- Directory structure preserved
@@ -44,7 +45,7 @@ def create_backup_tarball(datastore_path, update_number):
To restore from a backup:
cd /path/to/datastore
tar -xzf before-update-N-timestamp.tar.gz
This will restore all watch.json files and settings to their pre-update state.
This will restore all watch.json and tag.json files and settings to their pre-update state.
"""
timestamp = int(time.time())
backup_filename = f"before-update-{update_number}-{timestamp}.tar.gz"
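# The docstring above shows the command-line restore; the same thing can be done from Python
# with the stdlib tarfile module (paths and the backup filename below are placeholders):
import tarfile

with tarfile.open('/path/to/datastore/before-update-28-1700000000.tar.gz', 'r:gz') as tar:
    tar.extractall(path='/path/to/datastore')   # recreates {uuid}/watch.json, {uuid}/tag.json and settings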
@@ -66,9 +67,10 @@ def create_backup_tarball(datastore_path, update_number):
tar.add(url_watches_json, arcname="url-watches.json")
logger.debug("Added url-watches.json to backup")
# Backup all watch directories with their watch.json files
# Backup all watch/tag directories with their JSON files
# This preserves the UUID directory structure
watch_count = 0
tag_count = 0
for entry in os.listdir(datastore_path):
entry_path = os.path.join(datastore_path, entry)
@@ -80,17 +82,22 @@ def create_backup_tarball(datastore_path, update_number):
if entry.startswith('.') or entry.startswith('before-update-'):
continue
# Check if this directory has a watch.json (indicates it's a watch UUID directory)
# Backup watch.json if exists
watch_json = os.path.join(entry_path, "watch.json")
if os.path.isfile(watch_json):
# Add the watch.json file preserving directory structure
tar.add(watch_json, arcname=f"{entry}/watch.json")
watch_count += 1
if watch_count % 100 == 0:
logger.debug(f"Backed up {watch_count} watch.json files...")
logger.success(f"Backup created: {backup_filename} ({watch_count} watches)")
# Backup tag.json if exists
tag_json = os.path.join(entry_path, "tag.json")
if os.path.isfile(tag_json):
tar.add(tag_json, arcname=f"{entry}/tag.json")
tag_count += 1
logger.success(f"Backup created: {backup_filename} ({watch_count} watches, {tag_count} tags)")
return backup_path
except Exception as e:
@@ -147,10 +154,10 @@ class DatastoreUpdatesMixin:
2. For each update > current schema version:
- Create backup of datastore
- Run update method
- Update schema version
- Mark settings and watches dirty
- Update schema version and commit settings
- Commit all watches and tags
3. If any update fails, stop processing
4. Save all changes immediately
4. All changes saved via individual .commit() calls
"""
updates_available = self.get_updates_available()
@@ -199,26 +206,11 @@ class DatastoreUpdatesMixin:
# Don't run any more updates
return
else:
# Bump the version, important
# Bump the version
self.data['settings']['application']['schema_version'] = update_n
self.commit()
# CRITICAL: Save all watches so changes are persisted
# Most updates modify watches, and in the new individual watch.json structure,
# we need to ensure those changes are saved
logger.info(f"Saving all {len(self.data['watching'])} watches after update_{update_n} (so that it saves them to disk)")
for uuid in self.data['watching'].keys():
self.data['watching'][uuid].commit()
# Save changes immediately after each update (more resilient than batching)
logger.critical(f"Saving all changes after update_{update_n}")
try:
self._save_dirty_items()
logger.success(f"Update {update_n} changes saved successfully")
except Exception as e:
logger.error(f"Failed to save update_{update_n} changes: {e}")
# Don't raise - update already ran, but changes might not be persisted
# The update will try to run again on next startup
logger.success(f"Update {update_n} completed")
# Track which updates ran
updates_ran.append(update_n)
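# A condensed, hedged sketch of the per-update flow described above (not the literal method body;
# names are taken from this diff where visible, everything else is illustrative):
def _run_updates_sketch(self):
    for update_n in self.get_updates_available():
        if update_n <= self.data['settings']['application'].get('schema_version', 0):
            continue                                              # already applied
        create_backup_tarball(self.datastore_path, update_n)      # safety net before touching data
        try:
            getattr(self, f"update_{update_n}")()                 # e.g. update_28() below
        except Exception:
            return                                                # stop; remaining updates retry on next startup
        self.data['settings']['application']['schema_version'] = update_n
        self.commit()                                             # persist the new schema version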
@@ -468,6 +460,14 @@ class DatastoreUpdatesMixin:
del self.data['watching'][uuid]['extract_title_as_title']
if self.data['settings']['application'].get('extract_title_as_title'):
# Ensure 'ui' key exists (defensive for edge cases where base_config merge didn't happen)
if 'ui' not in self.data['settings']['application']:
self.data['settings']['application']['ui'] = {
'use_page_title_in_list': True,
'open_diff_in_new_tab': True,
'socket_io_enabled': True,
'favicons_enabled': True
}
self.data['settings']['application']['ui']['use_page_title_in_list'] = self.data['settings']['application'].get('extract_title_as_title')
def update_21(self):
@@ -648,23 +648,6 @@ class DatastoreUpdatesMixin:
logger.critical("Reloading datastore from new format...")
self._load_state() # Includes load_watches
logger.success("Datastore reloaded from new format successfully")
# Verify all watches have hashes after migration
missing_hashes = [uuid for uuid in self.data['watching'].keys() if uuid not in self._watch_hashes]
if missing_hashes:
logger.error(f"WARNING: {len(missing_hashes)} watches missing hashes after migration: {missing_hashes[:5]}")
else:
logger.success(f"All {len(self.data['watching'])} watches have valid hashes after migration")
# Set schema version to latest available update
# This prevents re-running updates and re-marking all watches as dirty
updates_available = self.get_updates_available()
latest_schema = updates_available[-1] if updates_available else 26
self.data['settings']['application']['schema_version'] = latest_schema
self.commit()
logger.info(f"Set schema_version to {latest_schema} (migration complete, all watches already saved)")
logger.critical("=" * 80)
logger.critical("MIGRATION COMPLETED SUCCESSFULLY!")
logger.critical("=" * 80)
@@ -683,4 +666,76 @@ class DatastoreUpdatesMixin:
logger.info("")
def update_26(self):
self.migrate_legacy_db_format()
self.migrate_legacy_db_format()
def update_28(self):
"""
Migrate tags to individual tag.json files.
Tags are currently saved only in changedetection.json (settings).
This migration ALSO saves them to individual {uuid}/tag.json files,
similar to how watches are stored (dual storage).
Benefits:
- Allows atomic tag updates without rewriting entire settings
- Enables independent tag versioning/backup
- Maintains backwards compatibility (tags stay in settings too)
"""
logger.critical("=" * 80)
logger.critical("Running migration: Individual tag persistence (update_28)")
logger.critical("Creating individual tag.json files (tags remain in settings too)")
logger.critical("=" * 80)
tags = self.data['settings']['application'].get('tags', {})
tag_count = len(tags)
if tag_count == 0:
logger.info("No tags found, skipping migration")
return
logger.info(f"Migrating {tag_count} tags to individual tag.json files...")
saved_count = 0
failed_count = 0
for uuid, tag_data in tags.items():
try:
# Force save as tag.json (not watch.json) even if object is corrupted
from changedetectionio.store.file_saving_datastore import save_entity_atomic
import os
tag_dir = os.path.join(self.datastore_path, uuid)
os.makedirs(tag_dir, exist_ok=True)
# Convert to dict if it's an object
tag_dict = dict(tag_data) if hasattr(tag_data, '__iter__') else tag_data
# Save explicitly as tag.json
save_entity_atomic(
tag_dir,
uuid,
tag_dict,
filename='tag.json',
entity_type='tag',
max_size_mb=1
)
saved_count += 1
if saved_count % 10 == 0:
logger.info(f" Progress: {saved_count}/{tag_count} tags migrated...")
except Exception as e:
logger.error(f"Failed to save tag {uuid} ({tag_data.get('title', 'unknown')}): {e}")
failed_count += 1
if failed_count > 0:
logger.warning(f"Migration complete: {saved_count} tags saved, {failed_count} tags FAILED")
else:
logger.success(f"Migration complete: {saved_count} tags saved to individual tag.json files")
# Tags remain in settings for backwards compatibility AND easy access
# On next load, _load_tags() will read from tag.json files and merge with settings
logger.info("Tags saved to both settings AND individual tag.json files")
logger.info("Future tag edits will update both locations (dual storage)")
logger.critical("=" * 80)
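# Illustrative only: roughly what one migrated {uuid}/tag.json ends up holding after update_28.
# Field values are placeholders; the authoritative Tag schema lives in the model, not here.
example_tag_json = {
    'uuid': '9b1c0000-0000-0000-0000-000000000000',   # placeholder
    'title': 'price-watch',
    'notification_muted': False,
    'include_filters': [],
    'date_created': 1700000000,
}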

View File

@@ -489,6 +489,7 @@ def test_api_import(client, live_server, measure_memory_usage, datastore_path):
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
# Test 1: Basic import with tag
res = client.post(
url_for("import") + "?tag=import-test",
data='https://website1.com\r\nhttps://website2.com',
@@ -507,6 +508,97 @@ def test_api_import(client, live_server, measure_memory_usage, datastore_path):
res = client.get(url_for('tags.tags_overview_page'))
assert b'import-test' in res.data
# Test 2: Import with watch configuration fields (issue #3845)
# Test string field (include_filters), boolean (paused), and processor
import urllib.parse
params = urllib.parse.urlencode({
'tag': 'config-test',
'include_filters': 'div.content',
'paused': 'true',
'processor': 'text_json_diff',
'title': 'Imported with Config'
})
res = client.post(
url_for("import") + "?" + params,
data='https://website3.com',
headers={'x-api-key': api_key},
follow_redirects=True
)
assert res.status_code == 200
assert len(res.json) == 1
uuid = res.json[0]
# Verify the configuration was applied
watch = live_server.app.config['DATASTORE'].data['watching'][uuid]
assert watch['include_filters'] == ['div.content'], "include_filters should be set as array"
assert watch['paused'] == True, "paused should be True"
assert watch['processor'] == 'text_json_diff', "processor should be set"
assert watch['title'] == 'Imported with Config', "title should be set"
# Test 3: Import with array field (notification_urls) - using valid Apprise format
params = urllib.parse.urlencode({
'tag': 'notification-test',
'notification_urls': 'mailto://test@example.com,mailto://admin@example.com'
})
res = client.post(
url_for("import") + "?" + params,
data='https://website4.com',
headers={'x-api-key': api_key},
follow_redirects=True
)
assert res.status_code == 200
uuid = res.json[0]
watch = live_server.app.config['DATASTORE'].data['watching'][uuid]
assert 'mailto://test@example.com' in watch['notification_urls'], "notification_urls should contain first email"
assert 'mailto://admin@example.com' in watch['notification_urls'], "notification_urls should contain second email"
# Test 4: Import with object field (time_between_check)
import json
time_config = json.dumps({"hours": 2, "minutes": 30})
params = urllib.parse.urlencode({
'tag': 'schedule-test',
'time_between_check': time_config
})
res = client.post(
url_for("import") + "?" + params,
data='https://website5.com',
headers={'x-api-key': api_key},
follow_redirects=True
)
assert res.status_code == 200
uuid = res.json[0]
watch = live_server.app.config['DATASTORE'].data['watching'][uuid]
assert watch['time_between_check']['hours'] == 2, "time_between_check hours should be 2"
assert watch['time_between_check']['minutes'] == 30, "time_between_check minutes should be 30"
# Test 5: Import with invalid processor (should fail)
res = client.post(
url_for("import") + "?processor=invalid_processor",
data='https://website6.com',
headers={'x-api-key': api_key},
follow_redirects=True
)
assert res.status_code == 400, "Should reject invalid processor"
assert b"Invalid processor" in res.data, "Error message should mention invalid processor"
# Test 6: Import with invalid field (should fail)
res = client.post(
url_for("import") + "?unknown_field=value",
data='https://website7.com',
headers={'x-api-key': api_key},
follow_redirects=True
)
assert res.status_code == 400, "Should reject unknown field"
assert b"Unknown watch configuration parameter" in res.data, "Error message should mention unknown parameter"
def test_api_conflict_UI_password(client, live_server, measure_memory_usage, datastore_path):

View File

@@ -35,7 +35,7 @@ def test_watch_commit_persists_to_disk(client, live_server):
watch.commit()
# Read directly from disk (bypass datastore cache)
watch_json_path = os.path.join(watch.watch_data_dir, 'watch.json')
watch_json_path = os.path.join(watch.data_dir, 'watch.json')
assert os.path.exists(watch_json_path), "watch.json should exist on disk"
with open(watch_json_path, 'r') as f:
@@ -89,7 +89,7 @@ def test_watch_commit_atomic_on_crash(client, live_server):
watch.commit()
# Verify watch.json exists and is valid
watch_json_path = os.path.join(watch.watch_data_dir, 'watch.json')
watch_json_path = os.path.join(watch.data_dir, 'watch.json')
with open(watch_json_path, 'r') as f:
data = json.load(f) # Should not raise JSONDecodeError
assert data['title'] == 'First Save'
@@ -125,7 +125,7 @@ def test_multiple_watches_commit_independently(client, live_server):
# Read all from disk
def read_watch_json(uuid):
watch = datastore.data['watching'][uuid]
path = os.path.join(watch.watch_data_dir, 'watch.json')
path = os.path.join(watch.data_dir, 'watch.json')
with open(path, 'r') as f:
return json.load(f)
@@ -178,7 +178,7 @@ def test_concurrent_watch_commits_dont_corrupt(client, live_server):
assert len(errors) == 0, f"Expected no errors, got: {errors}"
# JSON file should still be valid (not corrupted)
watch_json_path = os.path.join(watch.watch_data_dir, 'watch.json')
watch_json_path = os.path.join(watch.data_dir, 'watch.json')
with open(watch_json_path, 'r') as f:
data = json.load(f) # Should not raise JSONDecodeError
assert data['uuid'] == uuid, "UUID should still be correct"
@@ -270,7 +270,7 @@ def test_datastore_lock_protects_commit_snapshot(client, live_server):
assert commits_succeeded[0] == 150, f"Expected 150 commits, got {commits_succeeded[0]}"
# Final JSON should be valid
watch_json_path = os.path.join(watch.watch_data_dir, 'watch.json')
watch_json_path = os.path.join(watch.data_dir, 'watch.json')
with open(watch_json_path, 'r') as f:
data = json.load(f)
assert data['uuid'] == uuid
@@ -301,7 +301,7 @@ def test_processor_config_never_in_watch_json(client, live_server):
watch.commit()
# Read watch.json from disk
watch_json_path = os.path.join(watch.watch_data_dir, 'watch.json')
watch_json_path = os.path.join(watch.data_dir, 'watch.json')
with open(watch_json_path, 'r') as f:
data = json.load(f)
@@ -343,7 +343,7 @@ def test_api_post_saves_processor_config_separately(client, live_server):
watch = datastore.data['watching'][uuid]
# Check that processor config file exists
processor_config_path = os.path.join(watch.watch_data_dir, 'restock_diff.json')
processor_config_path = os.path.join(watch.data_dir, 'restock_diff.json')
assert os.path.exists(processor_config_path), "Processor config file should exist"
with open(processor_config_path, 'r') as f:
@@ -385,7 +385,7 @@ def test_api_put_saves_processor_config_separately(client, live_server):
watch = datastore.data['watching'][uuid]
# Check processor config file
processor_config_path = os.path.join(watch.watch_data_dir, 'restock_diff.json')
processor_config_path = os.path.join(watch.data_dir, 'restock_diff.json')
assert os.path.exists(processor_config_path), "Processor config file should exist"
with open(processor_config_path, 'r') as f:
@@ -414,7 +414,7 @@ def test_ui_edit_saves_processor_config_separately(client, live_server):
watch.commit()
# Check watch.json has NO processor_config_* fields (main point of this test)
watch_json_path = os.path.join(watch.watch_data_dir, 'watch.json')
watch_json_path = os.path.join(watch.data_dir, 'watch.json')
with open(watch_json_path, 'r') as f:
watch_data = json.load(f)
@@ -443,7 +443,7 @@ def test_browser_steps_normalized_to_empty_list(client, live_server):
watch.commit()
# Read from disk
watch_json_path = os.path.join(watch.watch_data_dir, 'watch.json')
watch_json_path = os.path.join(watch.data_dir, 'watch.json')
with open(watch_json_path, 'r') as f:
data = json.load(f)
@@ -547,7 +547,7 @@ def test_tag_delete_removes_from_watches(client, live_server):
# Tag should be removed from watches and persisted
def check_watch_tags(uuid):
watch = datastore.data['watching'][uuid]
watch_json_path = os.path.join(watch.watch_data_dir, 'watch.json')
watch_json_path = os.path.join(watch.data_dir, 'watch.json')
with open(watch_json_path, 'r') as f:
return json.load(f)['tags']
@@ -572,7 +572,7 @@ def test_watch_pause_unpause_persists(client, live_server):
assert response.status_code == 200
# Check persisted to disk
watch_json_path = os.path.join(watch.watch_data_dir, 'watch.json')
watch_json_path = os.path.join(watch.data_dir, 'watch.json')
with open(watch_json_path, 'r') as f:
data = json.load(f)
assert data['paused'] == True, "Pause should be persisted"
@@ -601,7 +601,7 @@ def test_watch_mute_unmute_persists(client, live_server):
assert response.status_code == 200
# Check persisted to disk
watch_json_path = os.path.join(watch.watch_data_dir, 'watch.json')
watch_json_path = os.path.join(watch.data_dir, 'watch.json')
with open(watch_json_path, 'r') as f:
data = json.load(f)
assert data['notification_muted'] == True, "Mute should be persisted"

View File

@@ -474,3 +474,147 @@ the {test} appeared before. {test in res.data[:n]=}
n += t_index + len(test)
delete_all_watches(client)
def test_tag_json_persistence(client, live_server, measure_memory_usage, datastore_path):
"""
Test that tags are saved to individual tag.json files and loaded correctly.
This test verifies the update_27 tag storage refactoring:
- Tags are saved to {uuid}/tag.json files
- Tags persist across datastore restarts
- Tag edits write to tag.json
- Tag deletion removes tag.json file
"""
import json
from changedetectionio.store import ChangeDetectionStore
datastore = client.application.config.get('DATASTORE')
# 1. Create a tag
res = client.post(
url_for("tags.form_tag_add"),
data={"name": "persistence-test-tag"},
follow_redirects=True
)
assert b"Tag added" in res.data
tag_uuid = get_UUID_for_tag_name(client, name="persistence-test-tag")
assert tag_uuid, "Tag UUID should exist"
# 2. Verify tag.json file was created
tag_json_path = os.path.join(datastore_path, tag_uuid, "tag.json")
assert os.path.exists(tag_json_path), f"tag.json should exist at {tag_json_path}"
# 3. Verify tag.json contains correct data
with open(tag_json_path, 'r') as f:
tag_data = json.load(f)
assert tag_data['title'] == 'persistence-test-tag'
assert tag_data['uuid'] == tag_uuid
assert 'date_created' in tag_data
# 4. Edit the tag
res = client.post(
url_for("tags.form_tag_edit_submit", uuid=tag_uuid),
data={
"name": "persistence-test-tag",
"notification_muted": True,
"include_filters": '#test-filter'
},
follow_redirects=True
)
assert b"Updated" in res.data
# 5. Verify tag.json was updated
with open(tag_json_path, 'r') as f:
tag_data = json.load(f)
assert tag_data['notification_muted'] == True
assert '#test-filter' in tag_data.get('include_filters', [])
# 5a. Verify tag is NOT in changedetection.json (tags should be in tag.json only)
changedetection_json_path = os.path.join(datastore_path, "changedetection.json")
with open(changedetection_json_path, 'r') as f:
settings_data = json.load(f)
# Tags dict should be empty in settings (all tags are in individual files)
assert settings_data['settings']['application']['tags'] == {}, \
"Tags should NOT be saved to changedetection.json (should be empty dict)"
# 6. Simulate restart - reload datastore
datastore2 = ChangeDetectionStore(datastore_path=datastore_path, include_default_watches=False, version_tag='test')
# 7. Verify tag was loaded from tag.json
assert tag_uuid in datastore2.data['settings']['application']['tags']
loaded_tag = datastore2.data['settings']['application']['tags'][tag_uuid]
assert loaded_tag['title'] == 'persistence-test-tag'
assert loaded_tag['notification_muted'] == True
assert '#test-filter' in loaded_tag.get('include_filters', [])
# 8. Delete the tag via API
res = client.get(url_for("tags.delete", uuid=tag_uuid), follow_redirects=True)
assert b"Tag deleted" in res.data
# 9. Verify tag.json file was deleted
assert not os.path.exists(tag_json_path), f"tag.json should be deleted at {tag_json_path}"
# 10. Verify tag is removed from settings
assert tag_uuid not in datastore.data['settings']['application']['tags']
delete_all_watches(client)
def test_tag_json_migration_update_27(client, live_server, measure_memory_usage, datastore_path):
"""
Test that update_27 migration correctly moves tags to individual files.
This simulates a pre-update_27 datastore and verifies migration works.
"""
import json
from changedetectionio.store import ChangeDetectionStore
datastore = client.application.config.get('DATASTORE')
# 1. Create multiple tags
tag_names = ['migration-tag-1', 'migration-tag-2', 'migration-tag-3']
tag_uuids = []
for tag_name in tag_names:
res = client.post(
url_for("tags.form_tag_add"),
data={"name": tag_name},
follow_redirects=True
)
assert b"Tag added" in res.data
tag_uuid = get_UUID_for_tag_name(client, name=tag_name)
tag_uuids.append(tag_uuid)
# 2. Verify all tag.json files exist (new tags are written straight to {uuid}/tag.json when added, so no migration step is needed here)
for tag_uuid in tag_uuids:
tag_json_path = os.path.join(datastore_path, tag_uuid, "tag.json")
assert os.path.exists(tag_json_path), f"tag.json should exist for {tag_uuid}"
# 2a. Verify tags are NOT in changedetection.json
changedetection_json_path = os.path.join(datastore_path, "changedetection.json")
with open(changedetection_json_path, 'r') as f:
settings_data = json.load(f)
assert settings_data['settings']['application']['tags'] == {}, \
"Tags should NOT be in changedetection.json after migration"
# 3. Simulate restart
datastore2 = ChangeDetectionStore(datastore_path=datastore_path, include_default_watches=False, version_tag='test')
# 4. Verify all tags loaded from tag.json files
for idx, tag_uuid in enumerate(tag_uuids):
assert tag_uuid in datastore2.data['settings']['application']['tags']
loaded_tag = datastore2.data['settings']['application']['tags'][tag_uuid]
assert loaded_tag['title'] == tag_names[idx]
# Cleanup
res = client.get(url_for("tags.delete_all"), follow_redirects=True)
assert b'All tags deleted' in res.data
# Verify all tag.json files were deleted
for tag_uuid in tag_uuids:
tag_json_path = os.path.join(datastore_path, tag_uuid, "tag.json")
assert not os.path.exists(tag_json_path), f"tag.json should be deleted for {tag_uuid}"
delete_all_watches(client)

View File

@@ -100,11 +100,11 @@ class TestDiffBuilder(unittest.TestCase):
# Test 1: Deepcopy shares datastore reference (doesn't copy it)
watch_copy = deepcopy(watches[0])
self.assertIsNotNone(watch_copy._model__datastore,
self.assertIsNotNone(watch_copy._datastore,
"__datastore should exist in copied watch")
self.assertIs(watch_copy._model__datastore, watches[0]._model__datastore,
self.assertIs(watch_copy._datastore, watches[0]._datastore,
"__datastore should be SHARED (same object), not copied")
self.assertIs(watch_copy._model__datastore, mock_datastore,
self.assertIs(watch_copy._datastore, mock_datastore,
"__datastore should reference the original datastore")
# Test 2: Dict data is properly copied (not shared)
@@ -130,7 +130,7 @@ class TestDiffBuilder(unittest.TestCase):
# All copies should share the same datastore
for copy in copies:
self.assertIs(copy._model__datastore, mock_datastore,
self.assertIs(copy._datastore, mock_datastore,
"All copies should share the original datastore")
def test_watch_pickle_doesnt_serialize_datastore(self):
@@ -160,7 +160,7 @@ class TestDiffBuilder(unittest.TestCase):
"Dict data should be preserved after pickle/unpickle")
# Test 2: __datastore is NOT serialized (attribute shouldn't exist after unpickle)
self.assertFalse(hasattr(unpickled_watch, '_model__datastore'),
self.assertFalse(hasattr(unpickled_watch, '_datastore'),
"__datastore attribute should not exist after unpickle (not serialized)")
# Test 3: Pickled data shouldn't contain the large datastore object
@@ -208,8 +208,8 @@ class TestDiffBuilder(unittest.TestCase):
"Modifying copy should not affect original")
# Test 5: Tag with datastore shares it (doesn't copy it)
if hasattr(tag_with_ds, '_model__datastore'):
self.assertIs(tag_copy2._model__datastore, tag_with_ds._model__datastore,
if hasattr(tag_with_ds, '_datastore'):
self.assertIs(tag_copy2._datastore, tag_with_ds._datastore,
"Tag should share __datastore reference like Watch does")
def test_watch_copy_performance(self):