mirror of https://github.com/dgtlmoon/changedetection.io.git (synced 2026-02-14 02:06:04 +00:00)

Compare commits: datastore- ... 3845-impor (6 commits)

| Author | SHA1 | Date |
|---|---|---|
| | b5b5e8d3e4 | |
| | 6e90a0bbd1 | |
| | 987789425d | |
| | 892b645147 | |
| | 278da3fa9b | |
| | c577bd700c | |
@@ -182,7 +182,6 @@ def main():
from changedetectionio.flask_app import changedetection_app

datastore_path = None
do_cleanup = False
# Set a default logger level
logger_level = 'DEBUG'
include_default_watches = True

@@ -265,7 +264,7 @@ def main():
i += 1

try:
opts, args = getopt.getopt(cleaned_argv[1:], "6Ccsd:h:p:l:P:", "port")
opts, args = getopt.getopt(cleaned_argv[1:], "6Csd:h:p:l:P:", "port")
except getopt.GetoptError as e:
print_help()
print(f'Error: {e}')

@@ -293,10 +292,6 @@ def main():
if opt == '-d':
datastore_path = arg

# Cleanup (remove text files that arent in the index)
if opt == '-c':
do_cleanup = True

# Create the datadir if it doesnt exist
if opt == '-C':
create_datastore_dir = True

@@ -602,10 +597,6 @@ def main():
else:
logger.info("SIGUSR1 handler only registered on Linux, skipped.")

# Go into cleanup mode
if do_cleanup:
datastore.remove_unused_snapshots()

app.config['datastore_path'] = datastore_path

@@ -614,7 +605,7 @@ def main():
return dict(right_sticky="v{}".format(datastore.data['version_tag']),
new_version_available=app.config['NEW_VERSION_AVAILABLE'],
has_password=datastore.data['settings']['application']['password'] != False,
socket_io_enabled=datastore.data['settings']['application']['ui'].get('socket_io_enabled', True),
socket_io_enabled=datastore.data['settings']['application'].get('ui', {}).get('socket_io_enabled', True),
all_paused=datastore.data['settings']['application'].get('all_paused', False),
all_muted=datastore.data['settings']['application'].get('all_muted', False)
)
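The switch to `.get('ui', {})` in the hunk above protects against settings files that predate the `ui` section. A minimal sketch (hypothetical settings dict, not the real datastore) of why the defensive lookup matters:

```python
# Older datastores may not have a 'ui' section at all.
settings = {'application': {}}  # no 'ui' key

# settings['application']['ui'].get('socket_io_enabled', True)  # would raise KeyError
enabled = settings['application'].get('ui', {}).get('socket_io_enabled', True)
print(enabled)  # True: falls back to the default when 'ui' is absent
```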
@@ -2,8 +2,9 @@ from changedetectionio.strtobool import strtobool
from flask_restful import abort, Resource
from flask import request
from functools import wraps
from . import auth, validate_openapi_request
from . import auth, validate_openapi_request, schema_create_watch
from ..validate_url import is_safe_valid_url
import json

def default_content_type(content_type='text/plain'):

@@ -19,6 +20,62 @@ def default_content_type(content_type='text/plain'):
return decorator

def convert_query_param_to_type(value, schema_property):
"""
Convert a query parameter string to the appropriate type based on schema definition.

Args:
value: String value from query parameter
schema_property: Schema property definition with 'type' or 'anyOf' field

Returns:
Converted value in the appropriate type
"""
# Handle anyOf schemas (extract the first type)
if 'anyOf' in schema_property:
# Use the first non-null type from anyOf
for option in schema_property['anyOf']:
if option.get('type') and option.get('type') != 'null':
prop_type = option.get('type')
break
else:
prop_type = None
else:
prop_type = schema_property.get('type')

# Handle array type (e.g., notification_urls)
if prop_type == 'array':
# Support both comma-separated and JSON array format
if value.startswith('['):
try:
return json.loads(value)
except json.JSONDecodeError:
return [v.strip() for v in value.split(',')]
return [v.strip() for v in value.split(',')]

# Handle object type (e.g., time_between_check, headers)
elif prop_type == 'object':
try:
return json.loads(value)
except json.JSONDecodeError:
raise ValueError(f"Invalid JSON object for field: {value}")

# Handle boolean type
elif prop_type == 'boolean':
return strtobool(value)

# Handle integer type
elif prop_type == 'integer':
return int(value)

# Handle number type (float)
elif prop_type == 'number':
return float(value)

# Default: return as string
return value
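A short usage sketch of the converter added above. The schema property fragments below are illustrative stand-ins, not the real `schema_create_watch` entries, and it assumes `convert_query_param_to_type` is in scope:

```python
examples = [
    ('paused', {'type': 'boolean'}, 'true'),
    ('time_between_check', {'type': 'object'}, '{"hours": 1}'),
    ('notification_urls', {'type': 'array'}, 'mailto://a@example.com,mailto://b@example.com'),
    ('history_snapshot_max_length', {'anyOf': [{'type': 'null'}, {'type': 'integer'}]}, '50'),
]

for name, prop, raw in examples:
    print(name, '->', convert_query_param_to_type(raw, prop))
# paused -> truthy result via strtobool
# time_between_check -> {'hours': 1}
# notification_urls -> ['mailto://a@example.com', 'mailto://b@example.com']
# history_snapshot_max_length -> 50 (first non-null type taken from anyOf)
```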
class Import(Resource):
def __init__(self, **kwargs):
# datastore is a black box dependency

@@ -28,25 +85,79 @@ class Import(Resource):
@default_content_type('text/plain') #3547 #3542
@validate_openapi_request('importWatches')
def post(self):
"""Import a list of watched URLs."""
"""Import a list of watched URLs with optional watch configuration."""

# Special parameters that are NOT watch configuration
special_params = {'tag', 'tag_uuids', 'dedupe', 'proxy'}

extras = {}

# Handle special 'proxy' parameter
if request.args.get('proxy'):
plist = self.datastore.proxy_list
if not request.args.get('proxy') in plist:
return "Invalid proxy choice, currently supported proxies are '{}'".format(', '.join(plist)), 400
proxy_list_str = ', '.join(plist) if plist else 'none configured'
return f"Invalid proxy choice, currently supported proxies are '{proxy_list_str}'", 400
else:
extras['proxy'] = request.args.get('proxy')

# Handle special 'dedupe' parameter
dedupe = strtobool(request.args.get('dedupe', 'true'))

# Handle special 'tag' and 'tag_uuids' parameters
tags = request.args.get('tag')
tag_uuids = request.args.get('tag_uuids')

if tag_uuids:
tag_uuids = tag_uuids.split(',')

# Extract ALL other query parameters as watch configuration
schema_properties = schema_create_watch.get('properties', {})
for param_name, param_value in request.args.items():
# Skip special parameters
if param_name in special_params:
continue

# Skip if not in schema (unknown parameter)
if param_name not in schema_properties:
return f"Unknown watch configuration parameter: {param_name}", 400

# Convert to appropriate type based on schema
try:
converted_value = convert_query_param_to_type(param_value, schema_properties[param_name])
extras[param_name] = converted_value
except (ValueError, json.JSONDecodeError) as e:
return f"Invalid value for parameter '{param_name}': {str(e)}", 400

# Validate processor if provided
if 'processor' in extras:
from changedetectionio.processors import available_processors
available = [p[0] for p in available_processors()]
if extras['processor'] not in available:
return f"Invalid processor '{extras['processor']}'. Available processors: {', '.join(available)}", 400

# Validate fetch_backend if provided
if 'fetch_backend' in extras:
from changedetectionio.content_fetchers import available_fetchers
available = [f[0] for f in available_fetchers()]
# Also allow 'system' and extra_browser_* patterns
is_valid = (
extras['fetch_backend'] == 'system' or
extras['fetch_backend'] in available or
extras['fetch_backend'].startswith('extra_browser_')
)
if not is_valid:
return f"Invalid fetch_backend '{extras['fetch_backend']}'. Available: system, {', '.join(available)}", 400

# Validate notification_urls if provided
if 'notification_urls' in extras:
from wtforms import ValidationError
from changedetectionio.api.Notifications import validate_notification_urls
try:
validate_notification_urls(extras['notification_urls'])
except ValidationError as e:
return f"Invalid notification_urls: {str(e)}", 400

urls = request.get_data().decode('utf8').splitlines()
added = []
for url in urls:

@@ -54,13 +165,15 @@ class Import(Resource):
if not len(url):
continue

# If hosts that only contain alphanumerics are allowed ("localhost" for example)
# Validate URL
if not is_safe_valid_url(url):
return f"Invalid or unsupported URL - {url}", 400

# Check for duplicates if dedupe is enabled
if dedupe and self.datastore.url_exists(url):
continue

# Create watch with extras configuration
new_uuid = self.datastore.add_watch(url=url, extras=extras, tag=tags, tag_uuids=tag_uuids)
added.append(new_uuid)
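A hedged sketch of how a client might exercise the extended import endpoint shown above. The base URL, API key header value and the specific field names are placeholders and assume your deployment and the create-watch schema expose them:

```python
import requests

API = "http://localhost:5000/api/v1/import"          # placeholder deployment URL
HEADERS = {"x-api-key": "YOUR_API_KEY", "Content-Type": "text/plain"}

urls = "https://example.com/page-1\nhttps://example.com/page-2"

# Anything other than tag/tag_uuids/dedupe/proxy is treated as watch
# configuration and converted according to the create-watch schema types.
params = {
    "tag": "pricing",
    "dedupe": "true",
    "paused": "true",                      # boolean field (assumed present in the schema)
    "time_between_check": '{"hours": 6}',  # object field passed as JSON
}

r = requests.post(API, params=params, data=urls, headers=HEADERS)
print(r.status_code, r.text)  # expected to report the created watch UUIDs on success
```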
@@ -24,8 +24,7 @@ class Tag(Resource):
@validate_openapi_request('getTag')
def get(self, uuid):
"""Get data for a single tag/group, toggle notification muting, or recheck all."""
from copy import deepcopy
tag = deepcopy(self.datastore.data['settings']['application']['tags'].get(uuid))
tag = self.datastore.data['settings']['application']['tags'].get(uuid)
if not tag:
abort(404, message=f'No tag exists with the UUID of {uuid}')

@@ -62,12 +61,12 @@ class Tag(Resource):
return {'status': f'OK, queueing {len(watches_to_queue)} watches in background'}, 202

if request.args.get('muted', '') == 'muted':
self.datastore.data['settings']['application']['tags'][uuid]['notification_muted'] = True
self.datastore.commit()
tag['notification_muted'] = True
tag.commit()
return "OK", 200
elif request.args.get('muted', '') == 'unmuted':
self.datastore.data['settings']['application']['tags'][uuid]['notification_muted'] = False
self.datastore.commit()
tag['notification_muted'] = False
tag.commit()
return "OK", 200

return tag

@@ -81,7 +80,17 @@ class Tag(Resource):

# Delete the tag, and any tag reference
del self.datastore.data['settings']['application']['tags'][uuid]
self.datastore.commit()

# Delete tag.json file if it exists
import os
tag_dir = os.path.join(self.datastore.datastore_path, uuid)
tag_json = os.path.join(tag_dir, "tag.json")
if os.path.exists(tag_json):
try:
os.unlink(tag_json)
logger.info(f"Deleted tag.json for tag {uuid}")
except Exception as e:
logger.error(f"Failed to delete tag.json for tag {uuid}: {e}")

# Remove tag from all watches
for watch_uuid, watch in self.datastore.data['watching'].items():

@@ -111,7 +120,7 @@ class Tag(Resource):
return str(e), 400

tag.update(request.json)
self.datastore.commit()
tag.commit()

return "OK", 200

@@ -374,10 +374,10 @@ class WatchFavicon(Resource):
favicon_filename = watch.get_favicon_filename()
if favicon_filename:
# Use cached MIME type detection
filepath = os.path.join(watch.watch_data_dir, favicon_filename)
filepath = os.path.join(watch.data_dir, favicon_filename)
mime = get_favicon_mime_type(filepath)

response = make_response(send_from_directory(watch.watch_data_dir, favicon_filename))
response = make_response(send_from_directory(watch.data_dir, favicon_filename))
response.headers['Content-type'] = mime
response.headers['Cache-Control'] = 'max-age=300, must-revalidate' # Cache for 5 minutes, then revalidate
return response
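The repeated `watch.watch_data_dir` to `watch.data_dir` substitutions throughout this diff point at the shared property introduced on `watch_base` (see the watch_base hunk later in this diff). A minimal sketch of what that property resolves to, with placeholder class name, path and UUID:

```python
import os

class EntityWithDataDir(dict):
    """Toy stand-in for the data_dir property added to watch_base."""
    def __init__(self, datastore_path, uuid):
        super().__init__(uuid=uuid)
        self._datastore_path = datastore_path

    @property
    def data_dir(self):
        # /datastore/{uuid}/ style path, or None when no datastore_path is set
        return os.path.join(self._datastore_path, self['uuid']) if self._datastore_path else None

w = EntityWithDataDir("/tmp/changedetection-data", "4503b79c-d079-4a4d-b4ef-6b1c1f2b0f9a")
print(w.data_dir)  # /tmp/changedetection-data/4503b79c-d079-4a4d-b4ef-6b1c1f2b0f9a
```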
@@ -47,7 +47,7 @@ def create_backup(datastore_path, watches: dict):

# Add any data in the watch data directory.
for uuid, w in watches.items():
for f in Path(w.watch_data_dir).glob('*'):
for f in Path(w.data_dir).glob('*'):
zipObj.write(f,
# Use the full path to access the file, but make the file 'relative' in the Zip.
arcname=os.path.join(f.parts[-2], f.parts[-1]),

@@ -285,8 +285,8 @@ def construct_blueprint(datastore: ChangeDetectionStore):
watch = datastore.data['watching'].get(uuid)
filename = f"step_before-{step_n}.jpeg" if request.args.get('type', '') == 'before' else f"step_{step_n}.jpeg"

if step_n and watch and os.path.isfile(os.path.join(watch.watch_data_dir, filename)):
response = make_response(send_from_directory(directory=watch.watch_data_dir, path=filename))
if step_n and watch and os.path.isfile(os.path.join(watch.data_dir, filename)):
response = make_response(send_from_directory(directory=watch.data_dir, path=filename))
response.headers['Content-type'] = 'image/jpeg'
response.headers['Cache-Control'] = 'no-cache, no-store, must-revalidate'
response.headers['Pragma'] = 'no-cache'

@@ -1,8 +1,9 @@
import os
from copy import deepcopy
from datetime import datetime
from datetime import datetime, timedelta
from zoneinfo import ZoneInfo, available_timezones
import secrets
import time
import flask_login
from flask import Blueprint, render_template, request, redirect, url_for, flash
from flask_babel import gettext

@@ -142,6 +143,9 @@ def construct_blueprint(datastore: ChangeDetectionStore):
active_plugins = get_active_plugins()
python_version = f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}"

# Calculate uptime in seconds
uptime_seconds = time.time() - datastore.start_time

# Get plugin settings tabs and instantiate forms
plugin_tabs = get_plugin_settings_tabs()
plugin_forms = {}

@@ -160,6 +164,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
active_plugins=active_plugins,
api_key=datastore.data['settings']['application'].get('api_access_token'),
python_version=python_version,
uptime_seconds=uptime_seconds,
available_timezones=sorted(available_timezones()),
emailprefix=os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False),
extra_notification_token_placeholder_info=datastore.get_unique_notification_token_placeholders_available(),

@@ -394,6 +394,7 @@ nav
{% endfor %}
{% endif %}
<div class="tab-pane-inner" id="info">
<p><strong>{{ _('Uptime:') }}</strong> {{ uptime_seconds|format_duration }}</p>
<p><strong>{{ _('Python version:') }}</strong> {{ python_version }}</p>
<p><strong>{{ _('Plugins active:') }}</strong></p>
{% if active_plugins %}
@@ -57,9 +57,10 @@ def construct_blueprint(datastore: ChangeDetectionStore):
@tags_blueprint.route("/mute/<string:uuid>", methods=['GET'])
@login_optionally_required
def mute(uuid):
if datastore.data['settings']['application']['tags'].get(uuid):
datastore.data['settings']['application']['tags'][uuid]['notification_muted'] = not datastore.data['settings']['application']['tags'][uuid]['notification_muted']
datastore.commit()
tag = datastore.data['settings']['application']['tags'].get(uuid)
if tag:
tag['notification_muted'] = not tag['notification_muted']
tag.commit()
return redirect(url_for('tags.tags_overview_page'))

@tags_blueprint.route("/delete/<string:uuid>", methods=['GET'])

@@ -69,6 +70,17 @@ def construct_blueprint(datastore: ChangeDetectionStore):
if datastore.data['settings']['application']['tags'].get(uuid):
del datastore.data['settings']['application']['tags'][uuid]

# Delete tag.json file if it exists
import os
tag_dir = os.path.join(datastore.datastore_path, uuid)
tag_json = os.path.join(tag_dir, "tag.json")
if os.path.exists(tag_json):
try:
os.unlink(tag_json)
logger.info(f"Deleted tag.json for tag {uuid}")
except Exception as e:
logger.error(f"Failed to delete tag.json for tag {uuid}: {e}")

# Remove tag from all watches in background thread to avoid blocking
def remove_tag_background(tag_uuid):
"""Background thread to remove tag from watches - discarded after completion."""

@@ -115,9 +127,19 @@
@tags_blueprint.route("/delete_all", methods=['GET'])
@login_optionally_required
def delete_all():
# Delete all tag.json files
import os
for tag_uuid in list(datastore.data['settings']['application']['tags'].keys()):
tag_dir = os.path.join(datastore.datastore_path, tag_uuid)
tag_json = os.path.join(tag_dir, "tag.json")
if os.path.exists(tag_json):
try:
os.unlink(tag_json)
except Exception as e:
logger.error(f"Failed to delete tag.json for tag {tag_uuid}: {e}")

# Clear all tags from settings immediately
datastore.data['settings']['application']['tags'] = {}
datastore.commit()

# Clear tags from all watches in background thread to avoid blocking
def clear_all_tags_background():

@@ -207,10 +229,10 @@
if uuid == 'first':
uuid = list(datastore.data['settings']['application']['tags'].keys()).pop()

default = datastore.data['settings']['application']['tags'].get(uuid)
tag = datastore.data['settings']['application']['tags'].get(uuid)

form = group_restock_settings_form(formdata=request.form if request.method == 'POST' else None,
data=default,
data=tag,
extra_notification_tokens=datastore.get_unique_notification_tokens_available()
)
# @todo subclass form so validation works

@@ -219,9 +241,9 @@
# flash(','.join(l), 'error')
# return redirect(url_for('tags.form_tag_edit_submit', uuid=uuid))

datastore.data['settings']['application']['tags'][uuid].update(form.data)
datastore.data['settings']['application']['tags'][uuid]['processor'] = 'restock_diff'
datastore.commit()
tag.update(form.data)
tag['processor'] = 'restock_diff'
tag.commit()
flash(gettext("Updated"))

return redirect(url_for('tags.tags_overview_page'))

@@ -337,9 +337,9 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
if uuid == 'first':
uuid = list(datastore.data['watching'].keys()).pop()
watch = datastore.data['watching'].get(uuid)
if watch and watch.history.keys() and os.path.isdir(watch.watch_data_dir):
if watch and watch.history.keys() and os.path.isdir(watch.data_dir):
latest_filename = list(watch.history.keys())[-1]
html_fname = os.path.join(watch.watch_data_dir, f"{latest_filename}.html.br")
html_fname = os.path.join(watch.data_dir, f"{latest_filename}.html.br")
with open(html_fname, 'rb') as f:
if html_fname.endswith('.br'):
# Read and decompress the Brotli file
@@ -266,6 +266,47 @@ def _jinja2_filter_seconds_precise(timestamp):

return format(int(time.time()-timestamp), ',d')

@app.template_filter('format_duration')
def _jinja2_filter_format_duration(seconds):
"""Format a duration in seconds into human readable string like '5 days, 3 hours, 30 minutes'"""
from datetime import timedelta

if not seconds or seconds < 0:
return gettext('0 seconds')

td = timedelta(seconds=int(seconds))

# Calculate components
years = td.days // 365
remaining_days = td.days % 365
months = remaining_days // 30
remaining_days = remaining_days % 30
weeks = remaining_days // 7
days = remaining_days % 7

hours = td.seconds // 3600
minutes = (td.seconds % 3600) // 60
secs = td.seconds % 60

# Build parts list
parts = []
if years > 0:
parts.append(f"{years} {gettext('year') if years == 1 else gettext('years')}")
if months > 0:
parts.append(f"{months} {gettext('month') if months == 1 else gettext('months')}")
if weeks > 0:
parts.append(f"{weeks} {gettext('week') if weeks == 1 else gettext('weeks')}")
if days > 0:
parts.append(f"{days} {gettext('day') if days == 1 else gettext('days')}")
if hours > 0:
parts.append(f"{hours} {gettext('hour') if hours == 1 else gettext('hours')}")
if minutes > 0:
parts.append(f"{minutes} {gettext('minute') if minutes == 1 else gettext('minutes')}")
if secs > 0 or not parts:
parts.append(f"{secs} {gettext('second') if secs == 1 else gettext('seconds')}")

return ", ".join(parts)
@app.template_filter('fetcher_status_icons')
def _jinja2_filter_fetcher_status_icons(fetcher_name):
"""Get status icon HTML for a given fetcher.

@@ -703,10 +744,10 @@ def changedetection_app(config=None, datastore_o=None):
favicon_filename = watch.get_favicon_filename()
if favicon_filename:
# Use cached MIME type detection
filepath = os.path.join(watch.watch_data_dir, favicon_filename)
filepath = os.path.join(watch.data_dir, favicon_filename)
mime = get_favicon_mime_type(filepath)

response = make_response(send_from_directory(watch.watch_data_dir, favicon_filename))
response = make_response(send_from_directory(watch.data_dir, favicon_filename))
response.headers['Content-type'] = mime
response.headers['Cache-Control'] = 'max-age=300, must-revalidate' # Cache for 5 minutes, then revalidate
return response

@@ -807,7 +848,7 @@ def changedetection_app(config=None, datastore_o=None):
app.register_blueprint(watchlist.construct_blueprint(datastore=datastore, update_q=update_q, queuedWatchMetaData=queuedWatchMetaData), url_prefix='')

# Initialize Socket.IO server conditionally based on settings
socket_io_enabled = datastore.data['settings']['application']['ui'].get('socket_io_enabled', True)
socket_io_enabled = datastore.data['settings']['application'].get('ui', {}).get('socket_io_enabled', True)
if socket_io_enabled and app.config.get('batch_mode'):
socket_io_enabled = False
if socket_io_enabled:

@@ -20,10 +20,12 @@ See: Watch.py model docstring for full Pydantic architecture explanation
See: processors/restock_diff/processor.py:184-192 for current manual implementation
"""

import os
from changedetectionio.model import watch_base
from changedetectionio.model.persistence import EntityPersistenceMixin

class model(watch_base):
class model(EntityPersistenceMixin, watch_base):
"""
Tag domain model - groups watches and can override their settings.

@@ -42,11 +44,7 @@ class model(watch_base):
"""

def __init__(self, *arg, **kw):
# Store datastore reference (optional for Tags, but good for consistency)
self.__datastore = kw.get('__datastore')
if kw.get('__datastore'):
del kw['__datastore']

# Parent class (watch_base) handles __datastore and __datastore_path
super(model, self).__init__(*arg, **kw)

self['overrides_watch'] = kw.get('default', {}).get('overrides_watch')

@@ -54,3 +52,7 @@ class model(watch_base):
if kw.get('default'):
self.update(kw['default'])
del kw['default']

# _save_to_disk() method provided by EntityPersistenceMixin
# commit() and _get_commit_data() methods inherited from watch_base
# Tag uses default _get_commit_data() (includes all keys)
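The `class model(EntityPersistenceMixin, watch_base)` composition means persistence comes from the mixin while the commit workflow lives in the base class. A toy reproduction of that split (class and method names below are illustrative stand-ins, not the real imports):

```python
class WatchBase(dict):
    def commit(self):
        # Template method: shared workflow, delegates the actual write
        data = self._get_commit_data()
        self._save_to_disk(data, self.get('uuid'))

    def _get_commit_data(self):
        return dict(self)  # default: include all keys

    def _save_to_disk(self, data_dict, uuid):
        raise NotImplementedError

class PersistenceMixin:
    def _save_to_disk(self, data_dict, uuid):
        print(f"writing {uuid} -> {sorted(data_dict)}")

class TagModel(PersistenceMixin, WatchBase):
    pass  # mixin supplies _save_to_disk, base supplies commit()

t = TagModel(uuid='abc', title='Pricing pages', notification_muted=True)
t.commit()  # writing abc -> ['notification_muted', 'title', 'uuid']
print([c.__name__ for c in TagModel.__mro__])  # mixin resolves before the base
```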
@@ -24,8 +24,6 @@ The dream architecture would use Pydantic for:
See class model docstring for detailed explanation and examples.
See: processors/restock_diff/processor.py:184-192 for manual resolution example
"""
import gc
from copy import copy

from blinker import signal
from changedetectionio.validate_url import is_safe_valid_url

@@ -33,6 +31,7 @@ from changedetectionio.validate_url import is_safe_valid_url
from changedetectionio.strtobool import strtobool
from changedetectionio.jinja2_custom import render as jinja_render
from . import watch_base
from .persistence import EntityPersistenceMixin
import os
import re
from pathlib import Path

@@ -129,7 +128,7 @@ def _brotli_save(contents, filepath, mode=None, fallback_uncompressed=False):
raise Exception(f"Brotli compression failed for {filepath}: {e}")

class model(watch_base):
class model(EntityPersistenceMixin, watch_base):
"""
Watch domain model for monitoring URL changes.

@@ -228,16 +227,11 @@ class model(watch_base):
jitter_seconds = 0

def __init__(self, *arg, **kw):
self.__datastore_path = kw.get('datastore_path')
if kw.get('datastore_path'):
del kw['datastore_path']

self.__datastore = kw.get('__datastore')
if not self.__datastore:
# Validate __datastore before calling parent (Watch requires it)
if not kw.get('__datastore'):
raise ValueError("Watch object requires '__datastore' reference - cannot access global settings without it")
if kw.get('__datastore'):
del kw['__datastore']

# Parent class (watch_base) handles __datastore and __datastore_path
super(model, self).__init__(*arg, **kw)

if kw.get('default'):

@@ -265,11 +259,6 @@ class model(watch_base):
def has_unviewed(self):
return int(self.newest_history_key) > int(self['last_viewed']) and self.__history_n >= 2

def ensure_data_dir_exists(self):
if not os.path.isdir(self.watch_data_dir):
logger.debug(f"> Creating data dir {self.watch_data_dir}")
os.mkdir(self.watch_data_dir)

@property
def link(self):

@@ -325,7 +314,8 @@

# JSON Data, Screenshots, Textfiles (history index and snapshots), HTML in the future etc
# But preserve processor config files (they're configuration, not history data)
for item in pathlib.Path(str(self.watch_data_dir)).rglob("*.*"):
# Use glob not rglob here for safety.
for item in pathlib.Path(str(self.data_dir)).glob("*.*"):
# Skip processor config files
if item.name in processor_config_files:
continue

@@ -434,11 +424,11 @@
tmp_history = {}

# In the case we are only using the watch for processing without history
if not self.watch_data_dir:
if not self.data_dir:
return []

# Read the history file as a dict
fname = os.path.join(self.watch_data_dir, self.history_index_filename)
fname = os.path.join(self.data_dir, self.history_index_filename)
if os.path.isfile(fname):
logger.debug(f"Reading watch history index for {self.get('uuid')}")
with open(fname, "r", encoding='utf-8') as f:

@@ -451,13 +441,13 @@
# Cross-platform: check for any path separator (works on Windows and Unix)
if os.sep not in v and '/' not in v and '\\' not in v:
# Relative filename only, no path separators
v = os.path.join(self.watch_data_dir, v)
v = os.path.join(self.data_dir, v)
else:
# It's possible that they moved the datadir on older versions
# So the snapshot exists but is in a different path
# Cross-platform: use os.path.basename instead of split('/')
snapshot_fname = os.path.basename(v)
proposed_new_path = os.path.join(self.watch_data_dir, snapshot_fname)
proposed_new_path = os.path.join(self.data_dir, snapshot_fname)
if not os.path.exists(v) and os.path.exists(proposed_new_path):
v = proposed_new_path

@@ -474,7 +464,7 @@

@property
def has_history(self):
fname = os.path.join(self.watch_data_dir, self.history_index_filename)
fname = os.path.join(self.data_dir, self.history_index_filename)
return os.path.isfile(fname)

@property

@@ -580,7 +570,7 @@ class model(watch_base):
def _write_atomic(self, dest, data, mode='wb'):
"""Write data atomically to dest using a temp file"""
import tempfile
with tempfile.NamedTemporaryFile(mode, delete=False, dir=self.watch_data_dir) as tmp:
with tempfile.NamedTemporaryFile(mode, delete=False, dir=self.data_dir) as tmp:
tmp.write(data)
tmp.flush()
os.fsync(tmp.fileno())
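The hunk above only shows the temp-file half of `_write_atomic`; the final rename falls outside the hunk. A self-contained sketch of the usual write-then-rename pattern, where the `os.replace` step is an assumption about how the method finishes:

```python
import os
import tempfile

def write_atomic(dest: str, data: bytes) -> None:
    """Sketch of a temp-file + rename atomic write (the rename is assumed,
    it is not visible in the hunk above)."""
    dir_name = os.path.dirname(dest)
    with tempfile.NamedTemporaryFile('wb', delete=False, dir=dir_name) as tmp:
        tmp.write(data)
        tmp.flush()
        os.fsync(tmp.fileno())      # force bytes to disk before the rename
        tmp_name = tmp.name
    os.replace(tmp_name, dest)      # atomic replace on the same filesystem

write_atomic("/tmp/history.txt", b"1700000000,1700000000.txt.br\n")
```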
@@ -589,7 +579,7 @@

def history_trim(self, newest_n_items):
from pathlib import Path

import gc
# Sort by timestamp (key)
sorted_items = sorted(self.history.items(), key=lambda x: int(x[0]))

@@ -606,7 +596,7 @@
finally:
logger.debug(f"[{self.get('uuid')}] Deleted {item[1]} history snapshot")
try:
dest = os.path.join(self.watch_data_dir, self.history_index_filename)
dest = os.path.join(self.data_dir, self.history_index_filename)
output = "\r\n".join(
f"{k},{Path(v).name}"
for k, v in keep_part.items()

@@ -645,7 +635,7 @@
ext = 'bin'

snapshot_fname = f"{snapshot_id}.{ext}"
dest = os.path.join(self.watch_data_dir, snapshot_fname)
dest = os.path.join(self.data_dir, snapshot_fname)
self._write_atomic(dest, contents)
logger.trace(f"Saved binary snapshot as {snapshot_fname} ({len(contents)} bytes)")

@@ -655,7 +645,7 @@
# Compressed text
import brotli
snapshot_fname = f"{snapshot_id}.txt.br"
dest = os.path.join(self.watch_data_dir, snapshot_fname)
dest = os.path.join(self.data_dir, snapshot_fname)

if not os.path.exists(dest):
try:

@@ -666,16 +656,16 @@
logger.error(f"{self.get('uuid')} - Brotli compression failed: {e}")
# Fallback to uncompressed
snapshot_fname = f"{snapshot_id}.txt"
dest = os.path.join(self.watch_data_dir, snapshot_fname)
dest = os.path.join(self.data_dir, snapshot_fname)
self._write_atomic(dest, contents.encode('utf-8'))
else:
# Plain text
snapshot_fname = f"{snapshot_id}.txt"
dest = os.path.join(self.watch_data_dir, snapshot_fname)
dest = os.path.join(self.data_dir, snapshot_fname)
self._write_atomic(dest, contents.encode('utf-8'))

# Append to history.txt atomically
index_fname = os.path.join(self.watch_data_dir, self.history_index_filename)
index_fname = os.path.join(self.data_dir, self.history_index_filename)
index_line = f"{timestamp},{snapshot_fname}\n"

with open(index_fname, 'a', encoding='utf-8') as f:

@@ -693,10 +683,7 @@
# if self.history_snapshot_max_length: return self.history_snapshot_max_length
# if tag := self._get_override_tag(): return tag.history_snapshot_max_length
# return self._datastore.settings.history_snapshot_max_length
maxlen = (
self.get('history_snapshot_max_length')
or (self.__datastore and self.__datastore['settings']['application'].get('history_snapshot_max_length'))
)
maxlen = self.get('history_snapshot_max_length') or self.get_global_setting('application', 'history_snapshot_max_length')

if maxlen and self.__history_n and self.__history_n > maxlen:
self.history_trim(newest_n_items=maxlen)
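The per-watch value above falls back to the global setting through `or`, so any falsy per-watch value defers to the global one. A tiny sketch of those semantics (the function name is illustrative):

```python
def effective_max_length(watch_value, global_value):
    # Mirrors `self.get(...) or self.get_global_setting(...)` above
    return watch_value or global_value

print(effective_max_length(None, 200))  # 200 (no per-watch override)
print(effective_max_length(50, 200))    # 50  (per-watch override wins)
print(effective_max_length(0, 200))     # 200 (0 is falsy, so it also falls back)
```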
@@ -753,7 +740,7 @@
return not local_lines.issubset(existing_history)

def get_screenshot(self):
fname = os.path.join(self.watch_data_dir, "last-screenshot.png")
fname = os.path.join(self.data_dir, "last-screenshot.png")
if os.path.isfile(fname):
return fname

@@ -768,7 +755,7 @@
if not favicon_fname:
return True
try:
fname = next(iter(glob.glob(os.path.join(self.watch_data_dir, "favicon.*"))), None)
fname = next(iter(glob.glob(os.path.join(self.data_dir, "favicon.*"))), None)
logger.trace(f"Favicon file maybe found at {fname}")
if os.path.isfile(fname):
file_age = int(time.time() - os.path.getmtime(fname))

@@ -801,7 +788,7 @@
base = "favicon"
extension = "ico"

fname = os.path.join(self.watch_data_dir, f"favicon.{extension}")
fname = os.path.join(self.data_dir, f"favicon.{extension}")

try:
# validate=True makes sure the string only contains valid base64 chars

@@ -848,7 +835,7 @@
import glob

# Search for all favicon.* files
files = glob.glob(os.path.join(self.watch_data_dir, "favicon.*"))
files = glob.glob(os.path.join(self.data_dir, "favicon.*"))

if not files:
result = None

@@ -875,7 +862,7 @@
import os
import time

thumbnail_path = os.path.join(self.watch_data_dir, "thumbnail.jpeg")
thumbnail_path = os.path.join(self.data_dir, "thumbnail.jpeg")
top_trim = 500 # Pixels from top of screenshot to use

screenshot_path = self.get_screenshot()

@@ -926,7 +913,7 @@
return None

def __get_file_ctime(self, filename):
fname = os.path.join(self.watch_data_dir, filename)
fname = os.path.join(self.data_dir, filename)
if os.path.isfile(fname):
return int(os.path.getmtime(fname))
return False

@@ -951,14 +938,9 @@
def snapshot_error_screenshot_ctime(self):
return self.__get_file_ctime('last-error-screenshot.png')

@property
def watch_data_dir(self):
# The base dir of the watch data
return os.path.join(self.__datastore_path, self['uuid']) if self.__datastore_path else None

def get_error_text(self):
"""Return the text saved from a previous request that resulted in a non-200 error"""
fname = os.path.join(self.watch_data_dir, "last-error.txt")
fname = os.path.join(self.data_dir, "last-error.txt")
if os.path.isfile(fname):
with open(fname, 'r', encoding='utf-8') as f:
return f.read()

@@ -966,7 +948,7 @@

def get_error_snapshot(self):
"""Return path to the screenshot that resulted in a non-200 error"""
fname = os.path.join(self.watch_data_dir, "last-error-screenshot.png")
fname = os.path.join(self.data_dir, "last-error-screenshot.png")
if os.path.isfile(fname):
return fname
return False

@@ -990,34 +972,17 @@
def toggle_mute(self):
self['notification_muted'] ^= True

def commit(self):
def _get_commit_data(self):
"""
Save this watch immediately to disk using atomic write.
Prepare watch data for commit.

Replaces the old dirty-tracking system with immediate persistence.
Uses atomic write pattern (temp file + rename) for crash safety.

Fire-and-forget: Logs errors but does not raise exceptions.
Watch data remains in memory even if save fails, so next commit will retry.
Excludes processor_config_* keys (stored in separate files).
Normalizes browser_steps to empty list if no meaningful steps.
"""
from loguru import logger

if not self.__datastore:
logger.error(f"Cannot commit watch {self.get('uuid')} without datastore reference")
return

if not self.watch_data_dir:
logger.error(f"Cannot commit watch {self.get('uuid')} without datastore_path")
return

# Convert to dict for serialization, excluding processor config keys
# Processor configs are stored separately in processor-specific JSON files
# Use deepcopy to prevent mutations from affecting the original Watch object
import copy

# Acquire datastore lock to prevent concurrent modifications during copy
# Take a quick shallow snapshot under lock, then deep copy outside lock
lock = self.__datastore.lock if self.__datastore and hasattr(self.__datastore, 'lock') else None
# Get base snapshot with lock
lock = self._datastore.lock if self._datastore and hasattr(self._datastore, 'lock') else None

if lock:
with lock:

@@ -1025,20 +990,17 @@
else:
snapshot = dict(self)

# Deep copy snapshot (slower, but done outside lock to minimize contention)
# Exclude processor config keys (stored separately)
watch_dict = {k: copy.deepcopy(v) for k, v in snapshot.items() if not k.startswith('processor_config_')}

# Normalize browser_steps: if no meaningful steps, save as empty list
if not self.has_browser_steps:
watch_dict['browser_steps'] = []

# Use existing atomic write helper
from changedetectionio.store.file_saving_datastore import save_watch_atomic
try:
save_watch_atomic(self.watch_data_dir, self.get('uuid'), watch_dict)
logger.debug(f"Committed watch {self.get('uuid')}")
except Exception as e:
logger.error(f"Failed to commit watch {self.get('uuid')}: {e}")
return watch_dict

# _save_to_disk() method provided by EntityPersistenceMixin
# commit() method inherited from watch_base

def extra_notification_token_values(self):

@@ -1070,7 +1032,7 @@
if not csv_writer:
# A file on the disk can be transferred much faster via flask than a string reply
csv_output_filename = f"report-{self.get('uuid')}.csv"
f = open(os.path.join(self.watch_data_dir, csv_output_filename), 'w')
f = open(os.path.join(self.data_dir, csv_output_filename), 'w')
# @todo some headers in the future
#fieldnames = ['Epoch seconds', 'Date']
csv_writer = csv.writer(f,

@@ -1112,7 +1074,7 @@

def save_error_text(self, contents):
self.ensure_data_dir_exists()
target_path = os.path.join(self.watch_data_dir, "last-error.txt")
target_path = os.path.join(self.data_dir, "last-error.txt")
with open(target_path, 'w', encoding='utf-8') as f:
f.write(contents)

@@ -1121,9 +1083,9 @@
import zlib

if as_error:
target_path = os.path.join(str(self.watch_data_dir), "elements-error.deflate")
target_path = os.path.join(str(self.data_dir), "elements-error.deflate")
else:
target_path = os.path.join(str(self.watch_data_dir), "elements.deflate")
target_path = os.path.join(str(self.data_dir), "elements.deflate")

self.ensure_data_dir_exists()

@@ -1138,9 +1100,9 @@
def save_screenshot(self, screenshot: bytes, as_error=False):

if as_error:
target_path = os.path.join(self.watch_data_dir, "last-error-screenshot.png")
target_path = os.path.join(self.data_dir, "last-error-screenshot.png")
else:
target_path = os.path.join(self.watch_data_dir, "last-screenshot.png")
target_path = os.path.join(self.data_dir, "last-screenshot.png")

self.ensure_data_dir_exists()

@@ -1151,7 +1113,7 @@

def get_last_fetched_text_before_filters(self):
import brotli
filepath = os.path.join(self.watch_data_dir, 'last-fetched.br')
filepath = os.path.join(self.data_dir, 'last-fetched.br')

if not os.path.isfile(filepath) or os.path.getsize(filepath) == 0:
# If a previous attempt doesnt yet exist, just snarf the previous snapshot instead

@@ -1166,13 +1128,13 @@

def save_last_text_fetched_before_filters(self, contents):
import brotli
filepath = os.path.join(self.watch_data_dir, 'last-fetched.br')
filepath = os.path.join(self.data_dir, 'last-fetched.br')
_brotli_save(contents, filepath, mode=brotli.MODE_TEXT, fallback_uncompressed=False)

def save_last_fetched_html(self, timestamp, contents):
self.ensure_data_dir_exists()
snapshot_fname = f"{timestamp}.html.br"
filepath = os.path.join(self.watch_data_dir, snapshot_fname)
filepath = os.path.join(self.data_dir, snapshot_fname)
_brotli_save(contents, filepath, mode=None, fallback_uncompressed=True)
self._prune_last_fetched_html_snapshots()

@@ -1180,7 +1142,7 @@
import brotli

snapshot_fname = f"{timestamp}.html.br"
filepath = os.path.join(self.watch_data_dir, snapshot_fname)
filepath = os.path.join(self.data_dir, snapshot_fname)
if os.path.isfile(filepath):
with open(filepath, 'rb') as f:
return (brotli.decompress(f.read()).decode('utf-8'))

@@ -1195,7 +1157,7 @@

for index, timestamp in enumerate(dates):
snapshot_fname = f"{timestamp}.html.br"
filepath = os.path.join(self.watch_data_dir, snapshot_fname)
filepath = os.path.join(self.data_dir, snapshot_fname)

# Keep only the first 2
if index > 1 and os.path.isfile(filepath):

@@ -1206,7 +1168,7 @@
def get_browsersteps_available_screenshots(self):
"For knowing which screenshots are available to show the user in BrowserSteps UI"
available = []
for f in Path(self.watch_data_dir).glob('step_before-*.jpeg'):
for f in Path(self.data_dir).glob('step_before-*.jpeg'):
step_n=re.search(r'step_before-(\d+)', f.name)
if step_n:
available.append(step_n.group(1))
@@ -2,9 +2,14 @@ import os
import uuid

from changedetectionio import strtobool
from .persistence import EntityPersistenceMixin

__all__ = ['EntityPersistenceMixin', 'watch_base']

USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH = 'System default'
CONDITIONS_MATCH_LOGIC_DEFAULT = 'ALL'

class watch_base(dict):
"""
Base watch domain model (inherits from dict for backward compatibility).

@@ -138,7 +143,7 @@ class watch_base(dict):

Instance Attributes (not serialized):
__datastore: Reference to parent DataStore (set externally after creation)
watch_data_dir: Filesystem path for this watch's data directory
data_dir: Filesystem path for this watch's data directory

Notes:
- Many fields default to None to distinguish "not set" from "set to default"

@@ -149,6 +154,17 @@
"""

def __init__(self, *arg, **kw):
# Store datastore reference (common to Watch and Tag)
# Use single underscore to avoid name mangling issues in subclasses
self._datastore = kw.get('__datastore')
if kw.get('__datastore'):
del kw['__datastore']

# Store datastore_path (common to Watch and Tag)
self._datastore_path = kw.get('datastore_path')
if kw.get('datastore_path'):
del kw['datastore_path']

self.update({
# Custom notification content
# Re #110, so then if this is set to None, we know to use the default value instead

@@ -318,8 +334,10 @@
attr_value = getattr(self, attr_name)

# Special handling: Share references to large objects instead of copying
# Examples: __datastore, __app_reference, __global_settings, etc.
if attr_name.endswith('__datastore') or attr_name.endswith('__app'):
# Examples: _datastore, __datastore, __app_reference, __global_settings, etc.
if (attr_name == '_datastore' or
attr_name.endswith('__datastore') or
attr_name.endswith('__app')):
# Share the reference (don't copy!) to prevent memory leaks
setattr(new_obj, attr_name, attr_value)
# Skip cache attributes - let them regenerate on demand

@@ -349,7 +367,8 @@
try:
attr_value = getattr(self, attr_name)
# Exclude large reference objects and caches from serialization
if not (attr_name.endswith('__datastore') or
if not (attr_name == '_datastore' or
attr_name.endswith('__datastore') or
attr_name.endswith('__app') or
'cache' in attr_name.lower() or
callable(attr_value)):

@@ -377,4 +396,126 @@

# Restore instance attributes
for attr_name, attr_value in metadata.items():
setattr(self, attr_name, attr_value)
setattr(self, attr_name, attr_value)

@property
def data_dir(self):
"""
The base directory for this watch/tag data (property, computed from UUID).

Common property for both Watch and Tag objects.
Returns path like: /datastore/{uuid}/
"""
return os.path.join(self._datastore_path, self['uuid']) if self._datastore_path else None

def ensure_data_dir_exists(self):
"""
Create the data directory if it doesn't exist.

Common method for both Watch and Tag objects.
"""
from loguru import logger
if not os.path.isdir(self.data_dir):
logger.debug(f"> Creating data dir {self.data_dir}")
os.mkdir(self.data_dir)

def get_global_setting(self, *path):
"""
Get a setting from the global datastore configuration.

Args:
*path: Path to the setting (e.g., 'application', 'history_snapshot_max_length')

Returns:
The setting value, or None if not found

Example:
maxlen = self.get_global_setting('application', 'history_snapshot_max_length')
"""
if not self._datastore:
return None

try:
value = self._datastore['settings']
for key in path:
value = value[key]
return value
except (KeyError, TypeError):
return None

def _get_commit_data(self):
"""
Prepare data for commit (can be overridden by subclasses).

Returns:
dict: Data to serialize (filtered as needed by subclass)
"""
import copy

# Acquire datastore lock to prevent concurrent modifications during copy
lock = self._datastore.lock if self._datastore and hasattr(self._datastore, 'lock') else None

if lock:
with lock:
snapshot = dict(self)
else:
snapshot = dict(self)

# Deep copy snapshot (slower, but done outside lock to minimize contention)
# Subclasses can override to filter keys (e.g., Watch excludes processor_config_*)
return {k: copy.deepcopy(v) for k, v in snapshot.items()}

def _save_to_disk(self, data_dict, uuid):
"""
Save data to disk (must be implemented by subclasses).

Args:
data_dict: Dictionary to save
uuid: UUID for logging

Raises:
NotImplementedError: If subclass doesn't implement
"""
raise NotImplementedError("Subclass must implement _save_to_disk()")

def commit(self):
"""
Save this watch/tag immediately to disk using atomic write.

Common commit logic for Watch and Tag objects.
Subclasses override _get_commit_data() and _save_to_disk() for specifics.

Fire-and-forget: Logs errors but does not raise exceptions.
Data remains in memory even if save fails, so next commit will retry.
"""
from loguru import logger

if not self.data_dir:
entity_type = self.__class__.__name__
logger.error(f"Cannot commit {entity_type} {self.get('uuid')} without datastore_path")
return

uuid = self.get('uuid')
if not uuid:
entity_type = self.__class__.__name__
logger.error(f"Cannot commit {entity_type} without UUID")
return

# Get data from subclass (may filter keys)
try:
data_dict = self._get_commit_data()
except Exception as e:
logger.error(f"Failed to prepare commit data for {uuid}: {e}")
return

# Save to disk via subclass implementation
try:
# Determine entity type from module name (Watch.py -> watch, Tag.py -> tag)
from changedetectionio.model.persistence import _determine_entity_type
entity_type = _determine_entity_type(self.__class__)
filename = f"{entity_type}.json"

self._save_to_disk(data_dict, uuid)
logger.debug(f"Committed {entity_type} {uuid} to {uuid}/{filename}")
except Exception as e:
logger.error(f"Failed to commit {uuid}: {e}")
changedetectionio/model/persistence.py (84 lines, new file)

@@ -0,0 +1,84 @@
"""
Entity persistence mixin for Watch and Tag models.

Provides file-based persistence using atomic writes.
"""

import functools
import inspect

@functools.lru_cache(maxsize=None)
def _determine_entity_type(cls):
"""
Determine entity type from class hierarchy (cached at class level).

Args:
cls: The class to inspect

Returns:
str: Entity type ('watch', 'tag', etc.)

Raises:
ValueError: If entity type cannot be determined
"""
for base_class in inspect.getmro(cls):
module_name = base_class.__module__
if module_name.startswith('changedetectionio.model.'):
# Get last part after dot: "changedetectionio.model.Watch" -> "watch"
return module_name.split('.')[-1].lower()

raise ValueError(
f"Cannot determine entity type for {cls.__module__}.{cls.__name__}. "
f"Entity must inherit from a class in changedetectionio.model (Watch or Tag)."
)

class EntityPersistenceMixin:
"""
Mixin providing file persistence for watch_base subclasses (Watch, Tag, etc.).

This mixin provides the _save_to_disk() method required by watch_base.commit().
It automatically determines the correct filename and size limits based on class hierarchy.

Usage:
class model(EntityPersistenceMixin, watch_base):  # in Watch.py
pass

class model(EntityPersistenceMixin, watch_base):  # in Tag.py
pass
"""

def _save_to_disk(self, data_dict, uuid):
"""
Save entity to disk using atomic write.

Implements the abstract method required by watch_base.commit().
Automatically determines filename and size limits from class hierarchy.

Args:
data_dict: Dictionary to save
uuid: UUID for logging

Raises:
ValueError: If entity type cannot be determined from class hierarchy
"""
# Import here to avoid circular dependency
from changedetectionio.store.file_saving_datastore import save_entity_atomic

# Determine entity type (cached at class level, not instance level)
entity_type = _determine_entity_type(self.__class__)

# Set filename and size limits based on entity type
filename = f'{entity_type}.json'
max_size_mb = 10 if entity_type == 'watch' else 1

# Save using generic function
save_entity_atomic(
self.data_dir,
uuid,
data_dict,
filename=filename,
entity_type=entity_type,
max_size_mb=max_size_mb
)
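`_determine_entity_type()` derives the on-disk filename ('watch.json' or 'tag.json') from the module that defines a base class, and `lru_cache` keyed on the class keeps the MRO walk to one pass per class. A toy sketch of the same walk; the fake base class only borrows the `__module__` string, no real package is imported:

```python
import functools
import inspect

@functools.lru_cache(maxsize=None)
def determine_entity_type(cls):
    # Same walk as _determine_entity_type above: the first base defined under
    # changedetectionio.model.* decides the entity type.
    for base_class in inspect.getmro(cls):
        if base_class.__module__.startswith('changedetectionio.model.'):
            return base_class.__module__.split('.')[-1].lower()
    raise ValueError(f"Cannot determine entity type for {cls.__name__}")

class FakeWatchBase:
    pass
FakeWatchBase.__module__ = 'changedetectionio.model.Watch'  # faked module string

class MyWatch(FakeWatchBase):
    pass

print(determine_entity_type(MyWatch))  # 'watch', so it would be saved as watch.json
print(determine_entity_type(MyWatch))  # second call is served from the cache
```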
@@ -193,12 +193,12 @@ class difference_detection_processor():
|
||||
import os
|
||||
|
||||
watch = self.datastore.data['watching'].get(self.watch_uuid)
|
||||
watch_data_dir = watch.watch_data_dir
|
||||
data_dir = watch.data_dir
|
||||
|
||||
if not watch_data_dir:
|
||||
if not data_dir:
|
||||
return {}
|
||||
|
||||
filepath = os.path.join(watch_data_dir, filename)
|
||||
filepath = os.path.join(data_dir, filename)
|
||||
|
||||
if not os.path.isfile(filepath):
|
||||
return {}

@@ -223,16 +223,16 @@ class difference_detection_processor():
import os

watch = self.datastore.data['watching'].get(self.watch_uuid)
watch_data_dir = watch.watch_data_dir
data_dir = watch.data_dir

if not watch_data_dir:
logger.warning(f"Cannot save extra watch config {filename}: no watch_data_dir")
if not data_dir:
logger.warning(f"Cannot save extra watch config {filename}: no data_dir")
return

# Ensure directory exists
watch.ensure_data_dir_exists()

filepath = os.path.join(watch_data_dir, filename)
filepath = os.path.join(data_dir, filename)

try:
# If merge is enabled, read existing data first

@@ -414,7 +414,7 @@ def render(watch, datastore, request, url_for, render_template, flash, redirect)

# Load historical data if available (for charts/visualization)
comparison_data = {}
comparison_config_path = os.path.join(watch.watch_data_dir, "visual_comparison_data.json")
comparison_config_path = os.path.join(watch.data_dir, "visual_comparison_data.json")
if os.path.isfile(comparison_config_path):
try:
with open(comparison_config_path, 'r') as f:

@@ -90,7 +90,7 @@ def on_config_save(watch, processor_config, datastore):
processor_config['auto_track_region'] = False

# Remove old template file if exists
template_path = os.path.join(watch.watch_data_dir, CROPPED_IMAGE_TEMPLATE_FILENAME)
template_path = os.path.join(watch.data_dir, CROPPED_IMAGE_TEMPLATE_FILENAME)
if os.path.exists(template_path):
os.remove(template_path)
logger.debug(f"Removed old template file: {template_path}")

@@ -55,7 +55,7 @@ def prepare_filter_prevew(datastore, watch_uuid, form_data):

tmp_watch = deepcopy(datastore.data['watching'].get(watch_uuid))

if tmp_watch and tmp_watch.history and os.path.isdir(tmp_watch.watch_data_dir):
if tmp_watch and tmp_watch.history and os.path.isdir(tmp_watch.data_dir):
# Splice in the temporary stuff from the form
form = forms.processor_text_json_diff_form(formdata=form_data if request.method == 'POST' else None,
data=form_data

@@ -68,7 +68,7 @@ def prepare_filter_prevew(datastore, watch_uuid, form_data):
blank_watch_no_filters['url'] = tmp_watch.get('url')

latest_filename = next(reversed(tmp_watch.history))
html_fname = os.path.join(tmp_watch.watch_data_dir, f"{latest_filename}.html.br")
html_fname = os.path.join(tmp_watch.data_dir, f"{latest_filename}.html.br")
with open(html_fname, 'rb') as f:
decompressed_data = brotli.decompress(f.read()).decode('utf-8') if html_fname.endswith('.br') else f.read().decode('utf-8')

@@ -184,7 +184,8 @@ $(document).ready(function() {
}
// If it's a button in a form, submit the form
else if ($element.is('button')) {
$element.closest('form').submit();
// Use requestSubmit() to include the button's name/value in the form data
$element.closest('form')[0].requestSubmit($element[0]);
}
}
};

@@ -33,7 +33,7 @@ except ImportError:
from ..processors import get_custom_watch_obj_for_processor

# Import the base class and helpers
from .file_saving_datastore import FileSavingDataStore, load_all_watches, save_watch_atomic, save_json_atomic
from .file_saving_datastore import FileSavingDataStore, load_all_watches, load_all_tags, save_watch_atomic, save_tag_atomic, save_json_atomic
from .updates import DatastoreUpdatesMixin
from .legacy_loader import has_legacy_datastore
@@ -123,10 +123,17 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
self.__data['settings']['application'].update(settings_data['settings']['application'])

def _rehydrate_tags(self):
"""Rehydrate tag entities from stored data."""
"""Rehydrate tag entities from stored data into Tag objects with restock_diff processor."""
from ..model import Tag

for uuid, tag in self.__data['settings']['application']['tags'].items():
self.__data['settings']['application']['tags'][uuid] = self.rehydrate_entity(
uuid, tag, processor_override='restock_diff'
# Force processor to restock_diff for override functionality (technical debt)
tag['processor'] = 'restock_diff'

self.__data['settings']['application']['tags'][uuid] = Tag.model(
datastore_path=self.datastore_path,
__datastore=self.__data,
default=tag
)
logger.info(f"Tag: {uuid} {tag['title']}")

@@ -148,7 +155,7 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
"""
Load complete datastore state from storage.

Orchestrates loading of settings and watches using polymorphic methods.
Orchestrates loading of settings, watches, and tags using polymorphic methods.
"""
# Load settings
settings_data = self._load_settings()

@@ -157,7 +164,11 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
# Load watches (polymorphic - parent class method)
self._load_watches()

# Rehydrate tags
# Load tags from individual tag.json files
# These will override any tags in settings (migration path)
self._load_tags()

# Rehydrate any remaining tags from settings (legacy/fallback)
self._rehydrate_tags()

def reload_state(self, datastore_path, include_default_watches, version_tag):

@@ -232,8 +243,32 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
if not legacy_data:
raise Exception("Failed to load legacy datastore from url-watches.json")

# Store the loaded data
self.__data = legacy_data
# Merge legacy data with base_config defaults (preserves new fields like 'ui')
# self.__data already has App.model() defaults from line 190
logger.info("Merging legacy data with base_config defaults...")

# Apply top-level fields from legacy data
if 'app_guid' in legacy_data:
self.__data['app_guid'] = legacy_data['app_guid']
if 'build_sha' in legacy_data:
self.__data['build_sha'] = legacy_data['build_sha']
if 'version_tag' in legacy_data:
self.__data['version_tag'] = legacy_data['version_tag']

# Apply watching data (complete replacement as these are user's watches)
if 'watching' in legacy_data:
self.__data['watching'] = legacy_data['watching']

# Merge settings sections (preserves base_config defaults for missing fields)
if 'settings' in legacy_data:
if 'headers' in legacy_data['settings']:
self.__data['settings']['headers'].update(legacy_data['settings']['headers'])
if 'requests' in legacy_data['settings']:
self.__data['settings']['requests'].update(legacy_data['settings']['requests'])
if 'application' in legacy_data['settings']:
# CRITICAL: Use .update() to merge, not replace
# This preserves new fields like 'ui' that exist in base_config
self.__data['settings']['application'].update(legacy_data['settings']['application'])

# CRITICAL: Rehydrate watches from dicts into Watch objects
# This ensures watches have their methods available during migration

@@ -336,13 +371,30 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
"""
Build settings data structure for saving.

Tags behavior depends on schema version:
- Before update_28 (schema < 28): Tags saved in settings for migration
- After update_28 (schema >= 28): Tags excluded from settings (in individual files)

Returns:
dict: Settings data ready for serialization
"""
import copy

# Deep copy settings to avoid modifying the original
settings_copy = copy.deepcopy(self.__data['settings'])

# Only exclude tags if we've already migrated them to individual files (schema >= 28)
# This ensures update_28 can migrate tags from settings
schema_version = self.__data['settings']['application'].get('schema_version', 0)
if schema_version >= 28:
# Tags are in individual tag.json files, don't save to settings
settings_copy['application']['tags'] = {}
# else: keep tags in settings for update_28 migration

return {
'note': 'Settings file - watches are stored in individual {uuid}/watch.json files',
'note': 'Settings file - watches are in {uuid}/watch.json, tags are in {uuid}/tag.json',
'app_guid': self.__data['app_guid'],
'settings': self.__data['settings'],
'settings': settings_copy,
'build_sha': self.__data.get('build_sha'),
'version_tag': self.__data.get('version_tag')
}

@@ -360,7 +412,7 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
"""
settings_data = self._build_settings_data()
changedetection_json = os.path.join(self.datastore_path, "changedetection.json")
save_json_atomic(changedetection_json, settings_data, label="settings", max_size_mb=10)
save_json_atomic(changedetection_json, settings_data, label="settings")

def _load_watches(self):
"""
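Note: the body of save_json_atomic() itself is not part of this diff. As a rough illustration only, a helper like this is usually built on the write-to-a-temp-file-then-os.replace() pattern sketched below; the names, the fsync step and error handling here are assumptions, not the project's confirmed implementation.

import json, os, tempfile

def atomic_json_write(path, data):
    # Write into a temp file in the same directory, then atomically swap it in,
    # so a reader never sees a half-written changedetection.json / watch.json.
    fd, tmp_path = tempfile.mkstemp(dir=os.path.dirname(path) or ".", suffix=".tmp")
    try:
        with os.fdopen(fd, "w", encoding="utf-8") as f:
            json.dump(data, f)
            f.flush()
            os.fsync(f.fileno())      # ensure bytes are on disk before the rename
        os.replace(tmp_path, path)    # atomic rename on POSIX and Windows
    except Exception:
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
        raise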
@@ -380,6 +432,37 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):

logger.debug(f"Loaded {len(watching)} watches")

def _load_tags(self):
"""
Load all tags from storage.

File backend implementation: reads individual tag.json files.
Tags loaded from files override any tags in settings (migration path).
"""
from ..model import Tag

def rehydrate_tag(uuid, entity_dict):
"""Rehydrate tag as Tag object with forced restock_diff processor."""
entity_dict['uuid'] = uuid
entity_dict['processor'] = 'restock_diff'  # Force processor for override functionality

return Tag.model(
datastore_path=self.datastore_path,
__datastore=self.__data,
default=entity_dict
)

tags = load_all_tags(
self.datastore_path,
rehydrate_tag
)

# Override settings tags with loaded tags
# This ensures tag.json files take precedence over settings
if tags:
self.__data['settings']['application']['tags'].update(tags)
logger.info(f"Loaded {len(tags)} tags from individual tag.json files")

def _delete_watch(self, uuid):
"""
Delete a watch from storage.

@@ -706,25 +789,6 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):

# Old sync_to_json and save_datastore methods removed - now handled by FileSavingDataStore parent class

# Go through the datastore path and remove any snapshots that are not mentioned in the index
# This usually is not used, but can be handy.
def remove_unused_snapshots(self):
logger.info("Removing snapshots from datastore that are not in the index..")

index = []
for uuid in self.data['watching']:
for id in self.data['watching'][uuid].history:
index.append(self.data['watching'][uuid].history[str(id)])

import pathlib

# Only in the sub-directories
for uuid in self.data['watching']:
for item in pathlib.Path(self.datastore_path).rglob(uuid + "/*.txt"):
if not str(item) in index:
logger.info(f"Removing {item}")
unlink(item)

@property
def proxy_list(self):
proxy_list = {}

@@ -816,7 +880,7 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
if watch:

# In /datastore/xyz-xyz/headers.txt
filepath = os.path.join(watch.watch_data_dir, 'headers.txt')
filepath = os.path.join(watch.data_dir, 'headers.txt')
try:
if os.path.isfile(filepath):
headers.update(parse_headers_from_text_file(filepath))

@@ -876,7 +940,8 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):

self.__data['settings']['application']['tags'][new_uuid] = new_tag

self.commit()
# Save tag to its own tag.json file instead of settings
new_tag.commit()
return new_uuid

def get_all_tags_for_watch(self, uuid):
@@ -175,23 +175,46 @@ def save_json_atomic(file_path, data_dict, label="file", max_size_mb=10):
raise e


def save_entity_atomic(entity_dir, uuid, entity_dict, filename, entity_type, max_size_mb):
"""
Save an entity (watch/tag) to disk using atomic write pattern.

Generic function for saving any watch_base subclass (Watch, Tag, etc.).

Args:
entity_dir: Directory for this entity (e.g., /datastore/{uuid})
uuid: Entity UUID (for logging)
entity_dict: Dictionary representation of the entity
filename: JSON filename (e.g., 'watch.json', 'tag.json')
entity_type: Type label for logging (e.g., 'watch', 'tag')
max_size_mb: Maximum allowed file size in MB

Raises:
ValueError: If serialized data exceeds max_size_mb
OSError: If disk is full (ENOSPC) or other I/O error
"""
entity_json = os.path.join(entity_dir, filename)
save_json_atomic(entity_json, entity_dict, label=f"{entity_type} {uuid}", max_size_mb=max_size_mb)


def save_watch_atomic(watch_dir, uuid, watch_dict):
"""
Save a watch to disk using atomic write pattern.

Convenience wrapper around save_json_atomic for watches.

Args:
watch_dir: Directory for this watch (e.g., /datastore/{uuid})
uuid: Watch UUID (for logging)
watch_dict: Dictionary representation of the watch

Raises:
ValueError: If serialized data exceeds 10MB (indicates bug or corruption)
OSError: If disk is full (ENOSPC) or other I/O error
Convenience wrapper around save_entity_atomic for watches.
Kept for backwards compatibility.
"""
watch_json = os.path.join(watch_dir, "watch.json")
save_json_atomic(watch_json, watch_dict, label=f"watch {uuid}", max_size_mb=10)
save_entity_atomic(watch_dir, uuid, watch_dict, "watch.json", "watch", max_size_mb=10)


def save_tag_atomic(tag_dir, uuid, tag_dict):
"""
Save a tag to disk using atomic write pattern.

Convenience wrapper around save_entity_atomic for tags.
Kept for backwards compatibility.
"""
save_entity_atomic(tag_dir, uuid, tag_dict, "tag.json", "tag", max_size_mb=1)


def load_watch_from_file(watch_json, uuid, rehydrate_entity_func):
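For reference, a minimal usage sketch of the save helpers above; the datastore path and UUID below are invented for illustration.

import os
from changedetectionio.store.file_saving_datastore import save_tag_atomic

datastore_path = "/tmp/example-datastore"                    # hypothetical location
uuid = "0a1b2c3d-0000-0000-0000-000000000000"                # hypothetical tag UUID
tag_dir = os.path.join(datastore_path, uuid)
os.makedirs(tag_dir, exist_ok=True)

# Delegates to save_entity_atomic(..., "tag.json", "tag", max_size_mb=1),
# which in turn uses save_json_atomic() for the atomic write and size check.
save_tag_atomic(tag_dir, uuid, {"uuid": uuid, "title": "example-tag"})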
@@ -204,8 +227,7 @@ def load_watch_from_file(watch_json, uuid, rehydrate_entity_func):
rehydrate_entity_func: Function to convert dict to Watch object

Returns:
Tuple of (Watch object, raw_data_dict) or (None, None) if failed
The raw_data_dict is needed to compute the hash before rehydration
Watch object or None if failed
"""
try:
# Check file size before reading

@@ -218,7 +240,7 @@ def load_watch_from_file(watch_json, uuid, rehydrate_entity_func):
f"File: {watch_json}. This indicates a bug or data corruption. "
f"Watch will be skipped."
)
return None, None
return None

if HAS_ORJSON:
with open(watch_json, 'rb') as f:

@@ -227,10 +249,9 @@ def load_watch_from_file(watch_json, uuid, rehydrate_entity_func):
with open(watch_json, 'r', encoding='utf-8') as f:
watch_data = json.load(f)

# Return both the raw data and the rehydrated watch
# Raw data is needed to compute hash before rehydration changes anything
# Rehydrate and return watch object
watch_obj = rehydrate_entity_func(uuid, watch_data)
return watch_obj, watch_data
return watch_obj

except json.JSONDecodeError as e:
logger.critical(

@@ -238,7 +259,7 @@ def load_watch_from_file(watch_json, uuid, rehydrate_entity_func):
f"File: {watch_json}. Error: {e}. "
f"Watch will be skipped and may need manual recovery from backup."
)
return None, None
return None
except ValueError as e:
# orjson raises ValueError for invalid JSON
if "invalid json" in str(e).lower() or HAS_ORJSON:

@@ -247,15 +268,15 @@ def load_watch_from_file(watch_json, uuid, rehydrate_entity_func):
f"File: {watch_json}. Error: {e}. "
f"Watch will be skipped and may need manual recovery from backup."
)
return None, None
return None
# Re-raise if it's not a JSON parsing error
raise
except FileNotFoundError:
logger.error(f"Watch file not found: {watch_json} for watch {uuid}")
return None, None
return None
except Exception as e:
logger.error(f"Failed to load watch {uuid} from {watch_json}: {e}")
return None, None
return None


def load_all_watches(datastore_path, rehydrate_entity_func):

@@ -295,8 +316,8 @@ def load_all_watches(datastore_path, rehydrate_entity_func):
for watch_json in watch_files:
# Extract UUID from path: /datastore/{uuid}/watch.json
uuid_dir = os.path.basename(os.path.dirname(watch_json))
watch, raw_data = load_watch_from_file(watch_json, uuid_dir, rehydrate_entity_func)
if watch and raw_data:
watch = load_watch_from_file(watch_json, uuid_dir, rehydrate_entity_func)
if watch:
watching[uuid_dir] = watch
loaded += 1
@@ -320,6 +341,122 @@ def load_all_watches(datastore_path, rehydrate_entity_func):
return watching


def load_tag_from_file(tag_json, uuid, rehydrate_entity_func):
"""
Load a tag from its JSON file.

Args:
tag_json: Path to the tag.json file
uuid: Tag UUID
rehydrate_entity_func: Function to convert dict to Tag object

Returns:
Tag object or None if failed
"""
try:
# Check file size before reading
file_size = os.path.getsize(tag_json)
MAX_TAG_SIZE = 1 * 1024 * 1024  # 1MB
if file_size > MAX_TAG_SIZE:
logger.critical(
f"CORRUPTED TAG DATA: Tag {uuid} file is unexpectedly large: "
f"{file_size / 1024 / 1024:.2f}MB (max: {MAX_TAG_SIZE / 1024 / 1024}MB). "
f"File: {tag_json}. This indicates a bug or data corruption. "
f"Tag will be skipped."
)
return None

if HAS_ORJSON:
with open(tag_json, 'rb') as f:
tag_data = orjson.loads(f.read())
else:
with open(tag_json, 'r', encoding='utf-8') as f:
tag_data = json.load(f)

tag_data['processor'] = 'restock_diff'
# Rehydrate tag (convert dict to Tag object)
# processor_override is set inside the rehydration function
tag_obj = rehydrate_entity_func(uuid, tag_data)
return tag_obj

except json.JSONDecodeError as e:
logger.critical(
f"CORRUPTED TAG DATA: Failed to parse JSON for tag {uuid}. "
f"File: {tag_json}. Error: {e}. "
f"Tag will be skipped and may need manual recovery from backup."
)
return None
except ValueError as e:
# orjson raises ValueError for invalid JSON
if "invalid json" in str(e).lower() or HAS_ORJSON:
logger.critical(
f"CORRUPTED TAG DATA: Failed to parse JSON for tag {uuid}. "
f"File: {tag_json}. Error: {e}. "
f"Tag will be skipped and may need manual recovery from backup."
)
return None
# Re-raise if it's not a JSON parsing error
raise
except FileNotFoundError:
logger.debug(f"Tag file not found: {tag_json} for tag {uuid}")
return None
except Exception as e:
logger.error(f"Failed to load tag {uuid} from {tag_json}: {e}")
return None


def load_all_tags(datastore_path, rehydrate_entity_func):
"""
Load all tags from individual tag.json files.

Tags are stored separately from settings in {uuid}/tag.json files.

Args:
datastore_path: Path to the datastore directory
rehydrate_entity_func: Function to convert dict to Tag object

Returns:
Dictionary of uuid -> Tag object
"""
logger.info("Loading tags from individual tag.json files...")

tags = {}

if not os.path.exists(datastore_path):
return tags

# Find all tag.json files using glob
tag_files = glob.glob(os.path.join(datastore_path, "*", "tag.json"))

total = len(tag_files)
if total == 0:
logger.debug("No tag.json files found")
return tags

logger.debug(f"Found {total} tag.json files")

loaded = 0
failed = 0

for tag_json in tag_files:
# Extract UUID from path: /datastore/{uuid}/tag.json
uuid_dir = os.path.basename(os.path.dirname(tag_json))
tag = load_tag_from_file(tag_json, uuid_dir, rehydrate_entity_func)
if tag:
tags[uuid_dir] = tag
loaded += 1
else:
# load_tag_from_file already logged the specific error
failed += 1

if failed > 0:
logger.warning(f"Loaded {loaded} tags, {failed} tags FAILED to load")
else:
logger.info(f"Loaded {loaded} tags from disk")

return tags


# ============================================================================
# FileSavingDataStore Class
# ============================================================================
@@ -29,6 +29,7 @@ def create_backup_tarball(datastore_path, update_number):

Includes:
- All {uuid}/watch.json files
- All {uuid}/tag.json files
- changedetection.json (settings, if it exists)
- url-watches.json (legacy format, if it exists)
- Directory structure preserved

@@ -44,7 +45,7 @@ def create_backup_tarball(datastore_path, update_number):
To restore from a backup:
cd /path/to/datastore
tar -xzf before-update-N-timestamp.tar.gz
This will restore all watch.json files and settings to their pre-update state.
This will restore all watch.json and tag.json files and settings to their pre-update state.
"""
timestamp = int(time.time())
backup_filename = f"before-update-{update_number}-{timestamp}.tar.gz"

@@ -66,9 +67,10 @@ def create_backup_tarball(datastore_path, update_number):
tar.add(url_watches_json, arcname="url-watches.json")
logger.debug("Added url-watches.json to backup")

# Backup all watch directories with their watch.json files
# Backup all watch/tag directories with their JSON files
# This preserves the UUID directory structure
watch_count = 0
tag_count = 0
for entry in os.listdir(datastore_path):
entry_path = os.path.join(datastore_path, entry)

@@ -80,17 +82,22 @@ def create_backup_tarball(datastore_path, update_number):
if entry.startswith('.') or entry.startswith('before-update-'):
continue

# Check if this directory has a watch.json (indicates it's a watch UUID directory)
# Backup watch.json if exists
watch_json = os.path.join(entry_path, "watch.json")
if os.path.isfile(watch_json):
# Add the watch.json file preserving directory structure
tar.add(watch_json, arcname=f"{entry}/watch.json")
watch_count += 1

if watch_count % 100 == 0:
logger.debug(f"Backed up {watch_count} watch.json files...")

logger.success(f"Backup created: {backup_filename} ({watch_count} watches)")
# Backup tag.json if exists
tag_json = os.path.join(entry_path, "tag.json")
if os.path.isfile(tag_json):
tar.add(tag_json, arcname=f"{entry}/tag.json")
tag_count += 1

logger.success(f"Backup created: {backup_filename} ({watch_count} watches, {tag_count} tags)")
return backup_path

except Exception as e:
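The documented tar -xzf restore can also be done from Python; a small sketch, with an illustrative backup filename (yours will carry its own update number and timestamp).

import tarfile

datastore_path = "/path/to/datastore"                                # your datastore directory
backup = f"{datastore_path}/before-update-28-1700000000.tar.gz"      # example filename only

# Puts watch.json / tag.json files and the settings files back into place,
# preserving the {uuid}/ directory structure stored in the archive.
with tarfile.open(backup, "r:gz") as tar:
    tar.extractall(path=datastore_path)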
@@ -147,10 +154,10 @@ class DatastoreUpdatesMixin:
2. For each update > current schema version:
- Create backup of datastore
- Run update method
- Update schema version
- Mark settings and watches dirty
- Update schema version and commit settings
- Commit all watches and tags
3. If any update fails, stop processing
4. Save all changes immediately
4. All changes saved via individual .commit() calls
"""
updates_available = self.get_updates_available()

@@ -199,26 +206,11 @@ class DatastoreUpdatesMixin:
# Don't run any more updates
return
else:
# Bump the version, important
# Bump the version
self.data['settings']['application']['schema_version'] = update_n
self.commit()

# CRITICAL: Save all watches so changes are persisted
# Most updates modify watches, and in the new individual watch.json structure,
# we need to ensure those changes are saved
logger.info(f"Saving all {len(self.data['watching'])} watches after update_{update_n} (so that it saves them to disk)")
for uuid in self.data['watching'].keys():
self.data['watching'][uuid].commit()

# Save changes immediately after each update (more resilient than batching)
logger.critical(f"Saving all changes after update_{update_n}")
try:
self._save_dirty_items()
logger.success(f"Update {update_n} changes saved successfully")
except Exception as e:
logger.error(f"Failed to save update_{update_n} changes: {e}")
# Don't raise - update already ran, but changes might not be persisted
# The update will try to run again on next startup
logger.success(f"Update {update_n} completed")

# Track which updates ran
updates_ran.append(update_n)

@@ -468,6 +460,14 @@ class DatastoreUpdatesMixin:
del self.data['watching'][uuid]['extract_title_as_title']

if self.data['settings']['application'].get('extract_title_as_title'):
# Ensure 'ui' key exists (defensive for edge cases where base_config merge didn't happen)
if 'ui' not in self.data['settings']['application']:
self.data['settings']['application']['ui'] = {
'use_page_title_in_list': True,
'open_diff_in_new_tab': True,
'socket_io_enabled': True,
'favicons_enabled': True
}
self.data['settings']['application']['ui']['use_page_title_in_list'] = self.data['settings']['application'].get('extract_title_as_title')

def update_21(self):

@@ -648,23 +648,6 @@ class DatastoreUpdatesMixin:
logger.critical("Reloading datastore from new format...")
self._load_state()  # Includes load_watches
logger.success("Datastore reloaded from new format successfully")


# Verify all watches have hashes after migration
missing_hashes = [uuid for uuid in self.data['watching'].keys() if uuid not in self._watch_hashes]
if missing_hashes:
logger.error(f"WARNING: {len(missing_hashes)} watches missing hashes after migration: {missing_hashes[:5]}")
else:
logger.success(f"All {len(self.data['watching'])} watches have valid hashes after migration")

# Set schema version to latest available update
# This prevents re-running updates and re-marking all watches as dirty
updates_available = self.get_updates_available()
latest_schema = updates_available[-1] if updates_available else 26
self.data['settings']['application']['schema_version'] = latest_schema
self.commit()
logger.info(f"Set schema_version to {latest_schema} (migration complete, all watches already saved)")

logger.critical("=" * 80)
logger.critical("MIGRATION COMPLETED SUCCESSFULLY!")
logger.critical("=" * 80)
@@ -683,4 +666,76 @@ class DatastoreUpdatesMixin:
logger.info("")

def update_26(self):
self.migrate_legacy_db_format()
self.migrate_legacy_db_format()

def update_28(self):
"""
Migrate tags to individual tag.json files.

Tags are currently saved only in changedetection.json (settings).
This migration ALSO saves them to individual {uuid}/tag.json files,
similar to how watches are stored (dual storage).

Benefits:
- Allows atomic tag updates without rewriting entire settings
- Enables independent tag versioning/backup
- Maintains backwards compatibility (tags stay in settings too)
"""
logger.critical("=" * 80)
logger.critical("Running migration: Individual tag persistence (update_28)")
logger.critical("Creating individual tag.json files (tags remain in settings too)")
logger.critical("=" * 80)

tags = self.data['settings']['application'].get('tags', {})
tag_count = len(tags)

if tag_count == 0:
logger.info("No tags found, skipping migration")
return

logger.info(f"Migrating {tag_count} tags to individual tag.json files...")

saved_count = 0
failed_count = 0

for uuid, tag_data in tags.items():
try:
# Force save as tag.json (not watch.json) even if object is corrupted
from changedetectionio.store.file_saving_datastore import save_entity_atomic
import os

tag_dir = os.path.join(self.datastore_path, uuid)
os.makedirs(tag_dir, exist_ok=True)

# Convert to dict if it's an object
tag_dict = dict(tag_data) if hasattr(tag_data, '__iter__') else tag_data

# Save explicitly as tag.json
save_entity_atomic(
tag_dir,
uuid,
tag_dict,
filename='tag.json',
entity_type='tag',
max_size_mb=1
)
saved_count += 1

if saved_count % 10 == 0:
logger.info(f"  Progress: {saved_count}/{tag_count} tags migrated...")

except Exception as e:
logger.error(f"Failed to save tag {uuid} ({tag_data.get('title', 'unknown')}): {e}")
failed_count += 1

if failed_count > 0:
logger.warning(f"Migration complete: {saved_count} tags saved, {failed_count} tags FAILED")
else:
logger.success(f"Migration complete: {saved_count} tags saved to individual tag.json files")

# Tags remain in settings for backwards compatibility AND easy access
# On next load, _load_tags() will read from tag.json files and merge with settings
logger.info("Tags saved to both settings AND individual tag.json files")
logger.info("Future tag edits will update both locations (dual storage)")

logger.critical("=" * 80)
@@ -489,6 +489,7 @@ def test_api_import(client, live_server, measure_memory_usage, datastore_path):

api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')

# Test 1: Basic import with tag
res = client.post(
url_for("import") + "?tag=import-test",
data='https://website1.com\r\nhttps://website2.com',

@@ -507,6 +508,97 @@ def test_api_import(client, live_server, measure_memory_usage, datastore_path):
res = client.get(url_for('tags.tags_overview_page'))
assert b'import-test' in res.data

# Test 2: Import with watch configuration fields (issue #3845)
# Test string field (include_filters), boolean (paused), and processor
import urllib.parse
params = urllib.parse.urlencode({
'tag': 'config-test',
'include_filters': 'div.content',
'paused': 'true',
'processor': 'text_json_diff',
'title': 'Imported with Config'
})

res = client.post(
url_for("import") + "?" + params,
data='https://website3.com',
headers={'x-api-key': api_key},
follow_redirects=True
)

assert res.status_code == 200
assert len(res.json) == 1
uuid = res.json[0]

# Verify the configuration was applied
watch = live_server.app.config['DATASTORE'].data['watching'][uuid]
assert watch['include_filters'] == ['div.content'], "include_filters should be set as array"
assert watch['paused'] == True, "paused should be True"
assert watch['processor'] == 'text_json_diff', "processor should be set"
assert watch['title'] == 'Imported with Config', "title should be set"

# Test 3: Import with array field (notification_urls) - using valid Apprise format
params = urllib.parse.urlencode({
'tag': 'notification-test',
'notification_urls': 'mailto://test@example.com,mailto://admin@example.com'
})

res = client.post(
url_for("import") + "?" + params,
data='https://website4.com',
headers={'x-api-key': api_key},
follow_redirects=True
)

assert res.status_code == 200
uuid = res.json[0]
watch = live_server.app.config['DATASTORE'].data['watching'][uuid]
assert 'mailto://test@example.com' in watch['notification_urls'], "notification_urls should contain first email"
assert 'mailto://admin@example.com' in watch['notification_urls'], "notification_urls should contain second email"

# Test 4: Import with object field (time_between_check)
import json
time_config = json.dumps({"hours": 2, "minutes": 30})
params = urllib.parse.urlencode({
'tag': 'schedule-test',
'time_between_check': time_config
})

res = client.post(
url_for("import") + "?" + params,
data='https://website5.com',
headers={'x-api-key': api_key},
follow_redirects=True
)

assert res.status_code == 200
uuid = res.json[0]
watch = live_server.app.config['DATASTORE'].data['watching'][uuid]
assert watch['time_between_check']['hours'] == 2, "time_between_check hours should be 2"
assert watch['time_between_check']['minutes'] == 30, "time_between_check minutes should be 30"

# Test 5: Import with invalid processor (should fail)
res = client.post(
url_for("import") + "?processor=invalid_processor",
data='https://website6.com',
headers={'x-api-key': api_key},
follow_redirects=True
)

assert res.status_code == 400, "Should reject invalid processor"
assert b"Invalid processor" in res.data, "Error message should mention invalid processor"

# Test 6: Import with invalid field (should fail)
res = client.post(
url_for("import") + "?unknown_field=value",
data='https://website7.com',
headers={'x-api-key': api_key},
follow_redirects=True
)

assert res.status_code == 400, "Should reject unknown field"
assert b"Unknown watch configuration parameter" in res.data, "Error message should mention unknown parameter"

def test_api_conflict_UI_password(client, live_server, measure_memory_usage, datastore_path):
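Outside the Flask test client, the same import call can be made over plain HTTP; a hedged sketch using requests, where the host, port and /api/v1 prefix are assumptions about a default install - check your instance's API docs and key before relying on them.

import json
import requests

BASE = "http://localhost:5000/api/v1"      # assumed bind address and API prefix
API_KEY = "your-api-key"                   # from the settings UI

params = {
    "tag": "config-test",
    "include_filters": "div.content",
    "paused": "true",
    "processor": "text_json_diff",
    "time_between_check": json.dumps({"hours": 2, "minutes": 30}),
}

# The body is one URL per line, exactly as in the tests above.
res = requests.post(
    f"{BASE}/import",
    params=params,
    data="https://website3.com\nhttps://website5.com",
    headers={"x-api-key": API_KEY},
)
print(res.status_code, res.json())         # a list of created watch UUIDs on success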
@@ -35,7 +35,7 @@ def test_watch_commit_persists_to_disk(client, live_server):
watch.commit()

# Read directly from disk (bypass datastore cache)
watch_json_path = os.path.join(watch.watch_data_dir, 'watch.json')
watch_json_path = os.path.join(watch.data_dir, 'watch.json')
assert os.path.exists(watch_json_path), "watch.json should exist on disk"

with open(watch_json_path, 'r') as f:

@@ -89,7 +89,7 @@ def test_watch_commit_atomic_on_crash(client, live_server):
watch.commit()

# Verify watch.json exists and is valid
watch_json_path = os.path.join(watch.watch_data_dir, 'watch.json')
watch_json_path = os.path.join(watch.data_dir, 'watch.json')
with open(watch_json_path, 'r') as f:
data = json.load(f)  # Should not raise JSONDecodeError
assert data['title'] == 'First Save'

@@ -125,7 +125,7 @@ def test_multiple_watches_commit_independently(client, live_server):
# Read all from disk
def read_watch_json(uuid):
watch = datastore.data['watching'][uuid]
path = os.path.join(watch.watch_data_dir, 'watch.json')
path = os.path.join(watch.data_dir, 'watch.json')
with open(path, 'r') as f:
return json.load(f)

@@ -178,7 +178,7 @@ def test_concurrent_watch_commits_dont_corrupt(client, live_server):
assert len(errors) == 0, f"Expected no errors, got: {errors}"

# JSON file should still be valid (not corrupted)
watch_json_path = os.path.join(watch.watch_data_dir, 'watch.json')
watch_json_path = os.path.join(watch.data_dir, 'watch.json')
with open(watch_json_path, 'r') as f:
data = json.load(f)  # Should not raise JSONDecodeError
assert data['uuid'] == uuid, "UUID should still be correct"

@@ -270,7 +270,7 @@ def test_datastore_lock_protects_commit_snapshot(client, live_server):
assert commits_succeeded[0] == 150, f"Expected 150 commits, got {commits_succeeded[0]}"

# Final JSON should be valid
watch_json_path = os.path.join(watch.watch_data_dir, 'watch.json')
watch_json_path = os.path.join(watch.data_dir, 'watch.json')
with open(watch_json_path, 'r') as f:
data = json.load(f)
assert data['uuid'] == uuid

@@ -301,7 +301,7 @@ def test_processor_config_never_in_watch_json(client, live_server):
watch.commit()

# Read watch.json from disk
watch_json_path = os.path.join(watch.watch_data_dir, 'watch.json')
watch_json_path = os.path.join(watch.data_dir, 'watch.json')
with open(watch_json_path, 'r') as f:
data = json.load(f)

@@ -343,7 +343,7 @@ def test_api_post_saves_processor_config_separately(client, live_server):
watch = datastore.data['watching'][uuid]

# Check that processor config file exists
processor_config_path = os.path.join(watch.watch_data_dir, 'restock_diff.json')
processor_config_path = os.path.join(watch.data_dir, 'restock_diff.json')
assert os.path.exists(processor_config_path), "Processor config file should exist"

with open(processor_config_path, 'r') as f:

@@ -385,7 +385,7 @@ def test_api_put_saves_processor_config_separately(client, live_server):
watch = datastore.data['watching'][uuid]

# Check processor config file
processor_config_path = os.path.join(watch.watch_data_dir, 'restock_diff.json')
processor_config_path = os.path.join(watch.data_dir, 'restock_diff.json')
assert os.path.exists(processor_config_path), "Processor config file should exist"

with open(processor_config_path, 'r') as f:

@@ -414,7 +414,7 @@ def test_ui_edit_saves_processor_config_separately(client, live_server):
watch.commit()

# Check watch.json has NO processor_config_* fields (main point of this test)
watch_json_path = os.path.join(watch.watch_data_dir, 'watch.json')
watch_json_path = os.path.join(watch.data_dir, 'watch.json')
with open(watch_json_path, 'r') as f:
watch_data = json.load(f)

@@ -443,7 +443,7 @@ def test_browser_steps_normalized_to_empty_list(client, live_server):
watch.commit()

# Read from disk
watch_json_path = os.path.join(watch.watch_data_dir, 'watch.json')
watch_json_path = os.path.join(watch.data_dir, 'watch.json')
with open(watch_json_path, 'r') as f:
data = json.load(f)

@@ -547,7 +547,7 @@ def test_tag_delete_removes_from_watches(client, live_server):
# Tag should be removed from watches and persisted
def check_watch_tags(uuid):
watch = datastore.data['watching'][uuid]
watch_json_path = os.path.join(watch.watch_data_dir, 'watch.json')
watch_json_path = os.path.join(watch.data_dir, 'watch.json')
with open(watch_json_path, 'r') as f:
return json.load(f)['tags']

@@ -572,7 +572,7 @@ def test_watch_pause_unpause_persists(client, live_server):
assert response.status_code == 200

# Check persisted to disk
watch_json_path = os.path.join(watch.watch_data_dir, 'watch.json')
watch_json_path = os.path.join(watch.data_dir, 'watch.json')
with open(watch_json_path, 'r') as f:
data = json.load(f)
assert data['paused'] == True, "Pause should be persisted"

@@ -601,7 +601,7 @@ def test_watch_mute_unmute_persists(client, live_server):
assert response.status_code == 200

# Check persisted to disk
watch_json_path = os.path.join(watch.watch_data_dir, 'watch.json')
watch_json_path = os.path.join(watch.data_dir, 'watch.json')
with open(watch_json_path, 'r') as f:
data = json.load(f)
assert data['notification_muted'] == True, "Mute should be persisted"
@@ -474,3 +474,147 @@ the {test} appeared before. {test in res.data[:n]=}
n += t_index + len(test)

delete_all_watches(client)


def test_tag_json_persistence(client, live_server, measure_memory_usage, datastore_path):
"""
Test that tags are saved to individual tag.json files and loaded correctly.

This test verifies the update_27 tag storage refactoring:
- Tags are saved to {uuid}/tag.json files
- Tags persist across datastore restarts
- Tag edits write to tag.json
- Tag deletion removes tag.json file
"""
import json
from changedetectionio.store import ChangeDetectionStore

datastore = client.application.config.get('DATASTORE')

# 1. Create a tag
res = client.post(
url_for("tags.form_tag_add"),
data={"name": "persistence-test-tag"},
follow_redirects=True
)
assert b"Tag added" in res.data

tag_uuid = get_UUID_for_tag_name(client, name="persistence-test-tag")
assert tag_uuid, "Tag UUID should exist"

# 2. Verify tag.json file was created
tag_json_path = os.path.join(datastore_path, tag_uuid, "tag.json")
assert os.path.exists(tag_json_path), f"tag.json should exist at {tag_json_path}"

# 3. Verify tag.json contains correct data
with open(tag_json_path, 'r') as f:
tag_data = json.load(f)
assert tag_data['title'] == 'persistence-test-tag'
assert tag_data['uuid'] == tag_uuid
assert 'date_created' in tag_data

# 4. Edit the tag
res = client.post(
url_for("tags.form_tag_edit_submit", uuid=tag_uuid),
data={
"name": "persistence-test-tag",
"notification_muted": True,
"include_filters": '#test-filter'
},
follow_redirects=True
)
assert b"Updated" in res.data

# 5. Verify tag.json was updated
with open(tag_json_path, 'r') as f:
tag_data = json.load(f)
assert tag_data['notification_muted'] == True
assert '#test-filter' in tag_data.get('include_filters', [])

# 5a. Verify tag is NOT in changedetection.json (tags should be in tag.json only)
changedetection_json_path = os.path.join(datastore_path, "changedetection.json")
with open(changedetection_json_path, 'r') as f:
settings_data = json.load(f)
# Tags dict should be empty in settings (all tags are in individual files)
assert settings_data['settings']['application']['tags'] == {}, \
"Tags should NOT be saved to changedetection.json (should be empty dict)"

# 6. Simulate restart - reload datastore
datastore2 = ChangeDetectionStore(datastore_path=datastore_path, include_default_watches=False, version_tag='test')

# 7. Verify tag was loaded from tag.json
assert tag_uuid in datastore2.data['settings']['application']['tags']
loaded_tag = datastore2.data['settings']['application']['tags'][tag_uuid]
assert loaded_tag['title'] == 'persistence-test-tag'
assert loaded_tag['notification_muted'] == True
assert '#test-filter' in loaded_tag.get('include_filters', [])

# 8. Delete the tag via API
res = client.get(url_for("tags.delete", uuid=tag_uuid), follow_redirects=True)
assert b"Tag deleted" in res.data

# 9. Verify tag.json file was deleted
assert not os.path.exists(tag_json_path), f"tag.json should be deleted at {tag_json_path}"

# 10. Verify tag is removed from settings
assert tag_uuid not in datastore.data['settings']['application']['tags']

delete_all_watches(client)


def test_tag_json_migration_update_27(client, live_server, measure_memory_usage, datastore_path):
"""
Test that update_27 migration correctly moves tags to individual files.

This simulates a pre-update_27 datastore and verifies migration works.
"""
import json
from changedetectionio.store import ChangeDetectionStore

datastore = client.application.config.get('DATASTORE')

# 1. Create multiple tags
tag_names = ['migration-tag-1', 'migration-tag-2', 'migration-tag-3']
tag_uuids = []

for tag_name in tag_names:
res = client.post(
url_for("tags.form_tag_add"),
data={"name": tag_name},
follow_redirects=True
)
assert b"Tag added" in res.data
tag_uuid = get_UUID_for_tag_name(client, name=tag_name)
tag_uuids.append(tag_uuid)

# 2. Verify all tag.json files exist (update_27 already ran during add_tag)
for tag_uuid in tag_uuids:
tag_json_path = os.path.join(datastore_path, tag_uuid, "tag.json")
assert os.path.exists(tag_json_path), f"tag.json should exist for {tag_uuid}"

# 2a. Verify tags are NOT in changedetection.json
changedetection_json_path = os.path.join(datastore_path, "changedetection.json")
with open(changedetection_json_path, 'r') as f:
settings_data = json.load(f)
assert settings_data['settings']['application']['tags'] == {}, \
"Tags should NOT be in changedetection.json after migration"

# 3. Simulate restart
datastore2 = ChangeDetectionStore(datastore_path=datastore_path, include_default_watches=False, version_tag='test')

# 4. Verify all tags loaded from tag.json files
for idx, tag_uuid in enumerate(tag_uuids):
assert tag_uuid in datastore2.data['settings']['application']['tags']
loaded_tag = datastore2.data['settings']['application']['tags'][tag_uuid]
assert loaded_tag['title'] == tag_names[idx]

# Cleanup
res = client.get(url_for("tags.delete_all"), follow_redirects=True)
assert b'All tags deleted' in res.data

# Verify all tag.json files were deleted
for tag_uuid in tag_uuids:
tag_json_path = os.path.join(datastore_path, tag_uuid, "tag.json")
assert not os.path.exists(tag_json_path), f"tag.json should be deleted for {tag_uuid}"

delete_all_watches(client)
@@ -100,11 +100,11 @@ class TestDiffBuilder(unittest.TestCase):
# Test 1: Deepcopy shares datastore reference (doesn't copy it)
watch_copy = deepcopy(watches[0])

self.assertIsNotNone(watch_copy._model__datastore,
self.assertIsNotNone(watch_copy._datastore,
"__datastore should exist in copied watch")
self.assertIs(watch_copy._model__datastore, watches[0]._model__datastore,
self.assertIs(watch_copy._datastore, watches[0]._datastore,
"__datastore should be SHARED (same object), not copied")
self.assertIs(watch_copy._model__datastore, mock_datastore,
self.assertIs(watch_copy._datastore, mock_datastore,
"__datastore should reference the original datastore")

# Test 2: Dict data is properly copied (not shared)

@@ -130,7 +130,7 @@ class TestDiffBuilder(unittest.TestCase):

# All copies should share the same datastore
for copy in copies:
self.assertIs(copy._model__datastore, mock_datastore,
self.assertIs(copy._datastore, mock_datastore,
"All copies should share the original datastore")

def test_watch_pickle_doesnt_serialize_datastore(self):

@@ -160,7 +160,7 @@ class TestDiffBuilder(unittest.TestCase):
"Dict data should be preserved after pickle/unpickle")

# Test 2: __datastore is NOT serialized (attribute shouldn't exist after unpickle)
self.assertFalse(hasattr(unpickled_watch, '_model__datastore'),
self.assertFalse(hasattr(unpickled_watch, '_datastore'),
"__datastore attribute should not exist after unpickle (not serialized)")

# Test 3: Pickled data shouldn't contain the large datastore object

@@ -208,8 +208,8 @@ class TestDiffBuilder(unittest.TestCase):
"Modifying copy should not affect original")

# Test 5: Tag with datastore shares it (doesn't copy it)
if hasattr(tag_with_ds, '_model__datastore'):
self.assertIs(tag_copy2._model__datastore, tag_with_ds._model__datastore,
if hasattr(tag_with_ds, '_datastore'):
self.assertIs(tag_copy2._datastore, tag_with_ds._datastore,
"Tag should share __datastore reference like Watch does")

def test_watch_copy_performance(self):
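The copy/pickle behaviour these tests assert can be implemented in more than one way; the sketch below is just one common pattern, using a hypothetical Entity class rather than the project's actual Watch/Tag code: deepcopy shares the datastore reference while copying the payload, and pickling drops the reference entirely so the attribute is absent after unpickling.

import copy
import pickle

class Entity:
    def __init__(self, data=None, datastore=None):
        self.data = dict(data or {})    # per-entity payload
        self._datastore = datastore     # large shared object, never copied or pickled

    def __deepcopy__(self, memo):
        clone = Entity(datastore=self._datastore)      # share the reference
        clone.data = copy.deepcopy(self.data, memo)    # copy only the payload
        return clone

    def __getstate__(self):
        return {"data": self.data}                     # exclude _datastore from pickling

    def __setstate__(self, state):
        self.data = state["data"]                      # _datastore is simply not restored

ds = object()
w = Entity({"title": "x"}, datastore=ds)
c = copy.deepcopy(w)
assert c._datastore is ds and c.data is not w.data
assert not hasattr(pickle.loads(pickle.dumps(w)), "_datastore")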