Compare commits

...

3 Commits

Author     SHA1        Message                                                Date
dgtlmoon   799818dd40  add note                                               2026-02-05 16:30:35 +01:00
dgtlmoon   b06797636c  New datastore message should be warning not critical   2026-02-05 16:24:52 +01:00
dgtlmoon   fcd07e23f3  Improved watch global settings handling                2026-02-05 16:21:52 +01:00
4 changed files with 121 additions and 26 deletions

View File

@@ -66,47 +66,42 @@ class Watch(Resource):
     @validate_openapi_request('getWatch')
     def get(self, uuid):
         """Get information about a single watch, recheck, pause, or mute."""
-        import time
-        from copy import deepcopy
-
-        watch = None
-        # Retry up to 20 times if the dict is being modified
-        # With sleep(0) this is fast: ~200µs best case, ~20ms worst case under heavy load
-        for attempt in range(20):
-            try:
-                watch = deepcopy(self.datastore.data['watching'].get(uuid))
-                break
-            except RuntimeError:
-                # Dict changed during deepcopy, retry after yielding to the scheduler
-                # sleep(0) releases the GIL and yields - no fixed delay, just lets other threads run
-                if attempt < 19:  # Don't yield on the last attempt
-                    time.sleep(0)  # Yield to the scheduler (microseconds, not milliseconds)
-
-        if not watch:
+        # Get the watch reference first (needed for the pause/mute operations below)
+        watch_obj = self.datastore.data['watching'].get(uuid)
+        if not watch_obj:
             abort(404, message='No watch exists with the UUID of {}'.format(uuid))

+        # Create a dict copy for the JSON response (with the lock held for thread safety).
+        # This is much faster than deepcopy and doesn't copy the datastore reference.
+        # WARNING: dict() is a SHALLOW copy - nested dicts are shared with the original!
+        # Only safe because we only ADD scalar properties below, never modify nested dicts.
+        # If you need to modify nested dicts, use: from copy import deepcopy; watch = deepcopy(dict(watch_obj))
+        with self.datastore.lock:
+            watch = dict(watch_obj)
+
         if request.args.get('recheck'):
             worker_pool.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
             return "OK", 200

         if request.args.get('paused', '') == 'paused':
-            self.datastore.data['watching'].get(uuid).pause()
+            watch_obj.pause()
             return "OK", 200
         elif request.args.get('paused', '') == 'unpaused':
-            self.datastore.data['watching'].get(uuid).unpause()
+            watch_obj.unpause()
             return "OK", 200

         if request.args.get('muted', '') == 'muted':
-            self.datastore.data['watching'].get(uuid).mute()
+            watch_obj.mute()
             return "OK", 200
         elif request.args.get('muted', '') == 'unmuted':
-            self.datastore.data['watching'].get(uuid).unmute()
+            watch_obj.unmute()
             return "OK", 200

         # Return without history, get that via another API call
         # Properties are not returned as JSON, so add the required props manually
-        watch['history_n'] = watch.history_n
+        watch['history_n'] = watch_obj.history_n
         # attr .last_changed will check for the last written text snapshot on change
-        watch['last_changed'] = watch.last_changed
-        watch['viewed'] = watch.viewed
-        watch['link'] = watch.link,
+        watch['last_changed'] = watch_obj.last_changed
+        watch['viewed'] = watch_obj.viewed
+        watch['link'] = watch_obj.link  # No trailing comma - it would wrap the value in a tuple
         return watch
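
The shallow-copy caveat in the comments above is easy to reproduce in isolation. A minimal, self-contained sketch (illustrative names, not from the codebase): adding a top-level scalar to a dict() copy is harmless, but mutating a nested dict through it writes through to the original, which is exactly why only scalar properties are added here.

from copy import deepcopy

original = {'url': 'https://example.com', 'headers': {'x-api-key': 'secret'}}

shallow = dict(original)
shallow['history_n'] = 5              # Safe: ADDING a top-level scalar leaves the original untouched
shallow['headers']['leaked'] = True   # Unsafe: the nested dict is SHARED with the original

assert 'history_n' not in original            # The scalar addition did not leak back...
assert original['headers']['leaked'] is True  # ...but the nested mutation did

deep = deepcopy(original)
deep['headers']['safe'] = True        # Fine: deepcopy duplicated the nested dict too
assert 'safe' not in original['headers']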

View File

@@ -131,6 +131,95 @@ class model(watch_base):
         # Be sure the cached timestamp is ready
         bump = self.history

+    def __deepcopy__(self, memo):
+        """
+        Custom deepcopy that excludes __datastore to prevent memory leaks.
+
+        CRITICAL FIX: Without this, deepcopy(watch) copies the entire datastore
+        (which contains all other watches), causing quadratic memory growth.
+        With 100 watches, this creates 10,000 watch objects in memory (100²).
+
+        This is called by:
+        - api/Watch.py:76 (API endpoint)
+        - processors/base.py:26 (EVERY processor run)
+        - store/__init__.py:544 (clone watch)
+        - And 4+ other locations
+        """
+        from copy import deepcopy
+        # Create a new instance without calling __init__ (avoids the __datastore requirement)
+        cls = self.__class__
+        new_watch = cls.__new__(cls)
+        memo[id(self)] = new_watch
+
+        # Copy the dict data (all the watch settings)
+        for key, value in self.items():
+            new_watch[key] = deepcopy(value, memo)
+
+        # Copy instance attributes EXCEPT the datastore reference.
+        # These are cached/computed values that need to be preserved.
+        new_watch._model__newest_history_key = self._model__newest_history_key
+        new_watch._model__history_n = self._model__history_n
+        new_watch.jitter_seconds = self.jitter_seconds
+
+        # Copy datastore_path (a string, safe to copy)
+        new_watch._model__datastore_path = self._model__datastore_path
+
+        # CRITICAL: Share the datastore reference (don't copy it!)
+        # This is safe because we never modify the datastore through the watch.
+        new_watch._model__datastore = self._model__datastore
+
+        # Do NOT copy the favicon cache - let it be regenerated on demand.
+        # It is just a performance cache (prevents repeated glob operations)
+        # and will be rebuilt automatically on first access.
+        return new_watch
+
+    def __getstate__(self):
+        """
+        Custom pickle serialization that excludes __datastore.
+
+        This handles pickle/unpickle (used by multiprocessing, caching, etc.)
+        and ensures the datastore reference is never serialized.
+        """
+        # Get the dict data
+        state = dict(self)
+        # Add the instance attributes we want to preserve
+        state['__watch_metadata__'] = {
+            'newest_history_key': self._model__newest_history_key,
+            'history_n': self._model__history_n,
+            'jitter_seconds': self.jitter_seconds,
+            'datastore_path': self._model__datastore_path,
+        }
+        # NOTE: __datastore and _favicon_filename_cache are intentionally excluded.
+        # Both will be regenerated/restored as needed.
+        return state
+
+    def __setstate__(self, state):
+        """
+        Custom pickle deserialization.
+
+        WARNING: This creates a Watch without a __datastore reference!
+        The caller MUST set watch._model__datastore after unpickling.
+        """
+        # Extract metadata
+        metadata = state.pop('__watch_metadata__', {})
+        # Restore dict data
+        self.update(state)
+        # Restore instance attributes
+        self._model__newest_history_key = metadata.get('newest_history_key')
+        self._model__history_n = metadata.get('history_n', 0)
+        self.jitter_seconds = metadata.get('jitter_seconds', 0)
+        self._model__datastore_path = metadata.get('datastore_path')
+        # __datastore is NOT restored - the caller must set it!
+        # _favicon_filename_cache is NOT restored - it will regenerate on demand.
+        self._model__datastore = None
+
     @property
     def viewed(self):
         # Don't return viewed when last_viewed is 0 and newest_key is 0
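
The guarantee these dunder methods provide can be sketched with a stripped-down stand-in. Datastore and WatchModel below are hypothetical simplifications, not the project's real classes; they only show the pattern: deepcopy duplicates the settings dict but shares the datastore reference, and pickling drops the reference entirely so the caller must re-attach it.

from copy import deepcopy
import pickle

class Datastore:
    """Hypothetical stand-in for the real datastore (holds every watch)."""
    def __init__(self):
        self.watching = {}

class WatchModel(dict):
    """Hypothetical stand-in for model(watch_base); a dict subclass like the original."""
    def __init__(self, datastore, **kwargs):
        super().__init__(**kwargs)
        self._datastore = datastore

    def __deepcopy__(self, memo):
        cls = self.__class__
        new = cls.__new__(cls)       # Skip __init__, so no datastore argument is needed
        memo[id(self)] = new         # Register early in case of self-references
        for key, value in self.items():
            new[key] = deepcopy(value, memo)   # Deep-copy the settings dict
        new._datastore = self._datastore       # SHARE the datastore - never copy it
        return new

    def __getstate__(self):
        return dict(self)            # The datastore ref never reaches the pickle stream

    def __setstate__(self, state):
        self.update(state)
        self._datastore = None       # Caller must re-attach the datastore after unpickling

store = Datastore()
watch = WatchModel(store, url='https://example.com')

clone = deepcopy(watch)
assert clone._datastore is store            # Shared reference, no second datastore in memory
assert clone == watch and clone is not watch

restored = pickle.loads(pickle.dumps(watch))
assert restored._datastore is None          # Must be re-attached by the caller
restored._datastore = store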

View File

@@ -23,7 +23,14 @@ class difference_detection_processor():
     def __init__(self, datastore, watch_uuid):
         self.datastore = datastore
         self.watch_uuid = watch_uuid
+
+        # Create a stable snapshot of the watch for processing.
+        # Why deepcopy?
+        # 1. Prevents "dict changed during iteration" errors if the watch is modified during processing
+        # 2. Preserves the Watch object with its properties (.link, .is_pdf, etc.) - can't use dict()
+        # 3. Safe now: Watch.__deepcopy__() shares the datastore ref (no memory leak) but copies the dict data
         self.watch = deepcopy(self.datastore.data['watching'].get(watch_uuid))

         # Generic fetcher that should be extended (requests, playwright etc)
         self.fetcher = Fetcher()
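
The "dict changed during iteration" failure named in point 1 is real CPython behaviour: deepcopy iterates the dict's items, and a concurrent insert invalidates the iterator with a RuntimeError. A small demonstration (timing-dependent, so it may or may not trigger on a given run):

import threading
from copy import deepcopy

data = {i: {'value': i} for i in range(100_000)}

def writer():
    # Keep growing the dict while the main thread tries to copy it
    for i in range(100_000, 200_000):
        data[i] = {'value': i}

t = threading.Thread(target=writer)
t.start()
try:
    snapshot = deepcopy(data)
    print("copy completed before any resize was observed")
except RuntimeError as e:
    # Typically: "dictionary changed size during iteration"
    print(f"deepcopy failed: {e}")
t.join()

Taking the snapshot once in __init__ means each processor run works on data that cannot change underneath it, so no retry loop is needed.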

View File

@@ -248,7 +248,7 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
         else:
             # Fresh install - create new datastore
-            logger.critical(f"No datastore found, creating new datastore at {self.datastore_path}")
+            logger.warning(f"No datastore found, creating new datastore at {self.datastore_path}")

             # Set schema version to latest (no updates needed)
             updates_available = self.get_updates_available()
@@ -541,7 +541,11 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):

     # Clone a watch by UUID
     def clone(self, uuid):
         url = self.data['watching'][uuid].get('url')
-        extras = deepcopy(self.data['watching'][uuid])
+        # No need to deepcopy here - add_watch() will deepcopy extras anyway (line 569).
+        # Just pass a dict copy (with the lock held for thread safety).
+        # NOTE: dict() is a shallow copy, but that is safe since add_watch() deepcopies it.
+        with self.lock:
+            extras = dict(self.data['watching'][uuid])
         new_uuid = self.add_watch(url=url, extras=extras)
         watch = self.data['watching'][new_uuid]
         return new_uuid
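
The reasoning in the clone() comment - a shallow copy at the call boundary is enough when the callee deep-copies before storing - can be shown with a toy version. add_watch here is a hypothetical simplification, not the real method:

from copy import deepcopy
import threading

lock = threading.Lock()
watching = {'abc-123': {'url': 'https://example.com', 'tags': ['news']}}

def add_watch(url, extras):
    """Hypothetical simplification: the callee deep-copies extras before storing,
    so it owns its data outright and callers only need a cheap shallow copy."""
    stored = deepcopy(extras)
    stored['url'] = url
    return stored

# Caller side: a shallow dict() under the lock gives a consistent top-level view.
# Nested values are still shared at this point, but add_watch() deep-copies them anyway.
with lock:
    extras = dict(watching['abc-123'])

new_watch = add_watch('https://example.com/clone', extras)
new_watch['tags'].append('cloned')
assert watching['abc-123']['tags'] == ['news']   # The original watch is untouched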