mirror of
https://github.com/dgtlmoon/changedetection.io.git
synced 2026-02-06 14:26:07 +00:00
Compare commits
3 Commits
watch-data
...
watch-memo
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
799818dd40 | ||
|
|
b06797636c | ||
|
|
fcd07e23f3 |
@@ -66,47 +66,42 @@ class Watch(Resource):
|
||||
@validate_openapi_request('getWatch')
def get(self, uuid):
    """Get information about a single watch, recheck, pause, or mute.

    Query args: ``recheck``, ``paused=paused|unpaused``, ``muted=muted|unmuted``.
    Returns a plain-dict snapshot of the watch (history is excluded; fetch it
    via the dedicated history endpoint).
    """
    # Grab the live watch object first; it is needed for pause/mute calls.
    watch_obj = self.datastore.data['watching'].get(uuid)
    if not watch_obj:
        abort(404, message='No watch exists with the UUID of {}'.format(uuid))

    # Build a dict copy for the JSON response while holding the datastore lock.
    # dict() is a SHALLOW copy — nested dicts remain shared with the original.
    # That is only safe because we merely ADD scalar properties below and never
    # mutate nested structures; switch to deepcopy if that ever changes.
    with self.datastore.lock:
        watch = dict(watch_obj)

    if request.args.get('recheck'):
        worker_pool.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
        return "OK", 200

    if request.args.get('paused', '') == 'paused':
        watch_obj.pause()
        return "OK", 200
    elif request.args.get('paused', '') == 'unpaused':
        watch_obj.unpause()
        return "OK", 200

    if request.args.get('muted', '') == 'muted':
        watch_obj.mute()
        return "OK", 200
    elif request.args.get('muted', '') == 'unmuted':
        watch_obj.unmute()
        return "OK", 200

    # Return without history — that is served by another API call.
    # Properties are not serialised automatically, so add the required ones by hand.
    watch['history_n'] = watch_obj.history_n
    # .last_changed checks for the last written text snapshot on change
    watch['last_changed'] = watch_obj.last_changed
    watch['viewed'] = watch_obj.viewed
    # NOTE(review): the trailing comma makes this a 1-tuple, so 'link' serialises
    # as a JSON array — looks accidental, but preserved here for API compatibility;
    # confirm intent before removing it.
    watch['link'] = watch_obj.link,

    return watch
|
||||
|
||||
|
||||
@@ -131,6 +131,95 @@ class model(watch_base):
|
||||
# Be sure the cached timestamp is ready
|
||||
bump = self.history
|
||||
|
||||
def __deepcopy__(self, memo):
    """
    Deep-copy this watch WITHOUT deep-copying the attached datastore.

    A naive deepcopy would clone the entire datastore (and therefore every
    other watch it holds) once per copied watch — with 100 watches that is
    100**2 watch objects in memory. Instead the dict payload is deep-copied,
    the cached instance attributes are carried over, and the datastore
    reference is shared, never copied.
    """
    from copy import deepcopy

    # Bypass __init__ (it requires a datastore argument) and register the new
    # object in memo BEFORE recursing, as the deepcopy protocol requires.
    klass = type(self)
    clone = klass.__new__(klass)
    memo[id(self)] = clone

    # Deep-copy the dict payload (all of the watch's settings).
    for key, value in self.items():
        clone[key] = deepcopy(value, memo)

    # Carry over cached/computed instance attributes that must survive the copy.
    clone._model__newest_history_key = self._model__newest_history_key
    clone._model__history_n = self._model__history_n
    clone.jitter_seconds = self.jitter_seconds

    # datastore_path is a plain string and safe to share/copy.
    clone._model__datastore_path = self._model__datastore_path

    # CRITICAL: share the datastore reference rather than copying it.
    # Safe because the datastore is never modified through the watch.
    clone._model__datastore = self._model__datastore

    # The favicon filename cache is deliberately NOT copied — it is a pure
    # performance cache (avoids repeated glob calls) and is rebuilt on first
    # access.
    return clone
|
||||
|
||||
def __getstate__(self):
    """
    Pickle support: serialise the watch without its datastore reference.

    Used by pickle/multiprocessing/caching. Returns the dict payload plus a
    small '__watch_metadata__' bundle of cached attributes; the datastore
    reference and the favicon filename cache are intentionally excluded
    (the first must be re-attached by the caller, the second regenerates on
    demand).
    """
    # Cached instance attributes worth preserving across (de)serialisation.
    meta = {
        'newest_history_key': self._model__newest_history_key,
        'history_n': self._model__history_n,
        'jitter_seconds': self.jitter_seconds,
        'datastore_path': self._model__datastore_path,
    }

    # Snapshot the dict payload, then attach the metadata bundle.
    state = dict(self)
    state['__watch_metadata__'] = meta
    return state
|
||||
|
||||
def __setstate__(self, state):
    """
    Pickle support: restore the dict payload and cached attributes.

    WARNING: the datastore reference is NOT restored — it is set to None and
    the caller MUST assign watch._model__datastore after unpickling. The
    favicon filename cache is likewise left to regenerate on demand.
    """
    # Split the metadata bundle written by __getstate__ off the dict payload.
    meta = state.pop('__watch_metadata__', {})

    # Restore the dict payload.
    self.update(state)

    # Restore the cached instance attributes.
    self._model__newest_history_key = meta.get('newest_history_key')
    self._model__history_n = meta.get('history_n', 0)
    self.jitter_seconds = meta.get('jitter_seconds', 0)
    self._model__datastore_path = meta.get('datastore_path')

    # No datastore reference survives pickling — caller must set it.
    self._model__datastore = None
|
||||
|
||||
@property
|
||||
def viewed(self):
|
||||
# Don't return viewed when last_viewed is 0 and newest_key is 0
|
||||
|
||||
@@ -23,7 +23,14 @@ class difference_detection_processor():
|
||||
def __init__(self, datastore, watch_uuid):
    """Snapshot the watch identified by *watch_uuid* and prepare a fetcher."""
    self.datastore = datastore
    self.watch_uuid = watch_uuid

    # Take a stable deep-copied snapshot of the watch for this processing run:
    #  1. avoids "dict changed during iteration" if the live watch mutates mid-run
    #  2. keeps the Watch object's properties (.link, .is_pdf, ...) — dict() would lose them
    #  3. safe now: Watch.__deepcopy__ shares the datastore reference (no memory
    #     blow-up) while still deep-copying the dict payload
    self.watch = deepcopy(self.datastore.data['watching'].get(watch_uuid))

    # Generic fetcher — subclasses swap in requests/playwright etc.
    self.fetcher = Fetcher()
|
||||
|
||||
|
||||
@@ -248,7 +248,7 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
|
||||
|
||||
else:
|
||||
# Fresh install - create new datastore
|
||||
logger.critical(f"No datastore found, creating new datastore at {self.datastore_path}")
|
||||
logger.warning(f"No datastore found, creating new datastore at {self.datastore_path}")
|
||||
|
||||
# Set schema version to latest (no updates needed)
|
||||
updates_available = self.get_updates_available()
|
||||
@@ -541,7 +541,11 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
|
||||
# Clone a watch by UUID
|
||||
def clone(self, uuid):
    """Clone the watch *uuid* and return the new watch's UUID.

    A shallow dict() copy of the source watch is sufficient here because
    add_watch() deep-copies the extras it receives; the lock guards against
    the watch being mutated while we copy it.
    """
    url = self.data['watching'][uuid].get('url')
    with self.lock:
        extras = dict(self.data['watching'][uuid])
    # Fixed: dropped the dead `watch = self.data['watching'][new_uuid]` local —
    # it was assigned and never used.
    return self.add_watch(url=url, extras=extras)
|
||||
|
||||
Reference in New Issue
Block a user