mirror of
https://github.com/dgtlmoon/changedetection.io.git
synced 2026-03-26 05:37:59 +00:00
Compare commits
3 Commits
python-314
...
watch-memo
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
799818dd40 | ||
|
|
b06797636c | ||
|
|
fcd07e23f3 |
@@ -66,47 +66,42 @@ class Watch(Resource):
|
|||||||
@validate_openapi_request('getWatch')
|
@validate_openapi_request('getWatch')
|
||||||
def get(self, uuid):
|
def get(self, uuid):
|
||||||
"""Get information about a single watch, recheck, pause, or mute."""
|
"""Get information about a single watch, recheck, pause, or mute."""
|
||||||
import time
|
# Get watch reference first (for pause/mute operations)
|
||||||
from copy import deepcopy
|
watch_obj = self.datastore.data['watching'].get(uuid)
|
||||||
watch = None
|
if not watch_obj:
|
||||||
# Retry up to 20 times if dict is being modified
|
|
||||||
# With sleep(0), this is fast: ~200µs best case, ~20ms worst case under heavy load
|
|
||||||
for attempt in range(20):
|
|
||||||
try:
|
|
||||||
watch = deepcopy(self.datastore.data['watching'].get(uuid))
|
|
||||||
break
|
|
||||||
except RuntimeError:
|
|
||||||
# Dict changed during deepcopy, retry after yielding to scheduler
|
|
||||||
# sleep(0) releases GIL and yields - no fixed delay, just lets other threads run
|
|
||||||
if attempt < 19: # Don't yield on last attempt
|
|
||||||
time.sleep(0) # Yield to scheduler (microseconds, not milliseconds)
|
|
||||||
|
|
||||||
if not watch:
|
|
||||||
abort(404, message='No watch exists with the UUID of {}'.format(uuid))
|
abort(404, message='No watch exists with the UUID of {}'.format(uuid))
|
||||||
|
|
||||||
|
# Create a dict copy for JSON response (with lock for thread safety)
|
||||||
|
# This is much faster than deepcopy and doesn't copy the datastore reference
|
||||||
|
# WARNING: dict() is a SHALLOW copy - nested dicts are shared with original!
|
||||||
|
# Only safe because we only ADD top-level keys below (history_n, last_changed, viewed, link), never modify nested dicts
|
||||||
|
# If you need to modify nested dicts, use: from copy import deepcopy; watch = deepcopy(dict(watch_obj))
|
||||||
|
with self.datastore.lock:
|
||||||
|
watch = dict(watch_obj)
|
||||||
|
|
||||||
if request.args.get('recheck'):
|
if request.args.get('recheck'):
|
||||||
worker_pool.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
worker_pool.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||||
return "OK", 200
|
return "OK", 200
|
||||||
if request.args.get('paused', '') == 'paused':
|
if request.args.get('paused', '') == 'paused':
|
||||||
self.datastore.data['watching'].get(uuid).pause()
|
watch_obj.pause()
|
||||||
return "OK", 200
|
return "OK", 200
|
||||||
elif request.args.get('paused', '') == 'unpaused':
|
elif request.args.get('paused', '') == 'unpaused':
|
||||||
self.datastore.data['watching'].get(uuid).unpause()
|
watch_obj.unpause()
|
||||||
return "OK", 200
|
return "OK", 200
|
||||||
if request.args.get('muted', '') == 'muted':
|
if request.args.get('muted', '') == 'muted':
|
||||||
self.datastore.data['watching'].get(uuid).mute()
|
watch_obj.mute()
|
||||||
return "OK", 200
|
return "OK", 200
|
||||||
elif request.args.get('muted', '') == 'unmuted':
|
elif request.args.get('muted', '') == 'unmuted':
|
||||||
self.datastore.data['watching'].get(uuid).unmute()
|
watch_obj.unmute()
|
||||||
return "OK", 200
|
return "OK", 200
|
||||||
|
|
||||||
# Return without history, get that via another API call
|
# Return without history, get that via another API call
|
||||||
# Properties are not returned as a JSON, so add the required props manually
|
# Properties are not returned as a JSON, so add the required props manually
|
||||||
watch['history_n'] = watch.history_n
|
watch['history_n'] = watch_obj.history_n
|
||||||
# attr .last_changed will check for the last written text snapshot on change
|
# attr .last_changed will check for the last written text snapshot on change
|
||||||
watch['last_changed'] = watch.last_changed
|
watch['last_changed'] = watch_obj.last_changed
|
||||||
watch['viewed'] = watch.viewed
|
watch['viewed'] = watch_obj.viewed
|
||||||
watch['link'] = watch.link,
|
watch['link'] = watch_obj.link,
|
||||||
|
|
||||||
return watch
|
return watch
|
||||||
|
|
||||||
|
|||||||
@@ -131,6 +131,95 @@ class model(watch_base):
|
|||||||
# Be sure the cached timestamp is ready
|
# Be sure the cached timestamp is ready
|
||||||
bump = self.history
|
bump = self.history
|
||||||
|
|
||||||
|
def __deepcopy__(self, memo):
|
||||||
|
"""
|
||||||
|
Custom deepcopy that shares (rather than copies) __datastore to prevent memory leaks.
|
||||||
|
|
||||||
|
CRITICAL FIX: Without this, deepcopy(watch) copies the entire datastore
|
||||||
|
(which contains all other watches), causing quadratic memory growth.
|
||||||
|
With 100 watches, this creates 10,000 watch objects in memory (100²).
|
||||||
|
|
||||||
|
This is called by:
|
||||||
|
- api/Watch.py:76 (API endpoint)
|
||||||
|
- processors/base.py:26 (EVERY processor run)
|
||||||
|
- store/__init__.py:544 (clone watch)
|
||||||
|
- And 4+ other locations
|
||||||
|
"""
|
||||||
|
from copy import deepcopy
|
||||||
|
|
||||||
|
# Create a new instance without calling __init__ (avoids __datastore requirement)
|
||||||
|
cls = self.__class__
|
||||||
|
new_watch = cls.__new__(cls)
|
||||||
|
memo[id(self)] = new_watch
|
||||||
|
|
||||||
|
# Copy the dict data (all the watch settings)
|
||||||
|
for key, value in self.items():
|
||||||
|
new_watch[key] = deepcopy(value, memo)
|
||||||
|
|
||||||
|
# Copy instance attributes EXCEPT the datastore references
|
||||||
|
# These are cached/computed values that need to be preserved
|
||||||
|
new_watch._model__newest_history_key = self._model__newest_history_key
|
||||||
|
new_watch._model__history_n = self._model__history_n
|
||||||
|
new_watch.jitter_seconds = self.jitter_seconds
|
||||||
|
|
||||||
|
# Copy datastore_path (string, safe to copy)
|
||||||
|
new_watch._model__datastore_path = self._model__datastore_path
|
||||||
|
|
||||||
|
# CRITICAL: Share the datastore reference (don't copy it!)
|
||||||
|
# This is safe because we never modify the datastore through the watch
|
||||||
|
new_watch._model__datastore = self._model__datastore
|
||||||
|
|
||||||
|
# Do NOT copy favicon cache - let it be regenerated on demand
|
||||||
|
# This is just a performance cache (prevents repeated glob operations)
|
||||||
|
# and will be rebuilt automatically on first access
|
||||||
|
|
||||||
|
return new_watch
|
||||||
|
|
||||||
|
def __getstate__(self):
|
||||||
|
"""
|
||||||
|
Custom pickle serialization that excludes __datastore.
|
||||||
|
|
||||||
|
This handles pickle/unpickle (used by multiprocessing, caching, etc.)
|
||||||
|
and ensures the datastore reference is never serialized.
|
||||||
|
"""
|
||||||
|
# Get the dict data
|
||||||
|
state = dict(self)
|
||||||
|
|
||||||
|
# Add the instance attributes we want to preserve
|
||||||
|
state['__watch_metadata__'] = {
|
||||||
|
'newest_history_key': self._model__newest_history_key,
|
||||||
|
'history_n': self._model__history_n,
|
||||||
|
'jitter_seconds': self.jitter_seconds,
|
||||||
|
'datastore_path': self._model__datastore_path,
|
||||||
|
}
|
||||||
|
|
||||||
|
# NOTE: __datastore and _favicon_filename_cache are intentionally excluded
|
||||||
|
# Both will be regenerated/restored as needed
|
||||||
|
return state
|
||||||
|
|
||||||
|
def __setstate__(self, state):
|
||||||
|
"""
|
||||||
|
Custom pickle deserialization.
|
||||||
|
|
||||||
|
WARNING: This creates a Watch without a __datastore reference!
|
||||||
|
The caller MUST set watch._model__datastore after unpickling.
|
||||||
|
"""
|
||||||
|
# Extract metadata
|
||||||
|
metadata = state.pop('__watch_metadata__', {})
|
||||||
|
|
||||||
|
# Restore dict data
|
||||||
|
self.update(state)
|
||||||
|
|
||||||
|
# Restore instance attributes
|
||||||
|
self._model__newest_history_key = metadata.get('newest_history_key')
|
||||||
|
self._model__history_n = metadata.get('history_n', 0)
|
||||||
|
self.jitter_seconds = metadata.get('jitter_seconds', 0)
|
||||||
|
self._model__datastore_path = metadata.get('datastore_path')
|
||||||
|
|
||||||
|
# __datastore is NOT restored - caller must set it!
|
||||||
|
# _favicon_filename_cache is NOT restored - will regenerate on demand
|
||||||
|
self._model__datastore = None
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def viewed(self):
|
def viewed(self):
|
||||||
# Don't return viewed when last_viewed is 0 and newest_key is 0
|
# Don't return viewed when last_viewed is 0 and newest_key is 0
|
||||||
|
|||||||
@@ -23,7 +23,14 @@ class difference_detection_processor():
|
|||||||
def __init__(self, datastore, watch_uuid):
|
def __init__(self, datastore, watch_uuid):
|
||||||
self.datastore = datastore
|
self.datastore = datastore
|
||||||
self.watch_uuid = watch_uuid
|
self.watch_uuid = watch_uuid
|
||||||
|
|
||||||
|
# Create a stable snapshot of the watch for processing
|
||||||
|
# Why deepcopy?
|
||||||
|
# 1. Prevents "dict changed during iteration" errors if watch is modified during processing
|
||||||
|
# 2. Preserves Watch object with properties (.link, .is_pdf, etc.) - can't use dict()
|
||||||
|
# 3. Safe now: Watch.__deepcopy__() shares datastore ref (no memory leak) but copies dict data
|
||||||
self.watch = deepcopy(self.datastore.data['watching'].get(watch_uuid))
|
self.watch = deepcopy(self.datastore.data['watching'].get(watch_uuid))
|
||||||
|
|
||||||
# Generic fetcher that should be extended (requests, playwright etc)
|
# Generic fetcher that should be extended (requests, playwright etc)
|
||||||
self.fetcher = Fetcher()
|
self.fetcher = Fetcher()
|
||||||
|
|
||||||
|
|||||||
@@ -248,7 +248,7 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
|
|||||||
|
|
||||||
else:
|
else:
|
||||||
# Fresh install - create new datastore
|
# Fresh install - create new datastore
|
||||||
logger.critical(f"No datastore found, creating new datastore at {self.datastore_path}")
|
logger.warning(f"No datastore found, creating new datastore at {self.datastore_path}")
|
||||||
|
|
||||||
# Set schema version to latest (no updates needed)
|
# Set schema version to latest (no updates needed)
|
||||||
updates_available = self.get_updates_available()
|
updates_available = self.get_updates_available()
|
||||||
@@ -541,7 +541,11 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
|
|||||||
# Clone a watch by UUID
|
# Clone a watch by UUID
|
||||||
def clone(self, uuid):
|
def clone(self, uuid):
|
||||||
url = self.data['watching'][uuid].get('url')
|
url = self.data['watching'][uuid].get('url')
|
||||||
extras = deepcopy(self.data['watching'][uuid])
|
# No need to deepcopy here - add_watch() will deepcopy extras anyway (line 569)
|
||||||
|
# Just pass a dict copy (with lock for thread safety)
|
||||||
|
# NOTE: dict() is shallow copy but safe since add_watch() deepcopies it
|
||||||
|
with self.lock:
|
||||||
|
extras = dict(self.data['watching'][uuid])
|
||||||
new_uuid = self.add_watch(url=url, extras=extras)
|
new_uuid = self.add_watch(url=url, extras=extras)
|
||||||
watch = self.data['watching'][new_uuid]
|
watch = self.data['watching'][new_uuid]
|
||||||
return new_uuid
|
return new_uuid
|
||||||
|
|||||||
Reference in New Issue
Block a user