Compare commits

...

4 Commits

Author    SHA1        Message                                                                Date
dgtlmoon  16bf6bafd1  Oops                                                                   2026-02-17 02:24:32 +01:00
dgtlmoon  449b2c2dc3  Merge branch 'master' into fixing-signals-update                       2026-02-17 02:19:29 +01:00
dgtlmoon  a53f2a784d  Pluggy plugin hook for before and after a watch is processed (#3888)  2026-02-17 01:58:41 +01:00
dgtlmoon  dd2c8dfd2b  Fixing socket updates for status updates.                              2026-02-17 01:57:24 +01:00
3 changed files with 165 additions and 17 deletions


@@ -194,9 +194,9 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_pool,
         tag_limit = request.args.get('tag')
         now = int(time.time())
-        # Mark watches as viewed in background thread to avoid blocking
-        def mark_viewed_background():
-            """Background thread to mark watches as viewed - discarded after completion."""
+        # Mark watches as viewed - use background thread only for large watch counts
+        def mark_viewed_impl():
+            """Mark watches as viewed - can run synchronously or in background thread."""
             marked_count = 0
             try:
                 for watch_uuid, watch in datastore.data['watching'].items():
@@ -209,15 +209,21 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_pool,
                         datastore.set_last_viewed(watch_uuid, now)
                         marked_count += 1
-                logger.info(f"Background marking complete: {marked_count} watches marked as viewed")
+                logger.info(f"Marking complete: {marked_count} watches marked as viewed")
             except Exception as e:
-                logger.error(f"Error in background mark as viewed: {e}")
+                logger.error(f"Error marking as viewed: {e}")

-        # Start background thread and return immediately
-        thread = threading.Thread(target=mark_viewed_background, daemon=True)
-        thread.start()
+        # For small watch counts (< 10), run synchronously to avoid race conditions in tests
+        # For larger counts, use background thread to avoid blocking the UI
+        watch_count = len(datastore.data['watching'])
+        if watch_count < 10:
+            # Run synchronously for small watch counts
+            mark_viewed_impl()
+        else:
+            # Start background thread for large watch counts
+            thread = threading.Thread(target=mark_viewed_impl, daemon=True)
+            thread.start()

         flash(gettext("Marking watches as viewed in background..."))
         return redirect(url_for('watchlist.index', tag=tag_limit))

     @ui_blueprint.route("/delete", methods=['GET'])
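The dispatch logic this hunk introduces is a small reusable pattern: run the work inline when it is cheap (so callers and tests observe the effect immediately), hand it to a daemon thread when it is not. A minimal standalone sketch, assuming nothing beyond the standard library (`run_sync_or_background`, `work`, and the threshold constant are illustrative names, not part of this changeset):

```python
import threading

SYNC_THRESHOLD = 10  # mirrors the "< 10 watches" cutoff in the diff above

def run_sync_or_background(work, item_count):
    """Run `work` inline for small workloads, in a daemon thread for large ones."""
    if item_count < SYNC_THRESHOLD:
        work()  # synchronous: completion is visible to the caller (and to tests)
    else:
        # Fire-and-forget: daemon threads never block interpreter shutdown
        threading.Thread(target=work, daemon=True).start()
```

Note that the `flash(...)` call stays unchanged context, so the "in background..." message is shown even on the synchronous path.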


@@ -129,6 +129,51 @@ class ChangeDetectionSpec:
         """
         pass

+    @hookspec
+    def update_handler_alter(update_handler, watch, datastore):
+        """Modify or wrap the update_handler before it processes a watch.
+
+        This hook is called after the update_handler (perform_site_check instance) is created
+        but before it calls call_browser() and run_changedetection(). Plugins can use this to:
+        - Wrap the handler to add logging/metrics
+        - Modify handler configuration
+        - Add custom preprocessing logic
+
+        Args:
+            update_handler: The perform_site_check instance that will process the watch
+            watch: The watch dict being processed
+            datastore: The application datastore
+
+        Returns:
+            object or None: Return a modified/wrapped handler, or None to keep the original.
+                If multiple plugins return handlers, they are chained in registration order.
+        """
+        pass
+
+    @hookspec
+    def update_finalize(update_handler, watch, datastore, processing_exception):
+        """Called after watch processing completes (success or failure).
+
+        This hook is called in the finally block after all processing is complete,
+        allowing plugins to perform cleanup, update metrics, or log final status.
+
+        The plugin can access update_handler.last_logging_insert_id if it was stored
+        during update_handler_alter, and use processing_exception to determine if
+        the processing succeeded or failed.
+
+        Args:
+            update_handler: The perform_site_check instance (may be None if creation failed)
+            watch: The watch dict that was processed (may be None if not loaded)
+            datastore: The application datastore
+            processing_exception: The exception from the main processing block, or None if successful.
+                This does NOT include cleanup exceptions - only exceptions from
+                the actual watch processing (fetch, diff, etc).
+
+        Returns:
+            None: This hook doesn't return a value
+        """
+        pass

 # Set up Plugin Manager
 plugin_manager = pluggy.PluginManager(PLUGIN_NAMESPACE)
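Taken together, the two hookspecs define the plugin-facing contract. A plugin consuming them might look like the sketch below; it follows standard pluggy usage, where `HookimplMarker` must be constructed with the same namespace string as the `PluginManager` above (assumed to be `"changedetectionio"` here), and `TimingPlugin` is an illustrative name, not part of this changeset:

```python
import time
import pluggy

# Must match PLUGIN_NAMESPACE used by pluggy_interface (assumed "changedetectionio")
hookimpl = pluggy.HookimplMarker("changedetectionio")

class TimingPlugin:
    """Illustrative plugin: measures how long each watch takes to process."""

    def __init__(self):
        self._started = {}

    @hookimpl
    def update_handler_alter(self, update_handler, watch, datastore):
        # Record the start time; returning None keeps the original handler unchanged
        self._started[watch.get('uuid')] = time.monotonic()
        return None

    @hookimpl
    def update_finalize(self, update_handler, watch, datastore, processing_exception):
        if watch is None:
            return  # handler creation failed before the watch was loaded
        started = self._started.pop(watch.get('uuid'), None)
        if started is not None:
            outcome = "failed" if processing_exception else "ok"
            print(f"watch {watch.get('uuid')}: {outcome} in {time.monotonic() - started:.2f}s")
```

Registration would follow the usual pluggy route, e.g. `plugin_manager.register(TimingPlugin())`.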
@@ -499,4 +544,66 @@ def get_plugin_template_paths():
             template_paths.append(templates_dir)
             logger.debug(f"Added plugin template path: {templates_dir}")
-    return template_paths
+    return template_paths
+
+
+def apply_update_handler_alter(update_handler, watch, datastore):
+    """Apply update_handler_alter hooks from all plugins.
+
+    Allows plugins to wrap or modify the update_handler before it processes a watch.
+    Multiple plugins can chain modifications - each plugin receives the result from
+    the previous plugin.
+
+    Args:
+        update_handler: The perform_site_check instance to potentially modify
+        watch: The watch dict being processed
+        datastore: The application datastore
+
+    Returns:
+        object: The (potentially modified/wrapped) update_handler
+    """
+    # Get all plugins that implement the update_handler_alter hook
+    results = plugin_manager.hook.update_handler_alter(
+        update_handler=update_handler,
+        watch=watch,
+        datastore=datastore
+    )
+
+    # Chain results - each plugin gets the result from the previous one
+    current_handler = update_handler
+    if results:
+        for result in results:
+            if result is not None:
+                logger.debug(f"Plugin modified update_handler for watch {watch.get('uuid')}")
+                current_handler = result
+
+    return current_handler
+
+
+def apply_update_finalize(update_handler, watch, datastore, processing_exception):
+    """Apply update_finalize hooks from all plugins.
+
+    Called in the finally block after watch processing completes, allowing plugins
+    to perform cleanup, update metrics, or log final status.
+
+    Args:
+        update_handler: The perform_site_check instance (may be None)
+        watch: The watch dict that was processed (may be None)
+        datastore: The application datastore
+        processing_exception: The exception from processing, or None if successful
+
+    Returns:
+        None
+    """
+    try:
+        # Call all plugins that implement the update_finalize hook
+        plugin_manager.hook.update_finalize(
+            update_handler=update_handler,
+            watch=watch,
+            datastore=datastore,
+            processing_exception=processing_exception
+        )
+    except Exception as e:
+        # Don't let plugin errors crash the worker
+        logger.error(f"Error in update_finalize hook: {e}")
+        logger.exception(f"update_finalize hook exception details:")


@@ -4,11 +4,10 @@ import changedetectionio.content_fetchers.exceptions as content_fetchers_excepti
 from changedetectionio.processors.text_json_diff.processor import FilterNotFoundInResponse
 from changedetectionio import html_tools
 from changedetectionio import worker_pool
-from changedetectionio.flask_app import watch_check_update
 from changedetectionio.queuedWatchMetaData import PrioritizedItem
+from changedetectionio.pluggy_interface import apply_update_handler_alter, apply_update_finalize
 import asyncio
 import importlib
 import os
 import sys
 import time
@@ -56,6 +55,7 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
     while not app.config.exit.is_set():
         update_handler = None
         watch = None
+        processing_exception = None  # Reset at start of each iteration to prevent state bleeding

         try:
             # Efficient blocking via run_in_executor (no polling overhead!)
@@ -119,7 +119,7 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
             # to prevent race condition with wait_for_all_checks()
             fetch_start_time = round(time.time())

             try:
                 if uuid in list(datastore.data['watching'].keys()) and datastore.data['watching'][uuid].get('url'):
                     changed_detected = False
@@ -136,6 +136,8 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
                     logger.info(f"Worker {worker_id} processing watch UUID {uuid} Priority {queued_item_data.priority} URL {watch['url']}")

                     try:
+                        # Retrieve signal by name to ensure thread-safe access across worker threads
+                        watch_check_update = signal('watch_check_update')
                         watch_check_update.send(watch_uuid=uuid)

                         # Processor is what we are using for detecting the "Change"
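Retrieving the signal by name works because these are named signals (consistent with blinker, which provides the `signal()` registry used across this codebase): `signal('watch_check_update')` returns the same shared signal object on every call, so worker threads can resolve it locally instead of importing a module-level reference from `flask_app`. A minimal sketch, assuming the `blinker` package (receiver name and UUID value are illustrative):

```python
from blinker import signal

# The registry hands back the same NamedSignal instance for the same name
assert signal('watch_check_update') is signal('watch_check_update')

def on_check_update(sender, watch_uuid=None):
    print(f"watch updated: {watch_uuid}")

signal('watch_check_update').connect(on_check_update)

# Senders resolve the signal at call time, mirroring the diff above
signal('watch_check_update').send(watch_uuid='0000-illustrative-uuid')
```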
@@ -154,6 +156,9 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
                         update_handler = processor_module.perform_site_check(datastore=datastore,
                                                                              watch_uuid=uuid)

+                        # Allow plugins to modify/wrap the update_handler
+                        update_handler = apply_update_handler_alter(update_handler, watch, datastore)
+
                         update_signal = signal('watch_small_status_comment')
                         update_signal.send(watch_uuid=uuid, status="Fetching page..")
@@ -498,6 +503,8 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
                 gc.collect()

         except Exception as e:
+            # Store the processing exception for plugin finalization hook
+            processing_exception = e
             logger.error(f"Worker {worker_id} unexpected error processing {uuid}: {e}")
             logger.exception(f"Worker {worker_id} full exception details:")
@@ -509,6 +516,11 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
         finally:
             # Always cleanup - this runs whether there was an exception or not
             if uuid:
+                # Capture references for plugin finalize hook BEFORE cleanup
+                # (cleanup may delete these variables, but plugins need the original references)
+                finalize_handler = update_handler  # Capture now, before cleanup deletes it
+                finalize_watch = watch  # Capture now, before any modifications
+
                 # Call quit() as backup (Puppeteer/Playwright have internal cleanup, but this acts as safety net)
                 try:
                     if update_handler and hasattr(update_handler, 'fetcher') and update_handler.fetcher:
@@ -518,11 +530,9 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
                     logger.exception(f"Worker {worker_id} full exception details:")

                 try:
-                    # Release UUID from processing (thread-safe)
-                    worker_pool.release_uuid_from_processing(uuid, worker_id=worker_id)
-
-                    # Send completion signal
+                    # Send completion signal - retrieve by name to ensure thread-safe access
                     if watch:
+                        watch_check_update = signal('watch_check_update')
                         watch_check_update.send(watch_uuid=watch['uuid'])

                     # Clean up all memory references BEFORE garbage collection
@@ -547,6 +557,31 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
                     logger.error(f"Worker {worker_id} error during cleanup: {cleanup_error}")
                     logger.exception(f"Worker {worker_id} full exception details:")

+                # Call plugin finalization hook after all cleanup is done
+                # Use captured references from before cleanup
+                try:
+                    apply_update_finalize(
+                        update_handler=finalize_handler,
+                        watch=finalize_watch,
+                        datastore=datastore,
+                        processing_exception=processing_exception
+                    )
+                except Exception as finalize_error:
+                    logger.error(f"Worker {worker_id} error in finalize hook: {finalize_error}")
+                    logger.exception(f"Worker {worker_id} full exception details:")
+                finally:
+                    # Clean up captured references to allow immediate garbage collection
+                    del finalize_handler
+                    del finalize_watch
+
+                # Release UUID from processing AFTER all cleanup and hooks complete (thread-safe)
+                # This ensures wait_for_all_checks() waits for finalize hooks to complete
+                try:
+                    worker_pool.release_uuid_from_processing(uuid, worker_id=worker_id)
+                except Exception as release_error:
+                    logger.error(f"Worker {worker_id} error releasing UUID: {release_error}")
+                    logger.exception(f"Worker {worker_id} full exception details:")
+
                 del(uuid)

         # Brief pause before continuing to avoid tight error loops (only on error)
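The net control flow this file now follows (capture the exception, clean up, run finalize hooks, release the UUID last so `wait_for_all_checks()` also waits on the hooks) compresses into a short skeleton. All names below are stand-ins for illustration, not the worker's actual API:

```python
def process_one_item(uuid, make_handler, finalize_hooks, release_uuid):
    """Skeleton of the worker's exception/finalize/release ordering."""
    update_handler = None
    processing_exception = None  # reset per item so state never bleeds between iterations
    try:
        update_handler = make_handler(uuid)
        update_handler.run()
    except Exception as e:
        processing_exception = e  # captured for the hooks, not re-raised
    finally:
        finalize_handler = update_handler  # capture before cleanup drops the reference
        try:
            finalize_hooks(finalize_handler, processing_exception)
        except Exception:
            pass  # hook errors must never crash the worker loop
        finally:
            # Released last: anyone waiting on "uuid is processing" also waits for hooks
            release_uuid(uuid)
```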