Compare commits

...

6 Commits

Author SHA1 Message Date
dgtlmoon
16bf6bafd1 Oops 2026-02-17 02:24:32 +01:00
dgtlmoon
449b2c2dc3 Merge branch 'master' into fixing-signals-update 2026-02-17 02:19:29 +01:00
dgtlmoon
a53f2a784d Pluggy plugin hook for before and after a watch is processed (#3888) 2026-02-17 01:58:41 +01:00
dgtlmoon
dd2c8dfd2b Fixing socket updates for status updates. 2026-02-17 01:57:24 +01:00
dgtlmoon
7558ca5fda 0.53.3 2026-02-16 20:41:07 +01:00
dgtlmoon
383c3b427f API - Adding automated test for API with NGINX sub-path, Skip validation errors about server path (allows use on sub-paths/reverse proxy etc) (#3886) 2026-02-16 20:32:35 +01:00
7 changed files with 384 additions and 20 deletions

33
.github/nginx-reverse-proxy-test.conf vendored Normal file
View File

@@ -0,0 +1,33 @@
server {
listen 80;
server_name localhost;
# Test basic reverse proxy to changedetection.io
location / {
proxy_pass http://changedet-app:5000;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
# WebSocket support
proxy_http_version 1.1;
proxy_set_header Upgrade $http_upgrade;
proxy_set_header Connection "upgrade";
}
# Test subpath deployment with X-Forwarded-Prefix
location /changedet-sub/ {
proxy_pass http://changedet-app:5000/;
proxy_set_header X-Forwarded-Prefix /changedet-sub;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
# WebSocket support
proxy_http_version 1.1;
proxy_set_header Upgrade $http_upgrade;
proxy_set_header Connection "upgrade";
}
}

View File

@@ -324,6 +324,175 @@ jobs:
run: |
docker run --rm --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest tests/smtp/test_notification_smtp.py'
nginx-reverse-proxy:
runs-on: ubuntu-latest
needs: build
timeout-minutes: 10
env:
PYTHON_VERSION: ${{ inputs.python-version }}
steps:
- uses: actions/checkout@v6
- name: Download Docker image artifact
uses: actions/download-artifact@v7
with:
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
path: /tmp
- name: Load Docker image
run: |
docker load -i /tmp/test-changedetectionio.tar
- name: Spin up services
run: |
docker network create changedet-network
# Start changedetection.io container with X-Forwarded headers support
docker run --name changedet-app --hostname changedet-app --network changedet-network \
-e USE_X_SETTINGS=true \
-d test-changedetectionio
sleep 3
- name: Start nginx reverse proxy
run: |
# Start nginx with our test configuration
docker run --name nginx-proxy --network changedet-network -d -p 8080:80 --rm \
-v ${{ github.workspace }}/.github/nginx-reverse-proxy-test.conf:/etc/nginx/conf.d/default.conf:ro \
nginx:alpine
sleep 2
- name: Test reverse proxy - root path
run: |
echo "=== Testing nginx reverse proxy at root path ==="
curl --retry-connrefused --retry 6 -s http://localhost:8080/ > /tmp/nginx-test-root.html
# Check for changedetection.io UI elements
if grep -q "checkbox-uuid" /tmp/nginx-test-root.html; then
echo "✓ Found checkbox-uuid in response"
else
echo "ERROR: checkbox-uuid not found in response"
cat /tmp/nginx-test-root.html
exit 1
fi
# Check for watchlist content
if grep -q -i "watch" /tmp/nginx-test-root.html; then
echo "✓ Found watch/watchlist content in response"
else
echo "ERROR: watchlist content not found"
cat /tmp/nginx-test-root.html
exit 1
fi
echo "✓ Root path reverse proxy working correctly"
- name: Test reverse proxy - subpath with X-Forwarded-Prefix
run: |
echo "=== Testing nginx reverse proxy at subpath /changedet-sub/ ==="
curl --retry-connrefused --retry 6 -s http://localhost:8080/changedet-sub/ > /tmp/nginx-test-subpath.html
# Check for changedetection.io UI elements
if grep -q "checkbox-uuid" /tmp/nginx-test-subpath.html; then
echo "✓ Found checkbox-uuid in subpath response"
else
echo "ERROR: checkbox-uuid not found in subpath response"
cat /tmp/nginx-test-subpath.html
exit 1
fi
echo "✓ Subpath reverse proxy working correctly"
- name: Test API through reverse proxy subpath
run: |
echo "=== Testing API endpoints through nginx subpath /changedet-sub/ ==="
# Extract API key from the changedetection.io datastore
API_KEY=$(docker exec changedet-app cat /datastore/changedetection.json | grep -o '"api_access_token": *"[^"]*"' | cut -d'"' -f4)
if [ -z "$API_KEY" ]; then
echo "ERROR: Could not extract API key from datastore"
docker exec changedet-app cat /datastore/changedetection.json
exit 1
fi
echo "✓ Extracted API key: ${API_KEY:0:8}..."
# Create a watch via API through nginx proxy subpath
echo "Creating watch via POST to /changedet-sub/api/v1/watch"
RESPONSE=$(curl -s -w "\n%{http_code}" -X POST "http://localhost:8080/changedet-sub/api/v1/watch" \
-H "x-api-key: ${API_KEY}" \
-H "Content-Type: application/json" \
-d '{
"url": "https://example.com/test-nginx-proxy",
"tag": "nginx-test"
}')
HTTP_CODE=$(echo "$RESPONSE" | tail -n1)
BODY=$(echo "$RESPONSE" | head -n-1)
if [ "$HTTP_CODE" != "201" ]; then
echo "ERROR: Expected HTTP 201, got $HTTP_CODE"
echo "Response: $BODY"
exit 1
fi
echo "✓ Watch created successfully (HTTP 201)"
# Extract the watch UUID from response
WATCH_UUID=$(echo "$BODY" | grep -o '"uuid": *"[^"]*"' | cut -d'"' -f4)
echo "✓ Watch UUID: $WATCH_UUID"
# Update the watch via PUT through nginx proxy subpath
echo "Updating watch via PUT to /changedet-sub/api/v1/watch/${WATCH_UUID}"
RESPONSE=$(curl -s -w "\n%{http_code}" -X PUT "http://localhost:8080/changedet-sub/api/v1/watch/${WATCH_UUID}" \
-H "x-api-key: ${API_KEY}" \
-H "Content-Type: application/json" \
-d '{
"paused": true
}')
HTTP_CODE=$(echo "$RESPONSE" | tail -n1)
BODY=$(echo "$RESPONSE" | head -n-1)
if [ "$HTTP_CODE" != "200" ]; then
echo "ERROR: Expected HTTP 200, got $HTTP_CODE"
echo "Response: $BODY"
exit 1
fi
if echo "$BODY" | grep -q 'OK'; then
echo "✓ Watch updated successfully (HTTP 200, response: OK)"
else
echo "ERROR: Expected response 'OK', got: $BODY"
echo "Response: $BODY"
exit 1
fi
# Verify the watch is paused via GET
echo "Verifying watch is paused via GET"
RESPONSE=$(curl -s "http://localhost:8080/changedet-sub/api/v1/watch/${WATCH_UUID}" \
-H "x-api-key: ${API_KEY}")
if echo "$RESPONSE" | grep -q '"paused": *true'; then
echo "✓ Watch is paused as expected"
else
echo "ERROR: Watch paused state not confirmed"
echo "Response: $RESPONSE"
exit 1
fi
echo "✓ API tests through nginx subpath completed successfully"
- name: Cleanup nginx test
if: always()
run: |
docker logs nginx-proxy || true
docker logs changedet-app || true
docker stop nginx-proxy changedet-app || true
docker rm nginx-proxy changedet-app || true
# Proxy tests
proxy-tests:
runs-on: ubuntu-latest

View File

@@ -2,7 +2,7 @@
# Read more https://github.com/dgtlmoon/changedetection.io/wiki
# Semver means never use .01, or 00. Should be .1.
__version__ = '0.53.2'
__version__ = '0.53.3'
from changedetectionio.strtobool import strtobool
from json.decoder import JSONDecodeError

View File

@@ -103,6 +103,7 @@ def validate_openapi_request(operation_id):
if request.method.upper() != 'GET':
# Lazy import - only loaded when actually validating a request
from openapi_core.contrib.flask import FlaskOpenAPIRequest
from openapi_core.templating.paths.exceptions import ServerNotFound, PathNotFound, PathError
spec = get_openapi_spec()
openapi_request = FlaskOpenAPIRequest(request)
@@ -110,6 +111,16 @@ def validate_openapi_request(operation_id):
if result.errors:
error_details = []
for error in result.errors:
# Skip path/server validation errors for reverse proxy compatibility
# Flask routing already validates that endpoints exist (returns 404 if not).
# OpenAPI validation here is primarily for request body schema validation.
# When behind nginx/reverse proxy, URLs may have path prefixes that don't
# match the OpenAPI server definitions, causing false positives.
if isinstance(error, PathError):
logger.debug(f"API Call - Skipping path/server validation (delegated to Flask): {error}")
continue
error_str = str(error)
# Extract detailed schema errors from __cause__
if hasattr(error, '__cause__') and hasattr(error.__cause__, 'schema_errors'):
for schema_error in error.__cause__.schema_errors:
@@ -117,9 +128,12 @@ def validate_openapi_request(operation_id):
msg = schema_error.message if hasattr(schema_error, 'message') else str(schema_error)
error_details.append(f"{field}: {msg}")
else:
error_details.append(str(error))
error_details.append(error_str)
# Only raise if we have actual validation errors (not path/server issues)
if error_details:
logger.error(f"API Call - Validation failed: {'; '.join(error_details)}")
raise BadRequest(f"Validation failed: {'; '.join(error_details)}")
raise BadRequest(f"Validation failed: {'; '.join(error_details)}")
except BadRequest:
# Re-raise BadRequest exceptions (validation failures)
raise

View File

@@ -194,9 +194,9 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_pool,
tag_limit = request.args.get('tag')
now = int(time.time())
# Mark watches as viewed in background thread to avoid blocking
def mark_viewed_background():
"""Background thread to mark watches as viewed - discarded after completion."""
# Mark watches as viewed - use background thread only for large watch counts
def mark_viewed_impl():
"""Mark watches as viewed - can run synchronously or in background thread."""
marked_count = 0
try:
for watch_uuid, watch in datastore.data['watching'].items():
@@ -209,15 +209,21 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_pool,
datastore.set_last_viewed(watch_uuid, now)
marked_count += 1
logger.info(f"Background marking complete: {marked_count} watches marked as viewed")
logger.info(f"Marking complete: {marked_count} watches marked as viewed")
except Exception as e:
logger.error(f"Error in background mark as viewed: {e}")
logger.error(f"Error marking as viewed: {e}")
# Start background thread and return immediately
thread = threading.Thread(target=mark_viewed_background, daemon=True)
thread.start()
# For small watch counts (< 10), run synchronously to avoid race conditions in tests
# For larger counts, use background thread to avoid blocking the UI
watch_count = len(datastore.data['watching'])
if watch_count < 10:
# Run synchronously for small watch counts
mark_viewed_impl()
else:
# Start background thread for large watch counts
thread = threading.Thread(target=mark_viewed_impl, daemon=True)
thread.start()
flash(gettext("Marking watches as viewed in background..."))
return redirect(url_for('watchlist.index', tag=tag_limit))
@ui_blueprint.route("/delete", methods=['GET'])

View File

@@ -129,6 +129,51 @@ class ChangeDetectionSpec:
"""
pass
@hookspec
def update_handler_alter(update_handler, watch, datastore):
"""Modify or wrap the update_handler before it processes a watch.
This hook is called after the update_handler (perform_site_check instance) is created
but before it calls call_browser() and run_changedetection(). Plugins can use this to:
- Wrap the handler to add logging/metrics
- Modify handler configuration
- Add custom preprocessing logic
Args:
update_handler: The perform_site_check instance that will process the watch
watch: The watch dict being processed
datastore: The application datastore
Returns:
object or None: Return a modified/wrapped handler, or None to keep the original.
If multiple plugins return handlers, they are chained in registration order.
"""
pass
@hookspec
def update_finalize(update_handler, watch, datastore, processing_exception):
"""Called after watch processing completes (success or failure).
This hook is called in the finally block after all processing is complete,
allowing plugins to perform cleanup, update metrics, or log final status.
The plugin can access update_handler.last_logging_insert_id if it was stored
during update_handler_alter, and use processing_exception to determine if
the processing succeeded or failed.
Args:
update_handler: The perform_site_check instance (may be None if creation failed)
watch: The watch dict that was processed (may be None if not loaded)
datastore: The application datastore
processing_exception: The exception from the main processing block, or None if successful.
This does NOT include cleanup exceptions - only exceptions from
the actual watch processing (fetch, diff, etc).
Returns:
None: This hook doesn't return a value
"""
pass
# Set up Plugin Manager
plugin_manager = pluggy.PluginManager(PLUGIN_NAMESPACE)
@@ -499,4 +544,66 @@ def get_plugin_template_paths():
template_paths.append(templates_dir)
logger.debug(f"Added plugin template path: {templates_dir}")
return template_paths
return template_paths
def apply_update_handler_alter(update_handler, watch, datastore):
"""Apply update_handler_alter hooks from all plugins.
Allows plugins to wrap or modify the update_handler before it processes a watch.
Multiple plugins can chain modifications - each plugin receives the result from
the previous plugin.
Args:
update_handler: The perform_site_check instance to potentially modify
watch: The watch dict being processed
datastore: The application datastore
Returns:
object: The (potentially modified/wrapped) update_handler
"""
# Get all plugins that implement the update_handler_alter hook
results = plugin_manager.hook.update_handler_alter(
update_handler=update_handler,
watch=watch,
datastore=datastore
)
# Chain results - each plugin gets the result from the previous one
current_handler = update_handler
if results:
for result in results:
if result is not None:
logger.debug(f"Plugin modified update_handler for watch {watch.get('uuid')}")
current_handler = result
return current_handler
def apply_update_finalize(update_handler, watch, datastore, processing_exception):
"""Apply update_finalize hooks from all plugins.
Called in the finally block after watch processing completes, allowing plugins
to perform cleanup, update metrics, or log final status.
Args:
update_handler: The perform_site_check instance (may be None)
watch: The watch dict that was processed (may be None)
datastore: The application datastore
processing_exception: The exception from processing, or None if successful
Returns:
None
"""
try:
# Call all plugins that implement the update_finalize hook
plugin_manager.hook.update_finalize(
update_handler=update_handler,
watch=watch,
datastore=datastore,
processing_exception=processing_exception
)
except Exception as e:
# Don't let plugin errors crash the worker
logger.error(f"Error in update_finalize hook: {e}")
logger.exception(f"update_finalize hook exception details:")

View File

@@ -4,11 +4,10 @@ import changedetectionio.content_fetchers.exceptions as content_fetchers_excepti
from changedetectionio.processors.text_json_diff.processor import FilterNotFoundInResponse
from changedetectionio import html_tools
from changedetectionio import worker_pool
from changedetectionio.flask_app import watch_check_update
from changedetectionio.queuedWatchMetaData import PrioritizedItem
from changedetectionio.pluggy_interface import apply_update_handler_alter, apply_update_finalize
import asyncio
import importlib
import os
import sys
import time
@@ -56,6 +55,7 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
while not app.config.exit.is_set():
update_handler = None
watch = None
processing_exception = None # Reset at start of each iteration to prevent state bleeding
try:
# Efficient blocking via run_in_executor (no polling overhead!)
@@ -119,7 +119,7 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
# to prevent race condition with wait_for_all_checks()
fetch_start_time = round(time.time())
try:
if uuid in list(datastore.data['watching'].keys()) and datastore.data['watching'][uuid].get('url'):
changed_detected = False
@@ -136,6 +136,8 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
logger.info(f"Worker {worker_id} processing watch UUID {uuid} Priority {queued_item_data.priority} URL {watch['url']}")
try:
# Retrieve signal by name to ensure thread-safe access across worker threads
watch_check_update = signal('watch_check_update')
watch_check_update.send(watch_uuid=uuid)
# Processor is what we are using for detecting the "Change"
@@ -154,6 +156,9 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
update_handler = processor_module.perform_site_check(datastore=datastore,
watch_uuid=uuid)
# Allow plugins to modify/wrap the update_handler
update_handler = apply_update_handler_alter(update_handler, watch, datastore)
update_signal = signal('watch_small_status_comment')
update_signal.send(watch_uuid=uuid, status="Fetching page..")
@@ -498,6 +503,8 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
gc.collect()
except Exception as e:
# Store the processing exception for plugin finalization hook
processing_exception = e
logger.error(f"Worker {worker_id} unexpected error processing {uuid}: {e}")
logger.exception(f"Worker {worker_id} full exception details:")
@@ -509,6 +516,11 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
finally:
# Always cleanup - this runs whether there was an exception or not
if uuid:
# Capture references for plugin finalize hook BEFORE cleanup
# (cleanup may delete these variables, but plugins need the original references)
finalize_handler = update_handler # Capture now, before cleanup deletes it
finalize_watch = watch # Capture now, before any modifications
# Call quit() as backup (Puppeteer/Playwright have internal cleanup, but this acts as safety net)
try:
if update_handler and hasattr(update_handler, 'fetcher') and update_handler.fetcher:
@@ -518,11 +530,9 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
logger.exception(f"Worker {worker_id} full exception details:")
try:
# Release UUID from processing (thread-safe)
worker_pool.release_uuid_from_processing(uuid, worker_id=worker_id)
# Send completion signal
# Send completion signal - retrieve by name to ensure thread-safe access
if watch:
watch_check_update = signal('watch_check_update')
watch_check_update.send(watch_uuid=watch['uuid'])
# Clean up all memory references BEFORE garbage collection
@@ -547,6 +557,31 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
logger.error(f"Worker {worker_id} error during cleanup: {cleanup_error}")
logger.exception(f"Worker {worker_id} full exception details:")
# Call plugin finalization hook after all cleanup is done
# Use captured references from before cleanup
try:
apply_update_finalize(
update_handler=finalize_handler,
watch=finalize_watch,
datastore=datastore,
processing_exception=processing_exception
)
except Exception as finalize_error:
logger.error(f"Worker {worker_id} error in finalize hook: {finalize_error}")
logger.exception(f"Worker {worker_id} full exception details:")
finally:
# Clean up captured references to allow immediate garbage collection
del finalize_handler
del finalize_watch
# Release UUID from processing AFTER all cleanup and hooks complete (thread-safe)
# This ensures wait_for_all_checks() waits for finalize hooks to complete
try:
worker_pool.release_uuid_from_processing(uuid, worker_id=worker_id)
except Exception as release_error:
logger.error(f"Worker {worker_id} error releasing UUID: {release_error}")
logger.exception(f"Worker {worker_id} full exception details:")
del(uuid)
# Brief pause before continuing to avoid tight error loops (only on error)