mirror of
https://github.com/dgtlmoon/changedetection.io.git
synced 2026-02-06 14:26:07 +00:00
Compare commits
10 Commits
event-loop
...
jinja2-esc
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
913f0bf103 | ||
|
|
f77d2bac6d | ||
|
|
75ecd1b793 | ||
|
|
4fe2a67839 | ||
|
|
5bbbe37436 | ||
|
|
83d7ce0fcf | ||
|
|
6bea9909ec | ||
|
|
1aabf967ef | ||
|
|
30dc4ac23b | ||
|
|
2658f81f02 |
@@ -37,10 +37,29 @@ jobs:
|
||||
${{ runner.os }}-pip-py${{ env.PYTHON_VERSION }}-
|
||||
${{ runner.os }}-pip-
|
||||
|
||||
- name: Get current date for cache key
|
||||
id: date
|
||||
run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
|
||||
- name: Build changedetection.io container for testing under Python ${{ env.PYTHON_VERSION }}
|
||||
uses: docker/build-push-action@v6
|
||||
with:
|
||||
context: ./
|
||||
file: ./Dockerfile
|
||||
build-args: |
|
||||
PYTHON_VERSION=${{ env.PYTHON_VERSION }}
|
||||
LOGGER_LEVEL=TRACE
|
||||
tags: test-changedetectionio
|
||||
load: true
|
||||
cache-from: type=gha,scope=build-${{ github.ref_name }}-py${{ env.PYTHON_VERSION }}-${{ hashFiles('requirements.txt', 'Dockerfile') }}-${{ steps.date.outputs.date }}
|
||||
cache-to: type=gha,mode=max,scope=build-${{ github.ref_name }}-py${{ env.PYTHON_VERSION }}-${{ hashFiles('requirements.txt', 'Dockerfile') }}-${{ steps.date.outputs.date }}
|
||||
|
||||
- name: Verify build
|
||||
run: |
|
||||
echo "---- Building for Python ${{ env.PYTHON_VERSION }} -----"
|
||||
docker build --build-arg PYTHON_VERSION=${{ env.PYTHON_VERSION }} --build-arg LOGGER_LEVEL=TRACE -t test-changedetectionio .
|
||||
echo "---- Built for Python ${{ env.PYTHON_VERSION }} -----"
|
||||
docker run test-changedetectionio bash -c 'pip list'
|
||||
|
||||
- name: We should be Python ${{ env.PYTHON_VERSION }} ...
|
||||
@@ -376,6 +395,29 @@ jobs:
|
||||
cd changedetectionio
|
||||
./run_custom_browser_url_tests.sh
|
||||
|
||||
processor-plugin-tests:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
timeout-minutes: 20
|
||||
env:
|
||||
PYTHON_VERSION: ${{ inputs.python-version }}
|
||||
steps:
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
|
||||
- name: Load Docker image
|
||||
run: |
|
||||
docker load -i /tmp/test-changedetectionio.tar
|
||||
|
||||
- name: Basic processor plugin registration and checks
|
||||
run: |
|
||||
docker run -e EXTRA_PACKAGES=changedetection.io-osint-processor test-changedetectionio bash -c 'cd changedetectionio;pytest -vvv -s tests/plugins/test_processor.py::test_check_plugin_processor'
|
||||
|
||||
# Container startup tests
|
||||
container-tests:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -29,3 +29,4 @@ test-datastore/
|
||||
|
||||
# Memory consumption log
|
||||
test-memory.log
|
||||
tests/logs/
|
||||
|
||||
@@ -138,6 +138,15 @@ ENV LOGGER_LEVEL="$LOGGER_LEVEL"
|
||||
ENV LC_ALL=en_US.UTF-8
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Copy and set up entrypoint script for installing extra packages
|
||||
COPY docker-entrypoint.sh /docker-entrypoint.sh
|
||||
RUN chmod +x /docker-entrypoint.sh
|
||||
|
||||
# Set entrypoint to handle EXTRA_PACKAGES env var
|
||||
ENTRYPOINT ["/docker-entrypoint.sh"]
|
||||
|
||||
# Default command (can be overridden in docker-compose.yml)
|
||||
CMD ["python", "./changedetection.py", "-d", "/datastore"]
|
||||
|
||||
|
||||
|
||||
@@ -102,8 +102,8 @@ def sigshutdown_handler(_signo, _stack_frame):
|
||||
|
||||
# Shutdown workers and queues immediately
|
||||
try:
|
||||
from changedetectionio import worker_handler
|
||||
worker_handler.shutdown_workers()
|
||||
from changedetectionio import worker_pool
|
||||
worker_pool.shutdown_workers()
|
||||
except Exception as e:
|
||||
logger.error(f"Error shutting down workers: {str(e)}")
|
||||
|
||||
@@ -415,12 +415,12 @@ def main():
|
||||
# This must happen AFTER app initialization so update_q is available
|
||||
if batch_mode and added_watch_uuids:
|
||||
from changedetectionio.flask_app import update_q
|
||||
from changedetectionio import queuedWatchMetaData, worker_handler
|
||||
from changedetectionio import queuedWatchMetaData, worker_pool
|
||||
|
||||
logger.info(f"Batch mode: Queuing {len(added_watch_uuids)} newly added watches")
|
||||
for watch_uuid in added_watch_uuids:
|
||||
try:
|
||||
worker_handler.queue_item_async_safe(
|
||||
worker_pool.queue_item_async_safe(
|
||||
update_q,
|
||||
queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid})
|
||||
)
|
||||
@@ -432,7 +432,7 @@ def main():
|
||||
# This must happen AFTER app initialization so update_q is available
|
||||
if recheck_watches is not None:
|
||||
from changedetectionio.flask_app import update_q
|
||||
from changedetectionio import queuedWatchMetaData, worker_handler
|
||||
from changedetectionio import queuedWatchMetaData, worker_pool
|
||||
|
||||
watches_to_queue = []
|
||||
if recheck_watches == 'all':
|
||||
@@ -454,7 +454,7 @@ def main():
|
||||
for watch_uuid in watches_to_queue:
|
||||
if watch_uuid in datastore.data['watching']:
|
||||
try:
|
||||
worker_handler.queue_item_async_safe(
|
||||
worker_pool.queue_item_async_safe(
|
||||
update_q,
|
||||
queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid})
|
||||
)
|
||||
@@ -516,7 +516,7 @@ def main():
|
||||
for watch_uuid in watches_to_queue:
|
||||
if watch_uuid in datastore.data['watching']:
|
||||
try:
|
||||
worker_handler.queue_item_async_safe(
|
||||
worker_pool.queue_item_async_safe(
|
||||
update_q,
|
||||
queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid})
|
||||
)
|
||||
@@ -549,7 +549,7 @@ def main():
|
||||
logger.info(f"Batch mode: Waiting for iteration {current_iteration}/{total_iterations} to complete...")
|
||||
|
||||
# Use the shared wait_for_all_checks function
|
||||
completed = worker_handler.wait_for_all_checks(update_q, timeout=300)
|
||||
completed = worker_pool.wait_for_all_checks(update_q, timeout=300)
|
||||
|
||||
if not completed:
|
||||
logger.warning(f"Batch mode: Iteration {current_iteration} timed out after 300 seconds")
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
from changedetectionio import queuedWatchMetaData
|
||||
from changedetectionio import worker_handler
|
||||
from changedetectionio import worker_pool
|
||||
from flask_expects_json import expects_json
|
||||
from flask_restful import abort, Resource
|
||||
from loguru import logger
|
||||
@@ -42,7 +42,7 @@ class Tag(Resource):
|
||||
# If less than 20 watches, queue synchronously for immediate feedback
|
||||
if len(watches_to_queue) < 20:
|
||||
for watch_uuid in watches_to_queue:
|
||||
worker_handler.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid}))
|
||||
worker_pool.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid}))
|
||||
return {'status': f'OK, queued {len(watches_to_queue)} watches for rechecking'}, 200
|
||||
else:
|
||||
# 20+ watches - queue in background thread to avoid blocking API response
|
||||
@@ -50,7 +50,7 @@ class Tag(Resource):
|
||||
"""Background thread to queue watches - discarded after completion."""
|
||||
try:
|
||||
for watch_uuid in watches_to_queue:
|
||||
worker_handler.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid}))
|
||||
worker_pool.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid}))
|
||||
logger.info(f"Background queueing complete for tag {tag['uuid']}: {len(watches_to_queue)} watches queued")
|
||||
except Exception as e:
|
||||
logger.error(f"Error in background queueing for tag {tag['uuid']}: {e}")
|
||||
|
||||
@@ -6,7 +6,7 @@ from changedetectionio.favicon_utils import get_favicon_mime_type
|
||||
|
||||
from . import auth
|
||||
from changedetectionio import queuedWatchMetaData, strtobool
|
||||
from changedetectionio import worker_handler
|
||||
from changedetectionio import worker_pool
|
||||
from flask import request, make_response, send_from_directory
|
||||
from flask_expects_json import expects_json
|
||||
from flask_restful import abort, Resource
|
||||
@@ -85,7 +85,7 @@ class Watch(Resource):
|
||||
abort(404, message='No watch exists with the UUID of {}'.format(uuid))
|
||||
|
||||
if request.args.get('recheck'):
|
||||
worker_handler.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
worker_pool.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
return "OK", 200
|
||||
if request.args.get('paused', '') == 'paused':
|
||||
self.datastore.data['watching'].get(uuid).pause()
|
||||
@@ -169,58 +169,18 @@ class Watch(Resource):
|
||||
|
||||
# Handle processor-config-* fields separately (save to JSON, not datastore)
|
||||
from changedetectionio import processors
|
||||
processor_config_data = {}
|
||||
regular_data = {}
|
||||
|
||||
for key, value in request.json.items():
|
||||
if key.startswith('processor_config_'):
|
||||
config_key = key.replace('processor_config_', '')
|
||||
if value: # Only save non-empty values
|
||||
processor_config_data[config_key] = value
|
||||
else:
|
||||
regular_data[key] = value
|
||||
# Make a mutable copy of request.json for modification
|
||||
json_data = dict(request.json)
|
||||
|
||||
# Extract and remove processor config fields from json_data
|
||||
processor_config_data = processors.extract_processor_config_from_form_data(json_data)
|
||||
|
||||
# Update watch with regular (non-processor-config) fields
|
||||
watch.update(regular_data)
|
||||
watch.update(json_data)
|
||||
|
||||
# Save processor config to JSON file if any config data exists
|
||||
if processor_config_data:
|
||||
try:
|
||||
processor_name = request.json.get('processor', watch.get('processor'))
|
||||
if processor_name:
|
||||
# Create a processor instance to access config methods
|
||||
from changedetectionio.processors import difference_detection_processor
|
||||
processor_instance = difference_detection_processor(self.datastore, uuid)
|
||||
# Use processor name as filename so each processor keeps its own config
|
||||
config_filename = f'{processor_name}.json'
|
||||
processor_instance.update_extra_watch_config(config_filename, processor_config_data)
|
||||
logger.debug(f"API: Saved processor config to {config_filename}: {processor_config_data}")
|
||||
|
||||
# Call optional edit_hook if processor has one
|
||||
try:
|
||||
import importlib
|
||||
edit_hook_module_name = f'changedetectionio.processors.{processor_name}.edit_hook'
|
||||
|
||||
try:
|
||||
edit_hook = importlib.import_module(edit_hook_module_name)
|
||||
logger.debug(f"API: Found edit_hook module for {processor_name}")
|
||||
|
||||
if hasattr(edit_hook, 'on_config_save'):
|
||||
logger.info(f"API: Calling edit_hook.on_config_save for {processor_name}")
|
||||
# Call hook and get updated config
|
||||
updated_config = edit_hook.on_config_save(watch, processor_config_data, self.datastore)
|
||||
# Save updated config back to file
|
||||
processor_instance.update_extra_watch_config(config_filename, updated_config)
|
||||
logger.info(f"API: Edit hook updated config: {updated_config}")
|
||||
else:
|
||||
logger.debug(f"API: Edit hook module found but no on_config_save function")
|
||||
except ModuleNotFoundError:
|
||||
logger.debug(f"API: No edit_hook module for processor {processor_name} (this is normal)")
|
||||
except Exception as hook_error:
|
||||
logger.error(f"API: Edit hook error (non-fatal): {hook_error}", exc_info=True)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"API: Failed to save processor config: {e}")
|
||||
# Save processor config to JSON file
|
||||
processors.save_processor_config(self.datastore, uuid, processor_config_data)
|
||||
|
||||
return "OK", 200
|
||||
|
||||
@@ -477,9 +437,19 @@ class CreateWatch(Resource):
|
||||
new_uuid = self.datastore.add_watch(url=url, extras=extras, tag=tags)
|
||||
if new_uuid:
|
||||
# Dont queue because the scheduler will check that it hasnt been checked before anyway
|
||||
# worker_handler.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid}))
|
||||
# worker_pool.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid}))
|
||||
return {'uuid': new_uuid}, 201
|
||||
else:
|
||||
# Check if it was a limit issue
|
||||
page_watch_limit = os.getenv('PAGE_WATCH_LIMIT')
|
||||
if page_watch_limit:
|
||||
try:
|
||||
page_watch_limit = int(page_watch_limit)
|
||||
current_watch_count = len(self.datastore.data['watching'])
|
||||
if current_watch_count >= page_watch_limit:
|
||||
return f"Watch limit reached ({current_watch_count}/{page_watch_limit} watches). Cannot add more watches.", 429
|
||||
except ValueError:
|
||||
pass
|
||||
return "Invalid or unsupported URL", 400
|
||||
|
||||
@auth.check_token
|
||||
@@ -514,7 +484,7 @@ class CreateWatch(Resource):
|
||||
if len(watches_to_queue) < 20:
|
||||
# Get already queued/running UUIDs once (efficient)
|
||||
queued_uuids = set(self.update_q.get_queued_uuids())
|
||||
running_uuids = set(worker_handler.get_running_uuids())
|
||||
running_uuids = set(worker_pool.get_running_uuids())
|
||||
|
||||
# Filter out watches that are already queued or running
|
||||
watches_to_queue_filtered = [
|
||||
@@ -524,7 +494,7 @@ class CreateWatch(Resource):
|
||||
|
||||
# Queue only the filtered watches
|
||||
for uuid in watches_to_queue_filtered:
|
||||
worker_handler.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
worker_pool.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
|
||||
# Provide feedback about skipped watches
|
||||
skipped_count = len(watches_to_queue) - len(watches_to_queue_filtered)
|
||||
@@ -536,7 +506,7 @@ class CreateWatch(Resource):
|
||||
# 20+ watches - queue in background thread to avoid blocking API response
|
||||
# Capture queued/running state before background thread
|
||||
queued_uuids = set(self.update_q.get_queued_uuids())
|
||||
running_uuids = set(worker_handler.get_running_uuids())
|
||||
running_uuids = set(worker_pool.get_running_uuids())
|
||||
|
||||
def queue_all_watches_background():
|
||||
"""Background thread to queue all watches - discarded after completion."""
|
||||
@@ -546,7 +516,7 @@ class CreateWatch(Resource):
|
||||
for uuid in watches_to_queue:
|
||||
# Check if already queued or running (state captured at start)
|
||||
if uuid not in queued_uuids and uuid not in running_uuids:
|
||||
worker_handler.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
worker_pool.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
queued_count += 1
|
||||
else:
|
||||
skipped_count += 1
|
||||
|
||||
@@ -3,10 +3,10 @@
|
||||
{% from '_helpers.html' import render_simple_field, render_field %}
|
||||
<div class="edit-form">
|
||||
<div class="box-wrap inner">
|
||||
<h4>{{ _('Backups') }}</h4>
|
||||
<h2>{{ _('Backups') }}</h2>
|
||||
{% if backup_running %}
|
||||
<p>
|
||||
<strong>{{ _('A backup is running!') }}</strong>
|
||||
<span class="spinner"></span> <strong>{{ _('A backup is running!') }}</strong>
|
||||
</p>
|
||||
{% endif %}
|
||||
<p>
|
||||
|
||||
@@ -14,7 +14,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
from changedetectionio import forms
|
||||
#
|
||||
if request.method == 'POST':
|
||||
# from changedetectionio import worker_handler
|
||||
# from changedetectionio import worker_pool
|
||||
|
||||
from changedetectionio.blueprint.imports.importer import (
|
||||
import_url_list,
|
||||
@@ -26,12 +26,13 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
# URL List import
|
||||
if request.values.get('urls') and len(request.values.get('urls').strip()):
|
||||
# Import and push into the queue for immediate update check
|
||||
from changedetectionio import processors
|
||||
importer_handler = import_url_list()
|
||||
importer_handler.run(data=request.values.get('urls'), flash=flash, datastore=datastore, processor=request.values.get('processor', 'text_json_diff'))
|
||||
importer_handler.run(data=request.values.get('urls'), flash=flash, datastore=datastore, processor=request.values.get('processor', processors.get_default_processor()))
|
||||
logger.debug(f"Imported {len(importer_handler.new_uuids)} new UUIDs")
|
||||
# Dont' add to queue because scheduler can see that they haven't been checked and will add them to the queue
|
||||
# for uuid in importer_handler.new_uuids:
|
||||
# worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
# worker_pool.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
|
||||
if len(importer_handler.remaining_data) == 0:
|
||||
return redirect(url_for('watchlist.index'))
|
||||
@@ -45,7 +46,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
d_importer.run(data=request.values.get('distill-io'), flash=flash, datastore=datastore)
|
||||
# Dont' add to queue because scheduler can see that they haven't been checked and will add them to the queue
|
||||
# for uuid in importer_handler.new_uuids:
|
||||
# worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
# worker_pool.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
|
||||
|
||||
# XLSX importer
|
||||
@@ -70,7 +71,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
|
||||
# Dont' add to queue because scheduler can see that they haven't been checked and will add them to the queue
|
||||
# for uuid in importer_handler.new_uuids:
|
||||
# worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
# worker_pool.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
|
||||
|
||||
# Could be some remaining, or we could be on GET
|
||||
|
||||
@@ -4,7 +4,7 @@ from flask import Blueprint, flash, redirect, url_for
|
||||
from flask_login import login_required
|
||||
from changedetectionio.store import ChangeDetectionStore
|
||||
from changedetectionio import queuedWatchMetaData
|
||||
from changedetectionio import worker_handler
|
||||
from changedetectionio import worker_pool
|
||||
from queue import PriorityQueue
|
||||
|
||||
PRICE_DATA_TRACK_ACCEPT = 'accepted'
|
||||
@@ -20,7 +20,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q: PriorityQueue
|
||||
datastore.data['watching'][uuid]['track_ldjson_price_data'] = PRICE_DATA_TRACK_ACCEPT
|
||||
datastore.data['watching'][uuid]['processor'] = 'restock_diff'
|
||||
datastore.data['watching'][uuid].clear_watch()
|
||||
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
worker_pool.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
return redirect(url_for("watchlist.index"))
|
||||
|
||||
@login_required
|
||||
|
||||
@@ -37,6 +37,8 @@ def construct_single_watch_routes(rss_blueprint, datastore):
|
||||
|
||||
rss_content_format = datastore.data['settings']['application'].get('rss_content_format')
|
||||
|
||||
if uuid == 'first':
|
||||
uuid = list(datastore.data['watching'].keys()).pop()
|
||||
# Get the watch by UUID
|
||||
watch = datastore.data['watching'].get(uuid)
|
||||
if not watch:
|
||||
|
||||
@@ -83,7 +83,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
|
||||
# Adjust worker count if it changed
|
||||
if new_worker_count != old_worker_count:
|
||||
from changedetectionio import worker_handler
|
||||
from changedetectionio import worker_pool
|
||||
from changedetectionio.flask_app import update_q, notification_q, app, datastore as ds
|
||||
|
||||
# Check CPU core availability and warn if worker count is high
|
||||
@@ -92,7 +92,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
flash(gettext("Warning: Worker count ({}) is close to or exceeds available CPU cores ({})").format(
|
||||
new_worker_count, cpu_count), 'warning')
|
||||
|
||||
result = worker_handler.adjust_async_worker_count(
|
||||
result = worker_pool.adjust_async_worker_count(
|
||||
new_count=new_worker_count,
|
||||
update_q=update_q,
|
||||
notification_q=notification_q,
|
||||
|
||||
@@ -25,7 +25,7 @@
|
||||
<li class="tab"><a href="#ui-options">{{ _('UI Options') }}</a></li>
|
||||
<li class="tab"><a href="#api">{{ _('API') }}</a></li>
|
||||
<li class="tab"><a href="#rss">{{ _('RSS') }}</a></li>
|
||||
<li class="tab"><a href="{{ url_for('backups.index') }}" class="pure-menu-link">{{ _('Backups') }}</a></li>
|
||||
<li class="tab"><a href="{{ url_for('backups.index') }}">{{ _('Backups') }}</a></li>
|
||||
<li class="tab"><a href="#timedate">{{ _('Time & Date') }}</a></li>
|
||||
<li class="tab"><a href="#proxies">{{ _('CAPTCHA & Proxies') }}</a></li>
|
||||
{% if plugin_tabs %}
|
||||
|
||||
@@ -10,7 +10,7 @@ from changedetectionio.blueprint.ui.notification import construct_blueprint as c
|
||||
from changedetectionio.blueprint.ui.views import construct_blueprint as construct_views_blueprint
|
||||
from changedetectionio.blueprint.ui import diff, preview
|
||||
|
||||
def _handle_operations(op, uuids, datastore, worker_handler, update_q, queuedWatchMetaData, watch_check_update, extra_data=None, emit_flash=True):
|
||||
def _handle_operations(op, uuids, datastore, worker_pool, update_q, queuedWatchMetaData, watch_check_update, extra_data=None, emit_flash=True):
|
||||
from flask import request, flash
|
||||
|
||||
if op == 'delete':
|
||||
@@ -63,7 +63,7 @@ def _handle_operations(op, uuids, datastore, worker_handler, update_q, queuedWat
|
||||
for uuid in uuids:
|
||||
if datastore.data['watching'].get(uuid):
|
||||
# Recheck and require a full reprocessing
|
||||
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
worker_pool.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
if emit_flash:
|
||||
flash(gettext("{} watches queued for rechecking").format(len(uuids)))
|
||||
|
||||
@@ -114,7 +114,7 @@ def _handle_operations(op, uuids, datastore, worker_handler, update_q, queuedWat
|
||||
for uuid in uuids:
|
||||
watch_check_update.send(watch_uuid=uuid)
|
||||
|
||||
def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_handler, queuedWatchMetaData, watch_check_update):
|
||||
def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_pool, queuedWatchMetaData, watch_check_update):
|
||||
ui_blueprint = Blueprint('ui', __name__, template_folder="templates")
|
||||
|
||||
# Register the edit blueprint
|
||||
@@ -222,14 +222,14 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_handle
|
||||
@login_optionally_required
|
||||
def form_delete():
|
||||
uuid = request.args.get('uuid')
|
||||
# More for testing, possible to return the first/only
|
||||
if uuid == 'first':
|
||||
uuid = list(datastore.data['watching'].keys()).pop()
|
||||
|
||||
if uuid != 'all' and not uuid in datastore.data['watching'].keys():
|
||||
flash(gettext('The watch by UUID {} does not exist.').format(uuid), 'error')
|
||||
return redirect(url_for('watchlist.index'))
|
||||
|
||||
# More for testing, possible to return the first/only
|
||||
if uuid == 'first':
|
||||
uuid = list(datastore.data['watching'].keys()).pop()
|
||||
datastore.delete(uuid)
|
||||
flash(gettext('Deleted.'))
|
||||
|
||||
@@ -239,14 +239,14 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_handle
|
||||
@login_optionally_required
|
||||
def form_clone():
|
||||
uuid = request.args.get('uuid')
|
||||
# More for testing, possible to return the first/only
|
||||
|
||||
if uuid == 'first':
|
||||
uuid = list(datastore.data['watching'].keys()).pop()
|
||||
|
||||
new_uuid = datastore.clone(uuid)
|
||||
|
||||
if not datastore.data['watching'].get(uuid).get('paused'):
|
||||
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=5, item={'uuid': new_uuid}))
|
||||
worker_pool.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=5, item={'uuid': new_uuid}))
|
||||
|
||||
flash(gettext('Cloned, you are editing the new watch.'))
|
||||
|
||||
@@ -262,10 +262,10 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_handle
|
||||
|
||||
if uuid:
|
||||
# Single watch - check if already queued or running
|
||||
if worker_handler.is_watch_running(uuid) or uuid in update_q.get_queued_uuids():
|
||||
if worker_pool.is_watch_running(uuid) or uuid in update_q.get_queued_uuids():
|
||||
flash(gettext("Watch is already queued or being checked."))
|
||||
else:
|
||||
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
worker_pool.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
flash(gettext("Queued 1 watch for rechecking."))
|
||||
else:
|
||||
# Multiple watches - first count how many need to be queued
|
||||
@@ -284,7 +284,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_handle
|
||||
if len(watches_to_queue) < 20:
|
||||
# Get already queued/running UUIDs once (efficient)
|
||||
queued_uuids = set(update_q.get_queued_uuids())
|
||||
running_uuids = set(worker_handler.get_running_uuids())
|
||||
running_uuids = set(worker_pool.get_running_uuids())
|
||||
|
||||
# Filter out watches that are already queued or running
|
||||
watches_to_queue_filtered = []
|
||||
@@ -294,7 +294,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_handle
|
||||
|
||||
# Queue only the filtered watches
|
||||
for watch_uuid in watches_to_queue_filtered:
|
||||
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid}))
|
||||
worker_pool.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid}))
|
||||
|
||||
# Provide feedback about skipped watches
|
||||
skipped_count = len(watches_to_queue) - len(watches_to_queue_filtered)
|
||||
@@ -310,7 +310,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_handle
|
||||
# 20+ watches - queue in background thread to avoid blocking HTTP response
|
||||
# Capture queued/running state before background thread
|
||||
queued_uuids = set(update_q.get_queued_uuids())
|
||||
running_uuids = set(worker_handler.get_running_uuids())
|
||||
running_uuids = set(worker_pool.get_running_uuids())
|
||||
|
||||
def queue_watches_background():
|
||||
"""Background thread to queue watches - discarded after completion."""
|
||||
@@ -320,7 +320,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_handle
|
||||
for watch_uuid in watches_to_queue:
|
||||
# Check if already queued or running (state captured at start)
|
||||
if watch_uuid not in queued_uuids and watch_uuid not in running_uuids:
|
||||
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid}))
|
||||
worker_pool.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid}))
|
||||
queued_count += 1
|
||||
else:
|
||||
skipped_count += 1
|
||||
@@ -349,7 +349,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_handle
|
||||
extra_data=extra_data,
|
||||
queuedWatchMetaData=queuedWatchMetaData,
|
||||
uuids=uuids,
|
||||
worker_handler=worker_handler,
|
||||
worker_pool=worker_pool,
|
||||
update_q=update_q,
|
||||
watch_check_update=watch_check_update,
|
||||
op=op,
|
||||
@@ -367,9 +367,6 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_handle
|
||||
import json
|
||||
from copy import deepcopy
|
||||
|
||||
# more for testing
|
||||
if uuid == 'first':
|
||||
uuid = list(datastore.data['watching'].keys()).pop()
|
||||
|
||||
# copy it to memory as trim off what we dont need (history)
|
||||
watch = deepcopy(datastore.data['watching'].get(uuid))
|
||||
|
||||
@@ -83,7 +83,6 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
If a processor doesn't have a difference module, falls back to text_json_diff.
|
||||
"""
|
||||
|
||||
# More for testing, possible to return the first/only
|
||||
if uuid == 'first':
|
||||
uuid = list(datastore.data['watching'].keys()).pop()
|
||||
|
||||
@@ -101,23 +100,21 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
# Get the processor type for this watch
|
||||
processor_name = watch.get('processor', 'text_json_diff')
|
||||
|
||||
try:
|
||||
# Try to import the processor's difference module
|
||||
processor_module = importlib.import_module(f'changedetectionio.processors.{processor_name}.difference')
|
||||
# Try to get the processor's difference module (works for both built-in and plugin processors)
|
||||
from changedetectionio.processors import get_processor_submodule
|
||||
processor_module = get_processor_submodule(processor_name, 'difference')
|
||||
|
||||
# Call the processor's render() function
|
||||
if hasattr(processor_module, 'render'):
|
||||
return processor_module.render(
|
||||
watch=watch,
|
||||
datastore=datastore,
|
||||
request=request,
|
||||
url_for=url_for,
|
||||
render_template=render_template,
|
||||
flash=flash,
|
||||
redirect=redirect
|
||||
)
|
||||
except (ImportError, ModuleNotFoundError) as e:
|
||||
logger.warning(f"Processor {processor_name} does not have a difference module, falling back to text_json_diff: {e}")
|
||||
# Call the processor's render() function
|
||||
if processor_module and hasattr(processor_module, 'render'):
|
||||
return processor_module.render(
|
||||
watch=watch,
|
||||
datastore=datastore,
|
||||
request=request,
|
||||
url_for=url_for,
|
||||
render_template=render_template,
|
||||
flash=flash,
|
||||
redirect=redirect
|
||||
)
|
||||
|
||||
# Fallback: if processor doesn't have difference module, use text_json_diff as default
|
||||
from changedetectionio.processors.text_json_diff.difference import render as default_render
|
||||
@@ -144,10 +141,10 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
Each processor implements processors/{type}/extract.py::render_form()
|
||||
If a processor doesn't have an extract module, falls back to text_json_diff.
|
||||
"""
|
||||
# More for testing, possible to return the first/only
|
||||
|
||||
|
||||
if uuid == 'first':
|
||||
uuid = list(datastore.data['watching'].keys()).pop()
|
||||
|
||||
try:
|
||||
watch = datastore.data['watching'][uuid]
|
||||
except KeyError:
|
||||
@@ -157,23 +154,21 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
# Get the processor type for this watch
|
||||
processor_name = watch.get('processor', 'text_json_diff')
|
||||
|
||||
try:
|
||||
# Try to import the processor's extract module
|
||||
processor_module = importlib.import_module(f'changedetectionio.processors.{processor_name}.extract')
|
||||
# Try to get the processor's extract module (works for both built-in and plugin processors)
|
||||
from changedetectionio.processors import get_processor_submodule
|
||||
processor_module = get_processor_submodule(processor_name, 'extract')
|
||||
|
||||
# Call the processor's render_form() function
|
||||
if hasattr(processor_module, 'render_form'):
|
||||
return processor_module.render_form(
|
||||
watch=watch,
|
||||
datastore=datastore,
|
||||
request=request,
|
||||
url_for=url_for,
|
||||
render_template=render_template,
|
||||
flash=flash,
|
||||
redirect=redirect
|
||||
)
|
||||
except (ImportError, ModuleNotFoundError) as e:
|
||||
logger.warning(f"Processor {processor_name} does not have an extract module, falling back to base extractor: {e}")
|
||||
# Call the processor's render_form() function
|
||||
if processor_module and hasattr(processor_module, 'render_form'):
|
||||
return processor_module.render_form(
|
||||
watch=watch,
|
||||
datastore=datastore,
|
||||
request=request,
|
||||
url_for=url_for,
|
||||
render_template=render_template,
|
||||
flash=flash,
|
||||
redirect=redirect
|
||||
)
|
||||
|
||||
# Fallback: if processor doesn't have extract module, use base processors.extract as default
|
||||
from changedetectionio.processors.extract import render_form as default_render_form
|
||||
@@ -200,7 +195,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
Each processor implements processors/{type}/extract.py::process_extraction()
|
||||
If a processor doesn't have an extract module, falls back to text_json_diff.
|
||||
"""
|
||||
# More for testing, possible to return the first/only
|
||||
|
||||
if uuid == 'first':
|
||||
uuid = list(datastore.data['watching'].keys()).pop()
|
||||
|
||||
@@ -213,24 +208,22 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
# Get the processor type for this watch
|
||||
processor_name = watch.get('processor', 'text_json_diff')
|
||||
|
||||
try:
|
||||
# Try to import the processor's extract module
|
||||
processor_module = importlib.import_module(f'changedetectionio.processors.{processor_name}.extract')
|
||||
# Try to get the processor's extract module (works for both built-in and plugin processors)
|
||||
from changedetectionio.processors import get_processor_submodule
|
||||
processor_module = get_processor_submodule(processor_name, 'extract')
|
||||
|
||||
# Call the processor's process_extraction() function
|
||||
if hasattr(processor_module, 'process_extraction'):
|
||||
return processor_module.process_extraction(
|
||||
watch=watch,
|
||||
datastore=datastore,
|
||||
request=request,
|
||||
url_for=url_for,
|
||||
make_response=make_response,
|
||||
send_from_directory=send_from_directory,
|
||||
flash=flash,
|
||||
redirect=redirect
|
||||
)
|
||||
except (ImportError, ModuleNotFoundError) as e:
|
||||
logger.warning(f"Processor {processor_name} does not have an extract module, falling back to base extractor: {e}")
|
||||
# Call the processor's process_extraction() function
|
||||
if processor_module and hasattr(processor_module, 'process_extraction'):
|
||||
return processor_module.process_extraction(
|
||||
watch=watch,
|
||||
datastore=datastore,
|
||||
request=request,
|
||||
url_for=url_for,
|
||||
make_response=make_response,
|
||||
send_from_directory=send_from_directory,
|
||||
flash=flash,
|
||||
redirect=redirect
|
||||
)
|
||||
|
||||
# Fallback: if processor doesn't have extract module, use base processors.extract as default
|
||||
from changedetectionio.processors.extract import process_extraction as default_process_extraction
|
||||
@@ -267,7 +260,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
- /diff/{uuid}/processor-asset/after
|
||||
- /diff/{uuid}/processor-asset/rendered_diff
|
||||
"""
|
||||
# More for testing, possible to return the first/only
|
||||
|
||||
if uuid == 'first':
|
||||
uuid = list(datastore.data['watching'].keys()).pop()
|
||||
|
||||
@@ -280,38 +273,33 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
# Get the processor type for this watch
|
||||
processor_name = watch.get('processor', 'text_json_diff')
|
||||
|
||||
try:
|
||||
# Try to import the processor's difference module
|
||||
processor_module = importlib.import_module(f'changedetectionio.processors.{processor_name}.difference')
|
||||
# Try to get the processor's difference module (works for both built-in and plugin processors)
|
||||
from changedetectionio.processors import get_processor_submodule
|
||||
processor_module = get_processor_submodule(processor_name, 'difference')
|
||||
|
||||
# Call the processor's get_asset() function
|
||||
if hasattr(processor_module, 'get_asset'):
|
||||
result = processor_module.get_asset(
|
||||
asset_name=asset_name,
|
||||
watch=watch,
|
||||
datastore=datastore,
|
||||
request=request
|
||||
)
|
||||
# Call the processor's get_asset() function
|
||||
if processor_module and hasattr(processor_module, 'get_asset'):
|
||||
result = processor_module.get_asset(
|
||||
asset_name=asset_name,
|
||||
watch=watch,
|
||||
datastore=datastore,
|
||||
request=request
|
||||
)
|
||||
|
||||
if result is None:
|
||||
from flask import abort
|
||||
abort(404, description=f"Asset '{asset_name}' not found")
|
||||
|
||||
binary_data, content_type, cache_control = result
|
||||
|
||||
response = make_response(binary_data)
|
||||
response.headers['Content-Type'] = content_type
|
||||
if cache_control:
|
||||
response.headers['Cache-Control'] = cache_control
|
||||
return response
|
||||
else:
|
||||
logger.warning(f"Processor {processor_name} does not implement get_asset()")
|
||||
if result is None:
|
||||
from flask import abort
|
||||
abort(404, description=f"Processor '{processor_name}' does not support assets")
|
||||
abort(404, description=f"Asset '{asset_name}' not found")
|
||||
|
||||
except (ImportError, ModuleNotFoundError) as e:
|
||||
logger.warning(f"Processor {processor_name} does not have a difference module: {e}")
|
||||
binary_data, content_type, cache_control = result
|
||||
|
||||
response = make_response(binary_data)
|
||||
response.headers['Content-Type'] = content_type
|
||||
if cache_control:
|
||||
response.headers['Cache-Control'] = cache_control
|
||||
return response
|
||||
else:
|
||||
logger.warning(f"Processor {processor_name} does not implement get_asset()")
|
||||
from flask import abort
|
||||
abort(404, description=f"Processor '{processor_name}' not found")
|
||||
abort(404, description=f"Processor '{processor_name}' does not support assets")
|
||||
|
||||
return diff_blueprint
|
||||
|
||||
@@ -9,7 +9,7 @@ from jinja2 import Environment, FileSystemLoader
|
||||
from changedetectionio.store import ChangeDetectionStore
|
||||
from changedetectionio.auth_decorator import login_optionally_required
|
||||
from changedetectionio.time_handler import is_within_schedule
|
||||
from changedetectionio import worker_handler
|
||||
from changedetectionio import worker_pool
|
||||
|
||||
def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMetaData):
|
||||
edit_blueprint = Blueprint('ui_edit', __name__, template_folder="../ui/templates")
|
||||
@@ -30,14 +30,13 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
from changedetectionio import processors
|
||||
import importlib
|
||||
|
||||
if uuid == 'first':
|
||||
uuid = list(datastore.data['watching'].keys()).pop()
|
||||
# More for testing, possible to return the first/only
|
||||
if not datastore.data['watching'].keys():
|
||||
flash(gettext("No watches to edit"), "error")
|
||||
return redirect(url_for('watchlist.index'))
|
||||
|
||||
if uuid == 'first':
|
||||
uuid = list(datastore.data['watching'].keys()).pop()
|
||||
|
||||
if not uuid in datastore.data['watching']:
|
||||
flash(gettext("No watch with the UUID {} found.").format(uuid), "error")
|
||||
return redirect(url_for('watchlist.index'))
|
||||
@@ -72,8 +71,13 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
processor_name = datastore.data['watching'][uuid].get('processor', '')
|
||||
processor_classes = next((tpl for tpl in processors.find_processors() if tpl[1] == processor_name), None)
|
||||
if not processor_classes:
|
||||
flash(gettext("Cannot load the edit form for processor/plugin '{}', plugin missing?").format(processor_classes[1]), 'error')
|
||||
return redirect(url_for('watchlist.index'))
|
||||
flash(gettext("Could not load '{}' processor, processor plugin might be missing. Please select a different processor.").format(processor_name), 'error')
|
||||
# Fall back to default processor so user can still edit and change processor
|
||||
processor_classes = next((tpl for tpl in processors.find_processors() if tpl[1] == 'text_json_diff'), None)
|
||||
if not processor_classes:
|
||||
# If even text_json_diff is missing, something is very wrong
|
||||
flash(gettext("Could not load '{}' processor, processor plugin might be missing.").format(processor_name), 'error')
|
||||
return redirect(url_for('watchlist.index'))
|
||||
|
||||
parent_module = processors.get_parent_module(processor_classes[0])
|
||||
|
||||
@@ -150,58 +154,10 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
|
||||
extra_update_obj['time_between_check'] = form.time_between_check.data
|
||||
|
||||
# Handle processor-config-* fields separately (save to JSON, not datastore)
|
||||
processor_config_data = {}
|
||||
fields_to_remove = []
|
||||
for field_name, field_value in form.data.items():
|
||||
if field_name.startswith('processor_config_'):
|
||||
config_key = field_name.replace('processor_config_', '')
|
||||
if field_value: # Only save non-empty values
|
||||
processor_config_data[config_key] = field_value
|
||||
fields_to_remove.append(field_name)
|
||||
|
||||
# Save processor config to JSON file if any config data exists
|
||||
if processor_config_data:
|
||||
try:
|
||||
processor_name = form.data.get('processor')
|
||||
# Create a processor instance to access config methods
|
||||
processor_instance = processors.difference_detection_processor(datastore, uuid)
|
||||
# Use processor name as filename so each processor keeps its own config
|
||||
config_filename = f'{processor_name}.json'
|
||||
processor_instance.update_extra_watch_config(config_filename, processor_config_data)
|
||||
logger.debug(f"Saved processor config to {config_filename}: {processor_config_data}")
|
||||
|
||||
# Call optional edit_hook if processor has one
|
||||
try:
|
||||
# Try to import the edit_hook module from the processor package
|
||||
import importlib
|
||||
edit_hook_module_name = f'changedetectionio.processors.{processor_name}.edit_hook'
|
||||
|
||||
try:
|
||||
edit_hook = importlib.import_module(edit_hook_module_name)
|
||||
logger.debug(f"Found edit_hook module for {processor_name}")
|
||||
|
||||
if hasattr(edit_hook, 'on_config_save'):
|
||||
logger.info(f"Calling edit_hook.on_config_save for {processor_name}")
|
||||
watch_obj = datastore.data['watching'][uuid]
|
||||
# Call hook and get updated config
|
||||
updated_config = edit_hook.on_config_save(watch_obj, processor_config_data, datastore)
|
||||
# Save updated config back to file
|
||||
processor_instance.update_extra_watch_config(config_filename, updated_config)
|
||||
logger.info(f"Edit hook updated config: {updated_config}")
|
||||
else:
|
||||
logger.debug(f"Edit hook module found but no on_config_save function")
|
||||
except ModuleNotFoundError:
|
||||
logger.debug(f"No edit_hook module for processor {processor_name} (this is normal)")
|
||||
except Exception as hook_error:
|
||||
logger.error(f"Edit hook error (non-fatal): {hook_error}", exc_info=True)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to save processor config: {e}")
|
||||
|
||||
# Remove processor-config-* fields from form.data before updating datastore
|
||||
for field_name in fields_to_remove:
|
||||
form.data.pop(field_name, None)
|
||||
# Handle processor-config-* fields separately (save to JSON, not datastore)
|
||||
# IMPORTANT: These must NOT be saved to url-watches.json, only to the processor-specific JSON file
|
||||
processor_config_data = processors.extract_processor_config_from_form_data(form.data)
|
||||
processors.save_processor_config(datastore, uuid, processor_config_data)
|
||||
|
||||
# Ignore text
|
||||
form_ignore_text = form.ignore_text.data
|
||||
@@ -283,7 +239,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
#############################
|
||||
if not datastore.data['watching'][uuid].get('paused') and is_in_schedule:
|
||||
# Queue the watch for immediate recheck, with a higher priority
|
||||
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
worker_pool.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
|
||||
# Diff page [edit] link should go back to diff page
|
||||
if request.args.get("next") and request.args.get("next") == 'diff':
|
||||
@@ -311,10 +267,17 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
|
||||
# Get fetcher capabilities instead of hardcoded logic
|
||||
capabilities = get_fetcher_capabilities(watch, datastore)
|
||||
|
||||
# Add processor capabilities from module
|
||||
capabilities['supports_visual_selector'] = getattr(parent_module, 'supports_visual_selector', False)
|
||||
capabilities['supports_text_filters_and_triggers'] = getattr(parent_module, 'supports_text_filters_and_triggers', False)
|
||||
capabilities['supports_text_filters_and_triggers_elements'] = getattr(parent_module, 'supports_text_filters_and_triggers_elements', False)
|
||||
capabilities['supports_request_type'] = getattr(parent_module, 'supports_request_type', False)
|
||||
|
||||
app_rss_token = datastore.data['settings']['application'].get('rss_access_token'),
|
||||
|
||||
c = [f"processor-{watch.get('processor')}"]
|
||||
if worker_handler.is_watch_running(uuid):
|
||||
if worker_pool.is_watch_running(uuid):
|
||||
c.append('checking-now')
|
||||
|
||||
template_args = {
|
||||
@@ -371,6 +334,8 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
from flask import send_file
|
||||
import brotli
|
||||
|
||||
if uuid == 'first':
|
||||
uuid = list(datastore.data['watching'].keys()).pop()
|
||||
watch = datastore.data['watching'].get(uuid)
|
||||
if watch and watch.history.keys() and os.path.isdir(watch.watch_data_dir):
|
||||
latest_filename = list(watch.history.keys())[-1]
|
||||
@@ -395,6 +360,9 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
def watch_get_preview_rendered(uuid):
|
||||
'''For when viewing the "preview" of the rendered text from inside of Edit'''
|
||||
from flask import jsonify
|
||||
|
||||
if uuid == 'first':
|
||||
uuid = list(datastore.data['watching'].keys()).pop()
|
||||
from changedetectionio.processors.text_json_diff import prepare_filter_prevew
|
||||
result = prepare_filter_prevew(watch_uuid=uuid, form_data=request.form, datastore=datastore)
|
||||
return jsonify(result)
|
||||
|
||||
@@ -26,10 +26,9 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
Each processor implements processors/{type}/preview.py::render()
|
||||
If a processor doesn't have a preview module, falls back to default text preview.
|
||||
"""
|
||||
# More for testing, possible to return the first/only
|
||||
|
||||
if uuid == 'first':
|
||||
uuid = list(datastore.data['watching'].keys()).pop()
|
||||
|
||||
try:
|
||||
watch = datastore.data['watching'][uuid]
|
||||
except KeyError:
|
||||
@@ -39,24 +38,21 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
# Get the processor type for this watch
|
||||
processor_name = watch.get('processor', 'text_json_diff')
|
||||
|
||||
try:
|
||||
# Try to import the processor's preview module
|
||||
import importlib
|
||||
processor_module = importlib.import_module(f'changedetectionio.processors.{processor_name}.preview')
|
||||
# Try to get the processor's preview module (works for both built-in and plugin processors)
|
||||
from changedetectionio.processors import get_processor_submodule
|
||||
processor_module = get_processor_submodule(processor_name, 'preview')
|
||||
|
||||
# Call the processor's render() function
|
||||
if hasattr(processor_module, 'render'):
|
||||
return processor_module.render(
|
||||
watch=watch,
|
||||
datastore=datastore,
|
||||
request=request,
|
||||
url_for=url_for,
|
||||
render_template=render_template,
|
||||
flash=flash,
|
||||
redirect=redirect
|
||||
)
|
||||
except (ImportError, ModuleNotFoundError) as e:
|
||||
logger.debug(f"Processor {processor_name} does not have a preview module, using default preview: {e}")
|
||||
# Call the processor's render() function
|
||||
if processor_module and hasattr(processor_module, 'render'):
|
||||
return processor_module.render(
|
||||
watch=watch,
|
||||
datastore=datastore,
|
||||
request=request,
|
||||
url_for=url_for,
|
||||
render_template=render_template,
|
||||
flash=flash,
|
||||
redirect=redirect
|
||||
)
|
||||
|
||||
# Fallback: if processor doesn't have preview module, use default text preview
|
||||
content = []
|
||||
@@ -150,10 +146,8 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
"""
|
||||
from flask import make_response
|
||||
|
||||
# More for testing, possible to return the first/only
|
||||
if uuid == 'first':
|
||||
uuid = list(datastore.data['watching'].keys()).pop()
|
||||
|
||||
try:
|
||||
watch = datastore.data['watching'][uuid]
|
||||
except KeyError:
|
||||
@@ -163,39 +157,33 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
# Get the processor type for this watch
|
||||
processor_name = watch.get('processor', 'text_json_diff')
|
||||
|
||||
try:
|
||||
# Try to import the processor's preview module
|
||||
import importlib
|
||||
processor_module = importlib.import_module(f'changedetectionio.processors.{processor_name}.preview')
|
||||
# Try to get the processor's preview module (works for both built-in and plugin processors)
|
||||
from changedetectionio.processors import get_processor_submodule
|
||||
processor_module = get_processor_submodule(processor_name, 'preview')
|
||||
|
||||
# Call the processor's get_asset() function
|
||||
if hasattr(processor_module, 'get_asset'):
|
||||
result = processor_module.get_asset(
|
||||
asset_name=asset_name,
|
||||
watch=watch,
|
||||
datastore=datastore,
|
||||
request=request
|
||||
)
|
||||
# Call the processor's get_asset() function
|
||||
if processor_module and hasattr(processor_module, 'get_asset'):
|
||||
result = processor_module.get_asset(
|
||||
asset_name=asset_name,
|
||||
watch=watch,
|
||||
datastore=datastore,
|
||||
request=request
|
||||
)
|
||||
|
||||
if result is None:
|
||||
from flask import abort
|
||||
abort(404, description=f"Asset '{asset_name}' not found")
|
||||
|
||||
binary_data, content_type, cache_control = result
|
||||
|
||||
response = make_response(binary_data)
|
||||
response.headers['Content-Type'] = content_type
|
||||
if cache_control:
|
||||
response.headers['Cache-Control'] = cache_control
|
||||
return response
|
||||
else:
|
||||
logger.warning(f"Processor {processor_name} does not implement get_asset()")
|
||||
if result is None:
|
||||
from flask import abort
|
||||
abort(404, description=f"Processor '{processor_name}' does not support assets")
|
||||
abort(404, description=f"Asset '{asset_name}' not found")
|
||||
|
||||
except (ImportError, ModuleNotFoundError) as e:
|
||||
logger.warning(f"Processor {processor_name} does not have a preview module: {e}")
|
||||
binary_data, content_type, cache_control = result
|
||||
|
||||
response = make_response(binary_data)
|
||||
response.headers['Content-Type'] = content_type
|
||||
if cache_control:
|
||||
response.headers['Cache-Control'] = cache_control
|
||||
return response
|
||||
else:
|
||||
logger.warning(f"Processor {processor_name} does not implement get_asset()")
|
||||
from flask import abort
|
||||
abort(404, description=f"Processor '{processor_name}' not found")
|
||||
abort(404, description=f"Processor '{processor_name}' does not support assets")
|
||||
|
||||
return preview_blueprint
|
||||
|
||||
@@ -45,14 +45,19 @@
|
||||
<div class="tabs collapsable">
|
||||
<ul>
|
||||
<li class="tab"><a href="#general">{{ _('General') }}</a></li>
|
||||
{% if capabilities.supports_request_type %}
|
||||
<li class="tab"><a href="#request">{{ _('Request') }}</a></li>
|
||||
{% endif %}
|
||||
{% if extra_tab_content %}
|
||||
<li class="tab"><a href="#extras_tab">{{ extra_tab_content }}</a></li>
|
||||
{% endif %}
|
||||
{% if capabilities.supports_browser_steps %}
|
||||
<li class="tab"><a id="browsersteps-tab" href="#browser-steps">{{ _('Browser Steps') }}</a></li>
|
||||
<!-- should goto extra forms? -->
|
||||
{% if watch['processor'] == 'text_json_diff' or watch['processor'] == 'image_ssim_diff' %}
|
||||
{% endif %}
|
||||
{% if capabilities.supports_visual_selector %}
|
||||
<li class="tab"><a id="visualselector-tab" href="#visualselector">{{ _('Visual Filter Selector') }}</a></li>
|
||||
{% endif %}
|
||||
{% if capabilities.supports_text_filters_and_triggers %}
|
||||
<li class="tab" id="filters-and-triggers-tab"><a href="#filters-and-triggers">{{ _('Filters & Triggers') }}</a></li>
|
||||
<li class="tab" id="conditions-tab"><a href="#conditions">{{ _('Conditions') }}</a></li>
|
||||
{% endif %}
|
||||
@@ -116,6 +121,7 @@
|
||||
</fieldset>
|
||||
</div>
|
||||
|
||||
{% if capabilities.supports_request_type %}
|
||||
<div class="tab-pane-inner" id="request">
|
||||
<div class="pure-control-group inline-radio">
|
||||
{{ render_field(form.fetch_backend, class="fetch-backend") }}
|
||||
@@ -203,6 +209,7 @@ Math: {{ 1 + 1 }}") }}
|
||||
</div>
|
||||
</fieldset>
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
<div class="tab-pane-inner" id="browser-steps">
|
||||
{% if capabilities.supports_browser_steps %}
|
||||
@@ -283,8 +290,7 @@ Math: {{ 1 + 1 }}") }}
|
||||
</fieldset>
|
||||
</div>
|
||||
|
||||
{% if watch['processor'] == 'text_json_diff' or watch['processor'] == 'image_ssim_diff' %}
|
||||
|
||||
{% if capabilities.supports_text_filters_and_triggers %}
|
||||
<div class="tab-pane-inner" id="conditions">
|
||||
<script>
|
||||
const verify_condition_rule_url="{{url_for('conditions.verify_condition_single_rule', watch_uuid=uuid)}}";
|
||||
@@ -303,7 +309,9 @@ Math: {{ 1 + 1 }}") }}
|
||||
<span id="activate-text-preview" class="pure-button pure-button-primary button-xsmall">{{ _('Activate preview') }}</span>
|
||||
<div>
|
||||
<div id="edit-text-filter">
|
||||
<div class="pure-control-group" id="pro-tips">
|
||||
|
||||
{% if capabilities.supports_text_filters_and_triggers_elements %}
|
||||
<div class="pure-control-group" id="pro-tips">
|
||||
<strong>{{ _('Pro-tips:') }}</strong><br>
|
||||
<ul>
|
||||
<li>
|
||||
@@ -314,8 +322,8 @@ Math: {{ 1 + 1 }}") }}
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
{% include "edit/include_subtract.html" %}
|
||||
{% endif %}
|
||||
<div class="text-filtering border-fieldset">
|
||||
<fieldset class="pure-group" id="text-filtering-type-options">
|
||||
<h3>{{ _('Text filtering') }}</h3>
|
||||
@@ -374,7 +382,7 @@ Math: {{ 1 + 1 }}") }}
|
||||
{{ extra_form_content|safe }}
|
||||
</div>
|
||||
{% endif %}
|
||||
{% if watch['processor'] == 'text_json_diff' or watch['processor'] == 'image_ssim_diff' %}
|
||||
{% if capabilities.supports_visual_selector %}
|
||||
<div class="tab-pane-inner visual-selector-ui" id="visualselector">
|
||||
<img class="beta-logo" src="{{url_for('static_content', group='images', filename='beta-logo.png')}}" alt="New beta functionality">
|
||||
|
||||
@@ -386,7 +394,7 @@ Math: {{ 1 + 1 }}") }}
|
||||
{{ _('The Visual Selector tool lets you select the') }} <i>{{ _('text') }}</i> {{ _('elements that will be used for the change detection. It automatically fills-in the filters in the "CSS/JSONPath/JQ/XPath Filters" box of the') }} <a href="#filters-and-triggers">{{ _('Filters & Triggers') }}</a> {{ _('tab. Use') }} <strong>{{ _('Shift+Click') }}</strong> {{ _('to select multiple items.') }}
|
||||
</span>
|
||||
|
||||
{% if watch['processor'] == 'image_ssim_diff' %}
|
||||
{% if watch['processor'] == 'image_ssim_diff' %} {# @todo, integrate with image_ssim_diff selector better, use some extra form ? #}
|
||||
<div id="selection-mode-controls" style="margin: 10px 0; padding: 10px; background: var(--color-background-tab); border-radius: 5px;">
|
||||
<label style="font-weight: 600; margin-right: 15px;">{{ _('Selection Mode:') }}</label>
|
||||
<label style="margin-right: 15px;">
|
||||
|
||||
@@ -2,7 +2,7 @@ from flask import Blueprint, request, redirect, url_for, flash
|
||||
from flask_babel import gettext
|
||||
from changedetectionio.store import ChangeDetectionStore
|
||||
from changedetectionio.auth_decorator import login_optionally_required
|
||||
from changedetectionio import worker_handler
|
||||
from changedetectionio import worker_pool
|
||||
|
||||
|
||||
def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMetaData, watch_check_update):
|
||||
@@ -24,8 +24,9 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
flash(gettext('Warning, URL {} already exists').format(url), "notice")
|
||||
|
||||
add_paused = request.form.get('edit_and_watch_submit_button') != None
|
||||
processor = request.form.get('processor', 'text_json_diff')
|
||||
new_uuid = datastore.add_watch(url=url, tag=request.form.get('tags').strip(), extras={'paused': add_paused, 'processor': processor})
|
||||
from changedetectionio import processors
|
||||
processor = request.form.get('processor', processors.get_default_processor())
|
||||
new_uuid = datastore.add_watch(url=url, tag=request.form.get('tags','').strip(), extras={'paused': add_paused, 'processor': processor})
|
||||
|
||||
if new_uuid:
|
||||
if add_paused:
|
||||
@@ -33,9 +34,9 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
return redirect(url_for('ui.ui_edit.edit_page', uuid=new_uuid, unpause_on_save=1, tag=request.args.get('tag')))
|
||||
else:
|
||||
# Straight into the queue.
|
||||
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid}))
|
||||
worker_pool.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid}))
|
||||
flash(gettext("Watch added."))
|
||||
|
||||
return redirect(url_for('watchlist.index', tag=request.args.get('tag','')))
|
||||
|
||||
return views_blueprint
|
||||
return views_blueprint
|
||||
|
||||
@@ -1,5 +1,9 @@
|
||||
{%- extends 'base.html' -%}
|
||||
{%- block content -%}
|
||||
{%- set tips = [
|
||||
_("Changedetection.io can monitor more than just web-pages! See our plugins!") ~ ' <a href="https://changedetection.io/plugins">' ~ _('More info') ~ '</a>',
|
||||
_("You can also add 'shared' watches.") ~ ' <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Sharing-a-Watch">' ~ _('More info') ~ '</a>'
|
||||
] -%}
|
||||
{%- from '_helpers.html' import render_simple_field, render_field, render_nolabel_field, sort_by_title -%}
|
||||
<script src="{{url_for('static_content', group='js', filename='jquery-3.6.0.min.js')}}"></script>
|
||||
<script src="{{url_for('static_content', group='js', filename='watch-overview.js')}}" defer></script>
|
||||
@@ -69,7 +73,9 @@ html[data-darkmode="true"] .watch-tag-list.tag-{{ class_name }} {
|
||||
</div>
|
||||
|
||||
</fieldset>
|
||||
<span style="color:#eee; font-size: 80%;"><img alt="{{ _('Create a shareable link') }}" style="height: 1em;display:inline-block;" src="{{url_for('static_content', group='images', filename='spread-white.svg')}}" > {{ _("Tip: You can also add 'shared' watches.") }} <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Sharing-a-Watch">{{ _('More info') }}</a></span>
|
||||
<span style="color:#eee; font-size: 80%;">
|
||||
<strong>Tip: </strong> {{ tips | random | safe }}
|
||||
</span>
|
||||
</form>
|
||||
</div>
|
||||
<div class="box">
|
||||
@@ -205,23 +211,24 @@ html[data-darkmode="true"] .watch-tag-list.tag-{{ class_name }} {
|
||||
</div>
|
||||
{% endif %}
|
||||
<div>
|
||||
<span class="watch-title">
|
||||
{% if system_use_url_watchlist or watch.get('use_page_title_in_list') %}
|
||||
{{ watch.label }}
|
||||
{% else %}
|
||||
{{ watch.get('title') or watch.link }}
|
||||
{% endif %}
|
||||
<a class="external" target="_blank" rel="noopener" href="{{ watch.link.replace('source:','') }}"> </a>
|
||||
</span>
|
||||
{%- if watch['processor'] and watch['processor'] in processor_badge_texts -%}
|
||||
<span class="processor-badge processor-badge-{{ watch['processor'] }}" title="{{ processor_descriptions.get(watch['processor'], watch['processor']) }}">{{ processor_badge_texts[watch['processor']] }}</span>
|
||||
{%- endif -%}
|
||||
<span class="watch-title">
|
||||
{% if system_use_url_watchlist or watch.get('use_page_title_in_list') %}
|
||||
{{ watch.label }}
|
||||
{% else %}
|
||||
{{ watch.get('title') or watch.link }}
|
||||
{% endif %}
|
||||
<a class="external" target="_blank" rel="noopener" href="{{ watch.link.replace('source:','') }}"> </a>
|
||||
</span>
|
||||
<div class="error-text" style="display:none;">{{ watch.compile_error_texts(has_proxies=datastore.proxy_list)|safe }}</div>
|
||||
{%- if watch['processor'] == 'text_json_diff' -%}
|
||||
{%- if watch['has_ldjson_price_data'] and not watch['track_ldjson_price_data'] -%}
|
||||
<div class="ldjson-price-track-offer">Switch to Restock & Price watch mode? <a href="{{url_for('price_data_follower.accept', uuid=watch.uuid)}}" class="pure-button button-xsmall">Yes</a> <a href="{{url_for('price_data_follower.reject', uuid=watch.uuid)}}" class="">No</a></div>
|
||||
{%- endif -%}
|
||||
{%- endif -%}
|
||||
{%- if watch['processor'] and watch['processor'] in processor_badge_texts -%}
|
||||
<span class="processor-badge processor-badge-{{ watch['processor'] }}" title="{{ processor_descriptions.get(watch['processor'], watch['processor']) }}">{{ processor_badge_texts[watch['processor']] }}</span>
|
||||
{%- endif -%}
|
||||
|
||||
{%- for watch_tag_uuid, watch_tag in datastore.get_all_tags_for_watch(watch['uuid']).items() -%}
|
||||
<a href="{{url_for('watchlist.index', tag=watch_tag_uuid) }}" class="watch-tag-list tag-{{ watch_tag.title|sanitize_tag_class }}">{{ watch_tag.title }}</a>
|
||||
{%- endfor -%}
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import asyncio
|
||||
import gc
|
||||
import json
|
||||
import os
|
||||
@@ -349,12 +350,7 @@ class fetcher(Fetcher):
|
||||
|
||||
if self.status_code != 200 and not ignore_status_codes:
|
||||
screenshot = await capture_full_page_async(self.page, screenshot_format=self.screenshot_format, watch_uuid=watch_uuid, lock_viewport_elements=self.lock_viewport_elements)
|
||||
# Cleanup before raising to prevent memory leak
|
||||
await self.page.close()
|
||||
await context.close()
|
||||
await browser.close()
|
||||
# Force garbage collection to release Playwright resources immediately
|
||||
gc.collect()
|
||||
# Finally block will handle cleanup
|
||||
raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, screenshot=screenshot)
|
||||
|
||||
if not empty_pages_are_a_change and len((await self.page.content()).strip()) == 0:
|
||||
@@ -370,12 +366,7 @@ class fetcher(Fetcher):
|
||||
try:
|
||||
await self.iterate_browser_steps(start_url=url)
|
||||
except BrowserStepsStepException:
|
||||
try:
|
||||
await context.close()
|
||||
await browser.close()
|
||||
except Exception as e:
|
||||
# Fine, could be messy situation
|
||||
pass
|
||||
# Finally block will handle cleanup
|
||||
raise
|
||||
|
||||
await self.page.wait_for_timeout(extra_wait * 1000)
|
||||
@@ -424,35 +415,40 @@ class fetcher(Fetcher):
|
||||
raise ScreenshotUnavailable(url=url, status_code=self.status_code)
|
||||
|
||||
finally:
|
||||
# Request garbage collection one more time before closing
|
||||
# Clean up resources properly with timeouts to prevent hanging
|
||||
try:
|
||||
await self.page.request_gc()
|
||||
except:
|
||||
pass
|
||||
|
||||
# Clean up resources properly
|
||||
try:
|
||||
await self.page.request_gc()
|
||||
except:
|
||||
pass
|
||||
if hasattr(self, 'page') and self.page:
|
||||
await self.page.request_gc()
|
||||
await asyncio.wait_for(self.page.close(), timeout=5.0)
|
||||
logger.debug(f"Successfully closed page for {url}")
|
||||
except asyncio.TimeoutError:
|
||||
logger.warning(f"Timed out closing page for {url} (5s)")
|
||||
except Exception as e:
|
||||
logger.warning(f"Error closing page for {url}: {e}")
|
||||
finally:
|
||||
self.page = None
|
||||
|
||||
try:
|
||||
await self.page.close()
|
||||
except:
|
||||
pass
|
||||
self.page = None
|
||||
if context:
|
||||
await asyncio.wait_for(context.close(), timeout=5.0)
|
||||
logger.debug(f"Successfully closed context for {url}")
|
||||
except asyncio.TimeoutError:
|
||||
logger.warning(f"Timed out closing context for {url} (5s)")
|
||||
except Exception as e:
|
||||
logger.warning(f"Error closing context for {url}: {e}")
|
||||
finally:
|
||||
context = None
|
||||
|
||||
try:
|
||||
await context.close()
|
||||
except:
|
||||
pass
|
||||
context = None
|
||||
|
||||
try:
|
||||
await browser.close()
|
||||
except:
|
||||
pass
|
||||
browser = None
|
||||
if browser:
|
||||
await asyncio.wait_for(browser.close(), timeout=5.0)
|
||||
logger.debug(f"Successfully closed browser connection for {url}")
|
||||
except asyncio.TimeoutError:
|
||||
logger.warning(f"Timed out closing browser connection for {url} (5s)")
|
||||
except Exception as e:
|
||||
logger.warning(f"Error closing browser for {url}: {e}")
|
||||
finally:
|
||||
browser = None
|
||||
|
||||
# Force Python GC to release Playwright resources immediately
|
||||
# Playwright objects can have circular references that delay cleanup
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import asyncio
|
||||
import gc
|
||||
import json
|
||||
import os
|
||||
import websockets.exceptions
|
||||
@@ -221,19 +222,36 @@ class fetcher(Fetcher):
|
||||
self.browser_connection_url += f"{r}--proxy-server={proxy_url}"
|
||||
|
||||
async def quit(self, watch=None):
|
||||
try:
|
||||
await self.page.close()
|
||||
del self.page
|
||||
except Exception as e:
|
||||
pass
|
||||
watch_uuid = watch.get('uuid') if watch else 'unknown'
|
||||
|
||||
# Close page
|
||||
try:
|
||||
await self.browser.close()
|
||||
del self.browser
|
||||
if hasattr(self, 'page') and self.page:
|
||||
await asyncio.wait_for(self.page.close(), timeout=5.0)
|
||||
logger.debug(f"[{watch_uuid}] Page closed successfully")
|
||||
except asyncio.TimeoutError:
|
||||
logger.warning(f"[{watch_uuid}] Timed out closing page (5s)")
|
||||
except Exception as e:
|
||||
pass
|
||||
logger.warning(f"[{watch_uuid}] Error closing page: {e}")
|
||||
finally:
|
||||
self.page = None
|
||||
|
||||
logger.info("Cleanup puppeteer complete.")
|
||||
# Close browser connection
|
||||
try:
|
||||
if hasattr(self, 'browser') and self.browser:
|
||||
await asyncio.wait_for(self.browser.close(), timeout=5.0)
|
||||
logger.debug(f"[{watch_uuid}] Browser closed successfully")
|
||||
except asyncio.TimeoutError:
|
||||
logger.warning(f"[{watch_uuid}] Timed out closing browser (5s)")
|
||||
except Exception as e:
|
||||
logger.warning(f"[{watch_uuid}] Error closing browser: {e}")
|
||||
finally:
|
||||
self.browser = None
|
||||
|
||||
logger.info(f"[{watch_uuid}] Cleanup puppeteer complete")
|
||||
|
||||
# Force garbage collection to release resources
|
||||
gc.collect()
|
||||
|
||||
async def fetch_page(self,
|
||||
current_include_filters,
|
||||
@@ -263,9 +281,11 @@ class fetcher(Fetcher):
|
||||
# Connect directly using the specified browser_ws_endpoint
|
||||
# @todo timeout
|
||||
try:
|
||||
logger.debug(f"[{watch_uuid}] Connecting to browser at {self.browser_connection_url}")
|
||||
self.browser = await pyppeteer_instance.connect(browserWSEndpoint=self.browser_connection_url,
|
||||
ignoreHTTPSErrors=True
|
||||
)
|
||||
logger.debug(f"[{watch_uuid}] Browser connected successfully")
|
||||
except websockets.exceptions.InvalidStatusCode as e:
|
||||
raise BrowserConnectError(msg=f"Error while trying to connect the browser, Code {e.status_code} (check your access, whitelist IP, password etc)")
|
||||
except websockets.exceptions.InvalidURI:
|
||||
@@ -274,7 +294,18 @@ class fetcher(Fetcher):
|
||||
raise BrowserConnectError(msg=f"Error connecting to the browser - Exception '{str(e)}'")
|
||||
|
||||
# more reliable is to just request a new page
|
||||
self.page = await self.browser.newPage()
|
||||
try:
|
||||
logger.debug(f"[{watch_uuid}] Creating new page")
|
||||
self.page = await self.browser.newPage()
|
||||
logger.debug(f"[{watch_uuid}] Page created successfully")
|
||||
except Exception as e:
|
||||
logger.error(f"[{watch_uuid}] Failed to create new page: {e}")
|
||||
# Browser is connected but page creation failed - must cleanup browser
|
||||
try:
|
||||
await asyncio.wait_for(self.browser.close(), timeout=3.0)
|
||||
except Exception as cleanup_error:
|
||||
logger.error(f"[{watch_uuid}] Failed to cleanup browser after page creation failure: {cleanup_error}")
|
||||
raise
|
||||
|
||||
# Add console handler to capture console.log from favicon fetcher
|
||||
#self.page.on('console', lambda msg: logger.debug(f"Browser console [{msg.type}]: {msg.text}"))
|
||||
@@ -343,6 +374,12 @@ class fetcher(Fetcher):
|
||||
w = extra_wait - 2 if extra_wait > 4 else 2
|
||||
logger.debug(f"Waiting {w} seconds before calling Page.stopLoading...")
|
||||
await asyncio.sleep(w)
|
||||
|
||||
# Check if page still exists (might have been closed due to error during sleep)
|
||||
if not self.page or not hasattr(self.page, '_client'):
|
||||
logger.debug("Page already closed, skipping stopLoading")
|
||||
return
|
||||
|
||||
logger.debug("Issuing stopLoading command...")
|
||||
await self.page._client.send('Page.stopLoading')
|
||||
logger.debug("stopLoading command sent!")
|
||||
@@ -368,7 +405,9 @@ class fetcher(Fetcher):
|
||||
asyncio.create_task(handle_frame_navigation())
|
||||
response = await self.page.goto(url, timeout=0)
|
||||
await asyncio.sleep(1 + extra_wait)
|
||||
await self.page._client.send('Page.stopLoading')
|
||||
# Check if page still exists before sending command
|
||||
if self.page and hasattr(self.page, '_client'):
|
||||
await self.page._client.send('Page.stopLoading')
|
||||
|
||||
if response:
|
||||
break
|
||||
@@ -437,15 +476,9 @@ class fetcher(Fetcher):
|
||||
logger.debug(f"Screenshot format {self.screenshot_format}")
|
||||
self.screenshot = await capture_full_page(page=self.page, screenshot_format=self.screenshot_format, watch_uuid=watch_uuid, lock_viewport_elements=self.lock_viewport_elements)
|
||||
|
||||
# Force aggressive memory cleanup - pyppeteer base64 decode creates temporary buffers
|
||||
# Force garbage collection - pyppeteer base64 decode creates temporary buffers
|
||||
import gc
|
||||
gc.collect()
|
||||
# Release C-level memory from base64 decode back to OS
|
||||
try:
|
||||
import ctypes
|
||||
ctypes.CDLL('libc.so.6').malloc_trim(0)
|
||||
except Exception:
|
||||
pass
|
||||
self.xpath_data = await self.page.evaluate(XPATH_ELEMENT_JS, {
|
||||
"visualselector_xpath_selectors": visualselector_xpath_selectors,
|
||||
"max_height": MAX_TOTAL_HEIGHT
|
||||
|
||||
@@ -55,6 +55,26 @@ class fetcher(Fetcher):
|
||||
|
||||
session = requests.Session()
|
||||
|
||||
# Configure retry adapter for low-level network errors only
|
||||
# Retries connection timeouts, read timeouts, connection resets - not HTTP status codes
|
||||
# Especially helpful in parallel test execution when servers are slow/overloaded
|
||||
# Configurable via REQUESTS_RETRY_MAX_COUNT (default: 3 attempts)
|
||||
from requests.adapters import HTTPAdapter
|
||||
from urllib3.util.retry import Retry
|
||||
|
||||
max_retries = int(os.getenv("REQUESTS_RETRY_MAX_COUNT", "6"))
|
||||
retry_strategy = Retry(
|
||||
total=max_retries,
|
||||
connect=max_retries, # Retry connection timeouts
|
||||
read=max_retries, # Retry read timeouts
|
||||
status=0, # Don't retry on HTTP status codes
|
||||
backoff_factor=0.5, # Wait 0.3s, 0.6s, 1.2s between retries
|
||||
allowed_methods=["HEAD", "GET", "OPTIONS", "POST"],
|
||||
raise_on_status=False
|
||||
)
|
||||
adapter = HTTPAdapter(max_retries=retry_strategy)
|
||||
session.mount("http://", adapter)
|
||||
session.mount("https://", adapter)
|
||||
|
||||
if strtobool(os.getenv('ALLOW_FILE_URI', 'false')) and url.startswith('file://'):
|
||||
from requests_file import FileAdapter
|
||||
@@ -142,10 +162,11 @@ class fetcher(Fetcher):
|
||||
watch_uuid=None,
|
||||
):
|
||||
"""Async wrapper that runs the synchronous requests code in a thread pool"""
|
||||
|
||||
|
||||
loop = asyncio.get_event_loop()
|
||||
|
||||
|
||||
# Run the synchronous _run_sync in a thread pool to avoid blocking the event loop
|
||||
# Retry logic is handled by requests' HTTPAdapter (see _run_sync for configuration)
|
||||
await loop.run_in_executor(
|
||||
None, # Use default ThreadPoolExecutor
|
||||
lambda: self._run_sync(
|
||||
|
||||
@@ -14,7 +14,7 @@ from pathlib import Path
|
||||
from changedetectionio.strtobool import strtobool
|
||||
from threading import Event
|
||||
from changedetectionio.queue_handlers import RecheckPriorityQueue, NotificationQueue
|
||||
from changedetectionio import worker_handler
|
||||
from changedetectionio import worker_pool
|
||||
|
||||
from flask import (
|
||||
Flask,
|
||||
@@ -195,7 +195,7 @@ def _jinja2_filter_format_number_locale(value: float) -> str:
|
||||
|
||||
@app.template_global('is_checking_now')
|
||||
def _watch_is_checking_now(watch_obj, format="%Y-%m-%d %H:%M:%S"):
|
||||
return worker_handler.is_watch_running(watch_obj['uuid'])
|
||||
return worker_pool.is_watch_running(watch_obj['uuid'])
|
||||
|
||||
@app.template_global('get_watch_queue_position')
|
||||
def _get_watch_queue_position(watch_obj):
|
||||
@@ -206,13 +206,13 @@ def _get_watch_queue_position(watch_obj):
|
||||
@app.template_global('get_current_worker_count')
|
||||
def _get_current_worker_count():
|
||||
"""Get the current number of operational workers"""
|
||||
return worker_handler.get_worker_count()
|
||||
return worker_pool.get_worker_count()
|
||||
|
||||
@app.template_global('get_worker_status_info')
|
||||
def _get_worker_status_info():
|
||||
"""Get detailed worker status information for display"""
|
||||
status = worker_handler.get_worker_status()
|
||||
running_uuids = worker_handler.get_running_uuids()
|
||||
status = worker_pool.get_worker_status()
|
||||
running_uuids = worker_pool.get_running_uuids()
|
||||
|
||||
return {
|
||||
'count': status['worker_count'],
|
||||
@@ -801,7 +801,7 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
|
||||
# watchlist UI buttons etc
|
||||
import changedetectionio.blueprint.ui as ui
|
||||
app.register_blueprint(ui.construct_blueprint(datastore, update_q, worker_handler, queuedWatchMetaData, watch_check_update))
|
||||
app.register_blueprint(ui.construct_blueprint(datastore, update_q, worker_pool, queuedWatchMetaData, watch_check_update))
|
||||
|
||||
import changedetectionio.blueprint.watchlist as watchlist
|
||||
app.register_blueprint(watchlist.construct_blueprint(datastore=datastore, update_q=update_q, queuedWatchMetaData=queuedWatchMetaData), url_prefix='')
|
||||
@@ -838,10 +838,10 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
expected_workers = int(os.getenv("FETCH_WORKERS", datastore.data['settings']['requests']['workers']))
|
||||
|
||||
# Get basic status
|
||||
status = worker_handler.get_worker_status()
|
||||
status = worker_pool.get_worker_status()
|
||||
|
||||
# Perform health check
|
||||
health_result = worker_handler.check_worker_health(
|
||||
health_result = worker_pool.check_worker_health(
|
||||
expected_count=expected_workers,
|
||||
update_q=update_q,
|
||||
notification_q=notification_q,
|
||||
@@ -905,14 +905,24 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
# Can be overridden by ENV or use the default settings
|
||||
n_workers = int(os.getenv("FETCH_WORKERS", datastore.data['settings']['requests']['workers']))
|
||||
logger.info(f"Starting {n_workers} workers during app initialization")
|
||||
worker_handler.start_workers(n_workers, update_q, notification_q, app, datastore)
|
||||
worker_pool.start_workers(n_workers, update_q, notification_q, app, datastore)
|
||||
|
||||
# Skip background threads in batch mode (just process queue and exit)
|
||||
batch_mode = app.config.get('batch_mode', False)
|
||||
if not batch_mode:
|
||||
# @todo handle ctrl break
|
||||
ticker_thread = threading.Thread(target=ticker_thread_check_time_launch_checks, daemon=True, name="TickerThread-ScheduleChecker").start()
|
||||
threading.Thread(target=notification_runner, daemon=True, name="NotificationRunner").start()
|
||||
|
||||
# Start configurable number of notification workers (default 1)
|
||||
notification_workers = int(os.getenv("NOTIFICATION_WORKERS", "1"))
|
||||
for i in range(notification_workers):
|
||||
threading.Thread(
|
||||
target=notification_runner,
|
||||
args=(i,),
|
||||
daemon=True,
|
||||
name=f"NotificationRunner-{i}"
|
||||
).start()
|
||||
logger.info(f"Started {notification_workers} notification worker(s)")
|
||||
|
||||
in_pytest = "pytest" in sys.modules or "PYTEST_CURRENT_TEST" in os.environ
|
||||
# Check for new release version, but not when running in test/build or pytest
|
||||
@@ -954,14 +964,14 @@ def check_for_new_version():
|
||||
app.config.exit.wait(86400)
|
||||
|
||||
|
||||
def notification_runner():
|
||||
def notification_runner(worker_id=0):
|
||||
global notification_debug_log
|
||||
from datetime import datetime
|
||||
import json
|
||||
with app.app_context():
|
||||
while not app.config.exit.is_set():
|
||||
try:
|
||||
# At the moment only one thread runs (single runner)
|
||||
# Multiple workers can run concurrently (configurable via NOTIFICATION_WORKERS)
|
||||
n_object = notification_q.get(block=False)
|
||||
except queue.Empty:
|
||||
app.config.exit.wait(1)
|
||||
@@ -987,7 +997,7 @@ def notification_runner():
|
||||
sent_obj = process_notification(n_object, datastore)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Watch URL: {n_object['watch_url']} Error {str(e)}")
|
||||
logger.error(f"Notification worker {worker_id} - Watch URL: {n_object['watch_url']} Error {str(e)}")
|
||||
|
||||
# UUID wont be present when we submit a 'test' from the global settings
|
||||
if 'uuid' in n_object:
|
||||
@@ -1028,7 +1038,7 @@ def ticker_thread_check_time_launch_checks():
|
||||
now = time.time()
|
||||
if now - last_health_check > 60:
|
||||
expected_workers = int(os.getenv("FETCH_WORKERS", datastore.data['settings']['requests']['workers']))
|
||||
health_result = worker_handler.check_worker_health(
|
||||
health_result = worker_pool.check_worker_health(
|
||||
expected_count=expected_workers,
|
||||
update_q=update_q,
|
||||
notification_q=notification_q,
|
||||
@@ -1047,7 +1057,7 @@ def ticker_thread_check_time_launch_checks():
|
||||
continue
|
||||
|
||||
# Get a list of watches by UUID that are currently fetching data
|
||||
running_uuids = worker_handler.get_running_uuids()
|
||||
running_uuids = worker_pool.get_running_uuids()
|
||||
|
||||
# Build set of queued UUIDs once for O(1) lookup instead of O(n) per watch
|
||||
queued_uuids = {q_item.item['uuid'] for q_item in update_q.queue}
|
||||
@@ -1153,7 +1163,7 @@ def ticker_thread_check_time_launch_checks():
|
||||
priority = int(time.time())
|
||||
|
||||
# Into the queue with you
|
||||
queued_successfully = worker_handler.queue_item_async_safe(update_q,
|
||||
queued_successfully = worker_pool.queue_item_async_safe(update_q,
|
||||
queuedWatchMetaData.PrioritizedItem(priority=priority,
|
||||
item={'uuid': uuid})
|
||||
)
|
||||
|
||||
@@ -730,7 +730,7 @@ class quickWatchForm(Form):
|
||||
url = fields.URLField(_l('URL'), validators=[validateURL()])
|
||||
tags = StringTagUUID(_l('Group tag'), validators=[validators.Optional()])
|
||||
watch_submit_button = SubmitField(_l('Watch'), render_kw={"class": "pure-button pure-button-primary"})
|
||||
processor = RadioField(_l('Processor'), choices=lambda: processors.available_processors(), default="text_json_diff")
|
||||
processor = RadioField(_l('Processor'), choices=lambda: processors.available_processors(), default=processors.get_default_processor)
|
||||
edit_and_watch_submit_button = SubmitField(_l('Edit > Watch'), render_kw={"class": "pure-button pure-button-primary"})
|
||||
|
||||
|
||||
@@ -749,7 +749,7 @@ class commonSettingsForm(Form):
|
||||
notification_format = SelectField(_l('Notification format'), choices=list(valid_notification_formats.items()))
|
||||
notification_title = StringField(_l('Notification Title'), default='ChangeDetection.io Notification - {{ watch_url }}', validators=[validators.Optional(), ValidateJinja2Template()])
|
||||
notification_urls = StringListField(_l('Notification URL List'), validators=[validators.Optional(), ValidateAppRiseServers(), ValidateJinja2Template()])
|
||||
processor = RadioField( label=_l("Processor - What do you want to achieve?"), choices=lambda: processors.available_processors(), default="text_json_diff")
|
||||
processor = RadioField( label=_l("Processor - What do you want to achieve?"), choices=lambda: processors.available_processors(), default=processors.get_default_processor)
|
||||
scheduler_timezone_default = StringField(_l("Default timezone for watch check scheduler"), render_kw={"list": "timezones"}, validators=[validateTimeZoneName()])
|
||||
webdriver_delay = IntegerField(_l('Wait seconds before extracting text'), validators=[validators.Optional(), validators.NumberRange(min=1, message=_l("Should contain one or more seconds"))])
|
||||
|
||||
@@ -763,7 +763,7 @@ class commonSettingsForm(Form):
|
||||
|
||||
|
||||
class importForm(Form):
|
||||
processor = RadioField(_l('Processor'), choices=lambda: processors.available_processors(), default="text_json_diff")
|
||||
processor = RadioField(_l('Processor'), choices=lambda: processors.available_processors(), default=processors.get_default_processor)
|
||||
urls = TextAreaField(_l('URLs'))
|
||||
xlsx_file = FileField(_l('Upload .xlsx file'), validators=[FileAllowed(['xlsx'], _l('Must be .xlsx file!'))])
|
||||
file_mapping = SelectField(_l('File mapping'), [validators.DataRequired()], choices={('wachete', 'Wachete mapping'), ('custom','Custom mapping')})
|
||||
|
||||
@@ -52,7 +52,13 @@ def render(template_str, **args: t.Any) -> str:
|
||||
return output[:JINJA2_MAX_RETURN_PAYLOAD_SIZE]
|
||||
|
||||
def render_fully_escaped(content):
|
||||
env = jinja2.sandbox.ImmutableSandboxedEnvironment(autoescape=True)
|
||||
template = env.from_string("{{ some_html|e }}")
|
||||
return template.render(some_html=content)
|
||||
"""
|
||||
Escape HTML content safely.
|
||||
|
||||
MEMORY LEAK FIX: Use markupsafe.escape() directly instead of creating
|
||||
Jinja2 environments (was causing 1M+ compilations per page load).
|
||||
Simpler, faster, and no concerns about environment state.
|
||||
"""
|
||||
from markupsafe import escape
|
||||
return str(escape(content))
|
||||
|
||||
|
||||
@@ -29,7 +29,7 @@ class model(dict):
|
||||
'proxy': None, # Preferred proxy connection
|
||||
'time_between_check': {'weeks': None, 'days': None, 'hours': 3, 'minutes': None, 'seconds': None},
|
||||
'timeout': int(getenv("DEFAULT_SETTINGS_REQUESTS_TIMEOUT", "45")), # Default 45 seconds
|
||||
'workers': int(getenv("DEFAULT_SETTINGS_REQUESTS_WORKERS", "10")), # Number of threads, lower is better for slow connections
|
||||
'workers': int(getenv("DEFAULT_SETTINGS_REQUESTS_WORKERS", "5")), # Number of threads, lower is better for slow connections
|
||||
'default_ua': {
|
||||
'html_requests': getenv("DEFAULT_SETTINGS_HEADERS_USERAGENT", DEFAULT_SETTINGS_HEADERS_USERAGENT),
|
||||
'html_webdriver': None,
|
||||
|
||||
@@ -613,6 +613,11 @@ class model(watch_base):
|
||||
try:
|
||||
with open(fname, 'wb') as f:
|
||||
f.write(decoded)
|
||||
|
||||
# Invalidate favicon filename cache
|
||||
if hasattr(self, '_favicon_filename_cache'):
|
||||
delattr(self, '_favicon_filename_cache')
|
||||
|
||||
# A signal that could trigger the socket server to update the browser also
|
||||
watch_check_update = signal('watch_favicon_bump')
|
||||
if watch_check_update:
|
||||
@@ -629,20 +634,32 @@ class model(watch_base):
|
||||
Find any favicon.* file in the current working directory
|
||||
and return the contents of the newest one.
|
||||
|
||||
MEMORY LEAK FIX: Cache the result to avoid repeated glob.glob() operations.
|
||||
glob.glob() causes millions of fnmatch allocations when called for every watch on page load.
|
||||
|
||||
Returns:
|
||||
bytes: Contents of the newest favicon file, or None if not found.
|
||||
str: Basename of the newest favicon file, or None if not found.
|
||||
"""
|
||||
# Check cache first (prevents 26M+ allocations from repeated glob operations)
|
||||
cache_key = '_favicon_filename_cache'
|
||||
if hasattr(self, cache_key):
|
||||
return getattr(self, cache_key)
|
||||
|
||||
import glob
|
||||
|
||||
# Search for all favicon.* files
|
||||
files = glob.glob(os.path.join(self.watch_data_dir, "favicon.*"))
|
||||
|
||||
if not files:
|
||||
return None
|
||||
result = None
|
||||
else:
|
||||
# Find the newest by modification time
|
||||
newest_file = max(files, key=os.path.getmtime)
|
||||
result = os.path.basename(newest_file)
|
||||
|
||||
# Find the newest by modification time
|
||||
newest_file = max(files, key=os.path.getmtime)
|
||||
return os.path.basename(newest_file)
|
||||
# Cache the result
|
||||
setattr(self, cache_key, result)
|
||||
return result
|
||||
|
||||
def get_screenshot_as_thumbnail(self, max_age=3200):
|
||||
"""Return path to a square thumbnail of the most recent screenshot.
|
||||
|
||||
@@ -105,6 +105,30 @@ class ChangeDetectionSpec:
|
||||
"""
|
||||
pass
|
||||
|
||||
@hookspec
|
||||
def register_processor(self):
|
||||
"""Register an external processor plugin.
|
||||
|
||||
External packages can implement this hook to register custom processors
|
||||
that will be discovered alongside built-in processors.
|
||||
|
||||
Returns:
|
||||
dict or None: Dictionary with processor information:
|
||||
{
|
||||
'processor_name': str, # Machine name (e.g., 'osint_recon')
|
||||
'processor_module': module, # Module containing processor.py
|
||||
'processor_class': class, # The perform_site_check class
|
||||
'metadata': { # Optional metadata
|
||||
'name': str, # Display name
|
||||
'description': str, # Description
|
||||
'processor_weight': int,# Sort weight (lower = higher priority)
|
||||
'list_badge_text': str, # Badge text for UI
|
||||
}
|
||||
}
|
||||
Return None if this plugin doesn't provide a processor
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
# Set up Plugin Manager
|
||||
plugin_manager = pluggy.PluginManager(PLUGIN_NAMESPACE)
|
||||
|
||||
@@ -17,9 +17,11 @@ def find_sub_packages(package_name):
|
||||
return [name for _, name, is_pkg in pkgutil.iter_modules(package.__path__) if is_pkg]
|
||||
|
||||
|
||||
@lru_cache(maxsize=1)
|
||||
def find_processors():
|
||||
"""
|
||||
Find all subclasses of DifferenceDetectionProcessor in the specified package.
|
||||
Results are cached to avoid repeated discovery.
|
||||
|
||||
:param package_name: The name of the package to scan for processor modules.
|
||||
:return: A list of (module, class) tuples.
|
||||
@@ -46,6 +48,23 @@ def find_processors():
|
||||
except (ModuleNotFoundError, ImportError) as e:
|
||||
logger.warning(f"Failed to import module {module_name}: {e} (find_processors())")
|
||||
|
||||
# Discover plugin processors via pluggy
|
||||
try:
|
||||
from changedetectionio.pluggy_interface import plugin_manager
|
||||
plugin_results = plugin_manager.hook.register_processor()
|
||||
|
||||
for result in plugin_results:
|
||||
if result and isinstance(result, dict):
|
||||
processor_module = result.get('processor_module')
|
||||
processor_name = result.get('processor_name')
|
||||
|
||||
if processor_module and processor_name:
|
||||
processors.append((processor_module, processor_name))
|
||||
plugin_path = getattr(processor_module, '__file__', 'unknown location')
|
||||
logger.info(f"Registered plugin processor: {processor_name} from {plugin_path}")
|
||||
except Exception as e:
|
||||
logger.warning(f"Error loading plugin processors: {e}")
|
||||
|
||||
return processors
|
||||
|
||||
|
||||
@@ -97,54 +116,137 @@ def find_processor_module(processor_name):
|
||||
return None
|
||||
|
||||
|
||||
def get_processor_module(processor_name):
|
||||
"""
|
||||
Get the actual processor module (with perform_site_check class) by name.
|
||||
Works for both built-in and plugin processors.
|
||||
|
||||
Args:
|
||||
processor_name: Processor machine name (e.g., 'text_json_diff', 'osint_recon')
|
||||
|
||||
Returns:
|
||||
module: The processor module containing perform_site_check, or None if not found
|
||||
"""
|
||||
processor_classes = find_processors()
|
||||
processor_tuple = next((tpl for tpl in processor_classes if tpl[1] == processor_name), None)
|
||||
|
||||
if processor_tuple:
|
||||
# Return the actual processor module (first element of tuple)
|
||||
return processor_tuple[0]
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def get_processor_submodule(processor_name, submodule_name):
|
||||
"""
|
||||
Get an optional submodule from a processor (e.g., 'difference', 'extract', 'preview').
|
||||
Works for both built-in and plugin processors.
|
||||
|
||||
Args:
|
||||
processor_name: Processor machine name (e.g., 'text_json_diff', 'osint_recon')
|
||||
submodule_name: Name of the submodule (e.g., 'difference', 'extract', 'preview')
|
||||
|
||||
Returns:
|
||||
module: The submodule if it exists, or None if not found
|
||||
"""
|
||||
processor_classes = find_processors()
|
||||
processor_tuple = next((tpl for tpl in processor_classes if tpl[1] == processor_name), None)
|
||||
|
||||
if not processor_tuple:
|
||||
return None
|
||||
|
||||
processor_module = processor_tuple[0]
|
||||
parent_module = get_parent_module(processor_module)
|
||||
|
||||
if not parent_module:
|
||||
return None
|
||||
|
||||
# Try to import the submodule
|
||||
try:
|
||||
# For built-in processors: changedetectionio.processors.text_json_diff.difference
|
||||
# For plugin processors: changedetectionio_osint.difference
|
||||
parent_module_name = parent_module.__name__
|
||||
submodule_full_name = f"{parent_module_name}.{submodule_name}"
|
||||
return importlib.import_module(submodule_full_name)
|
||||
except (ModuleNotFoundError, ImportError):
|
||||
return None
|
||||
|
||||
|
||||
@lru_cache(maxsize=1)
|
||||
def get_plugin_processor_metadata():
|
||||
"""Get metadata from plugin processors."""
|
||||
metadata = {}
|
||||
try:
|
||||
from changedetectionio.pluggy_interface import plugin_manager
|
||||
plugin_results = plugin_manager.hook.register_processor()
|
||||
|
||||
for result in plugin_results:
|
||||
if result and isinstance(result, dict):
|
||||
processor_name = result.get('processor_name')
|
||||
meta = result.get('metadata', {})
|
||||
if processor_name:
|
||||
metadata[processor_name] = meta
|
||||
except Exception as e:
|
||||
logger.warning(f"Error getting plugin processor metadata: {e}")
|
||||
return metadata
|
||||
|
||||
|
||||
def available_processors():
|
||||
"""
|
||||
Get a list of processors by name and description for the UI elements.
|
||||
Can be filtered via ALLOWED_PROCESSORS environment variable (comma-separated list).
|
||||
Can be filtered via DISABLED_PROCESSORS environment variable (comma-separated list).
|
||||
:return: A list :)
|
||||
"""
|
||||
|
||||
processor_classes = find_processors()
|
||||
|
||||
# Check if ALLOWED_PROCESSORS env var is set
|
||||
# For now we disable it, need to make a deploy with lots of new code and this will be an overload
|
||||
allowed_processors_env = os.getenv('ALLOWED_PROCESSORS', 'text_json_diff, restock_diff').strip()
|
||||
allowed_processors = None
|
||||
if allowed_processors_env:
|
||||
# Check if DISABLED_PROCESSORS env var is set
|
||||
disabled_processors_env = os.getenv('DISABLED_PROCESSORS', 'image_ssim_diff').strip()
|
||||
disabled_processors = []
|
||||
if disabled_processors_env:
|
||||
# Parse comma-separated list and strip whitespace
|
||||
allowed_processors = [p.strip() for p in allowed_processors_env.split(',') if p.strip()]
|
||||
logger.info(f"ALLOWED_PROCESSORS set, filtering to: {allowed_processors}")
|
||||
disabled_processors = [p.strip() for p in disabled_processors_env.split(',') if p.strip()]
|
||||
logger.info(f"DISABLED_PROCESSORS set, disabling: {disabled_processors}")
|
||||
|
||||
available = []
|
||||
plugin_metadata = get_plugin_processor_metadata()
|
||||
|
||||
for module, sub_package_name in processor_classes:
|
||||
# Filter by allowed processors if set
|
||||
if allowed_processors and sub_package_name not in allowed_processors:
|
||||
logger.debug(f"Skipping processor '{sub_package_name}' (not in ALLOWED_PROCESSORS)")
|
||||
# Skip disabled processors
|
||||
if sub_package_name in disabled_processors:
|
||||
logger.debug(f"Skipping processor '{sub_package_name}' (in DISABLED_PROCESSORS)")
|
||||
continue
|
||||
|
||||
# Try to get the 'name' attribute from the processor module first
|
||||
if hasattr(module, 'name'):
|
||||
description = gettext(module.name)
|
||||
# Check if this is a plugin processor
|
||||
if sub_package_name in plugin_metadata:
|
||||
meta = plugin_metadata[sub_package_name]
|
||||
description = gettext(meta.get('name', sub_package_name))
|
||||
# Plugin processors start from weight 10 to separate them from built-in processors
|
||||
weight = 100 + meta.get('processor_weight', 0)
|
||||
else:
|
||||
# Fall back to processor_description from parent module's __init__.py
|
||||
parent_module = get_parent_module(module)
|
||||
if parent_module and hasattr(parent_module, 'processor_description'):
|
||||
description = gettext(parent_module.processor_description)
|
||||
# Try to get the 'name' attribute from the processor module first
|
||||
if hasattr(module, 'name'):
|
||||
description = gettext(module.name)
|
||||
else:
|
||||
# Final fallback to a readable name
|
||||
description = sub_package_name.replace('_', ' ').title()
|
||||
# Fall back to processor_description from parent module's __init__.py
|
||||
parent_module = get_parent_module(module)
|
||||
if parent_module and hasattr(parent_module, 'processor_description'):
|
||||
description = gettext(parent_module.processor_description)
|
||||
else:
|
||||
# Final fallback to a readable name
|
||||
description = sub_package_name.replace('_', ' ').title()
|
||||
|
||||
# Get weight for sorting (lower weight = higher in list)
|
||||
weight = 0 # Default weight for processors without explicit weight
|
||||
# Get weight for sorting (lower weight = higher in list)
|
||||
weight = 0 # Default weight for processors without explicit weight
|
||||
|
||||
# Check processor module itself first
|
||||
if hasattr(module, 'processor_weight'):
|
||||
weight = module.processor_weight
|
||||
else:
|
||||
# Fall back to parent module (package __init__.py)
|
||||
parent_module = get_parent_module(module)
|
||||
if parent_module and hasattr(parent_module, 'processor_weight'):
|
||||
weight = parent_module.processor_weight
|
||||
# Check processor module itself first
|
||||
if hasattr(module, 'processor_weight'):
|
||||
weight = module.processor_weight
|
||||
else:
|
||||
# Fall back to parent module (package __init__.py)
|
||||
parent_module = get_parent_module(module)
|
||||
if parent_module and hasattr(parent_module, 'processor_weight'):
|
||||
weight = parent_module.processor_weight
|
||||
|
||||
available.append((sub_package_name, description, weight))
|
||||
|
||||
@@ -155,6 +257,20 @@ def available_processors():
|
||||
return [(name, desc) for name, desc, weight in available]
|
||||
|
||||
|
||||
def get_default_processor():
|
||||
"""
|
||||
Get the default processor to use when none is specified.
|
||||
Returns the first available processor based on weight (lowest weight = highest priority).
|
||||
This ensures forms auto-select a valid processor even when DISABLED_PROCESSORS filters the list.
|
||||
|
||||
:return: The processor name string (e.g., 'text_json_diff')
|
||||
"""
|
||||
available = available_processors()
|
||||
if available:
|
||||
return available[0][0] # Return the processor name from first tuple
|
||||
return 'text_json_diff' # Fallback if somehow no processors are available
|
||||
|
||||
|
||||
def get_processor_badge_texts():
|
||||
"""
|
||||
Get a dictionary mapping processor names to their list_badge_text values.
|
||||
@@ -279,3 +395,76 @@ def get_processor_badge_css():
|
||||
|
||||
return '\n\n'.join(css_rules)
|
||||
|
||||
|
||||
def save_processor_config(datastore, watch_uuid, config_data):
|
||||
"""
|
||||
Save processor-specific configuration to JSON file.
|
||||
|
||||
This is a shared helper function used by both the UI edit form and API endpoints
|
||||
to consistently handle processor configuration storage.
|
||||
|
||||
Args:
|
||||
datastore: The application datastore instance
|
||||
watch_uuid: UUID of the watch
|
||||
config_data: Dictionary of configuration data to save (with processor_config_* prefix removed)
|
||||
|
||||
Returns:
|
||||
bool: True if saved successfully, False otherwise
|
||||
"""
|
||||
if not config_data:
|
||||
return True
|
||||
|
||||
try:
|
||||
from changedetectionio.processors.base import difference_detection_processor
|
||||
|
||||
# Get processor name from watch
|
||||
watch = datastore.data['watching'].get(watch_uuid)
|
||||
if not watch:
|
||||
logger.error(f"Cannot save processor config: watch {watch_uuid} not found")
|
||||
return False
|
||||
|
||||
processor_name = watch.get('processor', 'text_json_diff')
|
||||
|
||||
# Create a processor instance to access config methods
|
||||
processor_instance = difference_detection_processor(datastore, watch_uuid)
|
||||
|
||||
# Use processor name as filename so each processor keeps its own config
|
||||
config_filename = f'{processor_name}.json'
|
||||
processor_instance.update_extra_watch_config(config_filename, config_data)
|
||||
|
||||
logger.debug(f"Saved processor config to {config_filename}: {config_data}")
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to save processor config: {e}")
|
||||
return False
|
||||
|
||||
|
||||
def extract_processor_config_from_form_data(form_data):
|
||||
"""
|
||||
Extract processor_config_* fields from form data and return separate dicts.
|
||||
|
||||
This is a shared helper function used by both the UI edit form and API endpoints
|
||||
to consistently handle processor configuration extraction.
|
||||
|
||||
IMPORTANT: This function modifies form_data in-place by removing processor_config_* fields.
|
||||
|
||||
Args:
|
||||
form_data: Dictionary of form data (will be modified in-place)
|
||||
|
||||
Returns:
|
||||
dict: Dictionary of processor config data (with processor_config_* prefix removed)
|
||||
"""
|
||||
processor_config_data = {}
|
||||
|
||||
# Use list() to create a copy of keys since we're modifying the dict
|
||||
for field_name in list(form_data.keys()):
|
||||
if field_name.startswith('processor_config_'):
|
||||
config_key = field_name.replace('processor_config_', '')
|
||||
# Save all values (including empty strings) to allow explicit clearing of settings
|
||||
processor_config_data[config_key] = form_data[field_name]
|
||||
# Remove from form_data to prevent it from reaching datastore
|
||||
del form_data[field_name]
|
||||
|
||||
return processor_config_data
|
||||
|
||||
|
||||
@@ -12,6 +12,13 @@ processor_description = "Visual/Screenshot change detection (Fast)"
|
||||
processor_name = "image_ssim_diff"
|
||||
processor_weight = 2 # Lower weight = appears at top, heavier weight = appears lower (bottom)
|
||||
|
||||
# Processor capabilities
|
||||
supports_visual_selector = True
|
||||
supports_browser_steps = True
|
||||
supports_text_filters_and_triggers = False
|
||||
supports_text_filters_and_triggers_elements = False
|
||||
supports_request_type = True
|
||||
|
||||
PROCESSOR_CONFIG_NAME = f"{Path(__file__).parent.name}.json"
|
||||
|
||||
# Subprocess timeout settings
|
||||
|
||||
@@ -4,6 +4,13 @@ from changedetectionio.model.Watch import model as BaseWatch
|
||||
from typing import Union
|
||||
import re
|
||||
|
||||
# Processor capabilities
|
||||
supports_visual_selector = True
|
||||
supports_browser_steps = True
|
||||
supports_text_filters_and_triggers = True
|
||||
supports_text_filters_and_triggers_elements = True
|
||||
supports_request_type = True
|
||||
|
||||
class Restock(dict):
|
||||
|
||||
def parse_currency(self, raw_value: str) -> Union[float, None]:
|
||||
|
||||
@@ -1,6 +1,12 @@
|
||||
|
||||
from loguru import logger
|
||||
|
||||
# Processor capabilities
|
||||
supports_visual_selector = True
|
||||
supports_browser_steps = True
|
||||
supports_text_filters_and_triggers = True
|
||||
supports_text_filters_and_triggers_elements = True
|
||||
supports_request_type = True
|
||||
|
||||
|
||||
|
||||
def _task(watch, update_handler):
|
||||
|
||||
@@ -5,51 +5,57 @@ import heapq
|
||||
import queue
|
||||
import threading
|
||||
|
||||
try:
|
||||
import janus
|
||||
except ImportError:
|
||||
logger.critical(f"CRITICAL: janus library is required. Install with: pip install janus")
|
||||
raise
|
||||
# Janus is no longer required - we use pure threading.Queue for multi-loop support
|
||||
# try:
|
||||
# import janus
|
||||
# except ImportError:
|
||||
# pass # Not needed anymore
|
||||
|
||||
|
||||
class RecheckPriorityQueue:
|
||||
"""
|
||||
Ultra-reliable priority queue using janus for async/sync bridging.
|
||||
|
||||
CRITICAL DESIGN NOTE: Both sync_q and async_q are required because:
|
||||
- sync_q: Used by Flask routes, ticker threads, and other synchronous code
|
||||
- async_q: Used by async workers (the actual fetchers/processors) and coroutines
|
||||
|
||||
DO NOT REMOVE EITHER INTERFACE - they bridge different execution contexts:
|
||||
- Synchronous code (Flask, threads) cannot use async methods without blocking
|
||||
- Async code cannot use sync methods without blocking the event loop
|
||||
- janus provides the only safe bridge between these two worlds
|
||||
|
||||
Attempting to unify to async-only would require:
|
||||
- Converting all Flask routes to async (major breaking change)
|
||||
- Using asyncio.run() in sync contexts (causes deadlocks)
|
||||
- Thread-pool wrapping (adds complexity and overhead)
|
||||
|
||||
Minimal implementation focused on reliability:
|
||||
- Pure janus for sync/async bridge
|
||||
- Thread-safe priority ordering
|
||||
- Bulletproof error handling with critical logging
|
||||
Thread-safe priority queue supporting multiple async event loops.
|
||||
|
||||
ARCHITECTURE:
|
||||
- Multiple async workers, each with its own event loop in its own thread
|
||||
- Hybrid sync/async design for maximum scalability
|
||||
- Sync interface for ticker thread (threading.Queue)
|
||||
- Async interface for workers (asyncio.Event - NO executor threads!)
|
||||
|
||||
SCALABILITY:
|
||||
- Scales to 100-200+ workers without executor thread exhaustion
|
||||
- Async workers wait on asyncio.Event (pure coroutines, no threads)
|
||||
- Sync callers use threading.Queue (backward compatible)
|
||||
|
||||
WHY NOT JANUS:
|
||||
- Janus binds to ONE event loop at creation time
|
||||
- Our architecture has 15+ workers, each with separate event loops
|
||||
- Workers in different threads/loops cannot share janus async interface
|
||||
|
||||
WHY NOT RUN_IN_EXECUTOR:
|
||||
- With 200 workers, run_in_executor() would block 200 threads
|
||||
- Exhausts ThreadPoolExecutor, starves Flask HTTP handlers
|
||||
- Pure async approach uses 0 threads while waiting
|
||||
"""
|
||||
|
||||
|
||||
def __init__(self, maxsize: int = 0):
|
||||
try:
|
||||
self._janus_queue = janus.Queue(maxsize=maxsize)
|
||||
# BOTH interfaces required - see class docstring for why
|
||||
self.sync_q = self._janus_queue.sync_q # Flask routes, ticker thread
|
||||
self.async_q = self._janus_queue.async_q # Async workers
|
||||
|
||||
import asyncio
|
||||
|
||||
# Sync interface: threading.Queue for ticker thread and Flask routes
|
||||
self._notification_queue = queue.Queue(maxsize=maxsize if maxsize > 0 else 0)
|
||||
|
||||
# Priority storage - thread-safe
|
||||
self._priority_items = []
|
||||
self._lock = threading.RLock()
|
||||
|
||||
|
||||
# No event signaling needed - pure polling approach
|
||||
# Workers check queue every 50ms (latency acceptable: 0-500ms)
|
||||
# Scales to 1000+ workers: each sleeping worker = ~4KB coroutine, not thread
|
||||
|
||||
# Signals for UI updates
|
||||
self.queue_length_signal = signal('queue_length')
|
||||
|
||||
|
||||
logger.debug("RecheckPriorityQueue initialized successfully")
|
||||
except Exception as e:
|
||||
logger.critical(f"CRITICAL: Failed to initialize RecheckPriorityQueue: {str(e)}")
|
||||
@@ -58,38 +64,48 @@ class RecheckPriorityQueue:
|
||||
# SYNC INTERFACE (for ticker thread)
|
||||
def put(self, item, block: bool = True, timeout: Optional[float] = None):
|
||||
"""Thread-safe sync put with priority ordering"""
|
||||
logger.trace(f"RecheckQueue.put() called for item: {self._get_item_uuid(item)}, block={block}, timeout={timeout}")
|
||||
try:
|
||||
# Add to priority storage
|
||||
# CRITICAL: Add to both priority storage AND notification queue atomically
|
||||
# to prevent desynchronization where item exists but no notification
|
||||
with self._lock:
|
||||
heapq.heappush(self._priority_items, item)
|
||||
|
||||
# Notify via janus sync queue
|
||||
self.sync_q.put(True, block=block, timeout=timeout)
|
||||
|
||||
# Emit signals
|
||||
self._emit_put_signals(item)
|
||||
|
||||
|
||||
# Add notification - use blocking with timeout for safety
|
||||
# Notification queue is unlimited size, so should never block in practice
|
||||
# but timeout ensures we detect any unexpected issues (deadlock, etc)
|
||||
try:
|
||||
self._notification_queue.put(True, block=True, timeout=5.0)
|
||||
except Exception as notif_e:
|
||||
# Notification failed - MUST remove from priority_items to keep in sync
|
||||
# This prevents "Priority queue inconsistency" errors in get()
|
||||
logger.critical(f"CRITICAL: Notification queue put failed, removing from priority_items: {notif_e}")
|
||||
self._priority_items.remove(item)
|
||||
heapq.heapify(self._priority_items)
|
||||
raise # Re-raise to be caught by outer exception handler
|
||||
|
||||
# Signal emission after successful queue - log but don't fail the operation
|
||||
# Item is already safely queued, so signal failure shouldn't affect queue state
|
||||
try:
|
||||
self._emit_put_signals(item)
|
||||
except Exception as signal_e:
|
||||
logger.error(f"Failed to emit put signals but item queued successfully: {signal_e}")
|
||||
|
||||
logger.trace(f"Successfully queued item: {self._get_item_uuid(item)}")
|
||||
return True
|
||||
|
||||
|
||||
except Exception as e:
|
||||
logger.critical(f"CRITICAL: Failed to put item {self._get_item_uuid(item)}: {str(e)}")
|
||||
# Remove from priority storage if janus put failed
|
||||
try:
|
||||
with self._lock:
|
||||
if item in self._priority_items:
|
||||
self._priority_items.remove(item)
|
||||
heapq.heapify(self._priority_items)
|
||||
except Exception as cleanup_e:
|
||||
logger.critical(f"CRITICAL: Failed to cleanup after put failure: {str(e)}")
|
||||
logger.critical(f"CRITICAL: Failed to put item {self._get_item_uuid(item)}: {type(e).__name__}: {str(e)}")
|
||||
# Item should have been cleaned up in the inner try/except if notification failed
|
||||
return False
|
||||
|
||||
def get(self, block: bool = True, timeout: Optional[float] = None):
|
||||
"""Thread-safe sync get with priority ordering"""
|
||||
import queue
|
||||
logger.trace(f"RecheckQueue.get() called, block={block}, timeout={timeout}")
|
||||
import queue as queue_module
|
||||
try:
|
||||
# Wait for notification
|
||||
self.sync_q.get(block=block, timeout=timeout)
|
||||
# Wait for notification (this doesn't return the actual item, just signals availability)
|
||||
self._notification_queue.get(block=block, timeout=timeout)
|
||||
|
||||
# Get highest priority item
|
||||
with self._lock:
|
||||
@@ -98,69 +114,91 @@ class RecheckPriorityQueue:
|
||||
raise Exception("Priority queue inconsistency")
|
||||
item = heapq.heappop(self._priority_items)
|
||||
|
||||
# Emit signals
|
||||
self._emit_get_signals()
|
||||
# Signal emission after successful retrieval - log but don't lose the item
|
||||
# Item is already retrieved, so signal failure shouldn't affect queue state
|
||||
try:
|
||||
self._emit_get_signals()
|
||||
except Exception as signal_e:
|
||||
logger.error(f"Failed to emit get signals but item retrieved successfully: {signal_e}")
|
||||
|
||||
logger.debug(f"Successfully retrieved item: {self._get_item_uuid(item)}")
|
||||
logger.trace(f"RecheckQueue.get() successfully retrieved item: {self._get_item_uuid(item)}")
|
||||
return item
|
||||
|
||||
except queue.Empty:
|
||||
# Queue is empty with timeout - expected behavior, re-raise without logging
|
||||
raise
|
||||
except queue_module.Empty:
|
||||
# Queue is empty with timeout - expected behavior
|
||||
logger.trace(f"RecheckQueue.get() timed out - queue is empty (timeout={timeout})")
|
||||
raise # noqa
|
||||
except Exception as e:
|
||||
# Re-raise without logging - caller (worker) will handle and log appropriately
|
||||
logger.trace(f"RecheckQueue.get() failed with exception: {type(e).__name__}: {str(e)}")
|
||||
raise
|
||||
|
||||
# ASYNC INTERFACE (for workers)
|
||||
async def async_put(self, item):
|
||||
"""Pure async put with priority ordering"""
|
||||
async def async_put(self, item, executor=None):
|
||||
"""Async put with priority ordering - uses thread pool to avoid blocking
|
||||
|
||||
Args:
|
||||
item: Item to add to queue
|
||||
executor: Optional ThreadPoolExecutor. If None, uses default pool.
|
||||
"""
|
||||
logger.trace(f"RecheckQueue.async_put() called for item: {self._get_item_uuid(item)}, executor={executor}")
|
||||
import asyncio
|
||||
try:
|
||||
# Add to priority storage
|
||||
with self._lock:
|
||||
heapq.heappush(self._priority_items, item)
|
||||
|
||||
# Notify via janus async queue
|
||||
await self.async_q.put(True)
|
||||
|
||||
# Emit signals
|
||||
self._emit_put_signals(item)
|
||||
|
||||
logger.debug(f"Successfully async queued item: {self._get_item_uuid(item)}")
|
||||
return True
|
||||
|
||||
# Use run_in_executor to call sync put without blocking event loop
|
||||
loop = asyncio.get_event_loop()
|
||||
result = await loop.run_in_executor(
|
||||
executor, # Use provided executor or default
|
||||
lambda: self.put(item, block=True, timeout=5.0)
|
||||
)
|
||||
|
||||
logger.trace(f"RecheckQueue.async_put() successfully queued item: {self._get_item_uuid(item)}")
|
||||
return result
|
||||
|
||||
except Exception as e:
|
||||
logger.critical(f"CRITICAL: Failed to async put item {self._get_item_uuid(item)}: {str(e)}")
|
||||
# Remove from priority storage if janus put failed
|
||||
try:
|
||||
with self._lock:
|
||||
if item in self._priority_items:
|
||||
self._priority_items.remove(item)
|
||||
heapq.heapify(self._priority_items)
|
||||
except Exception as cleanup_e:
|
||||
logger.critical(f"CRITICAL: Failed to cleanup after async put failure: {str(e)}")
|
||||
return False
|
||||
|
||||
async def async_get(self):
|
||||
"""Pure async get with priority ordering"""
|
||||
|
||||
async def async_get(self, executor=None, timeout=1.0):
|
||||
"""
|
||||
Efficient async get using executor for blocking call.
|
||||
|
||||
HYBRID APPROACH: Best of both worlds
|
||||
- Uses run_in_executor for efficient blocking (no polling overhead)
|
||||
- Single timeout (no double-timeout race condition)
|
||||
- Scales well: executor sized to match worker count
|
||||
|
||||
With FETCH_WORKERS=10: 10 threads blocked max (acceptable)
|
||||
With FETCH_WORKERS=200: Need executor with 200+ threads (see worker_pool.py)
|
||||
|
||||
Args:
|
||||
executor: ThreadPoolExecutor (sized to match worker count)
|
||||
timeout: Maximum time to wait in seconds
|
||||
|
||||
Returns:
|
||||
Item from queue
|
||||
|
||||
Raises:
|
||||
queue.Empty: If timeout expires with no item available
|
||||
"""
|
||||
logger.trace(f"RecheckQueue.async_get() called, timeout={timeout}")
|
||||
import asyncio
|
||||
try:
|
||||
# Wait for notification
|
||||
await self.async_q.get()
|
||||
# Use run_in_executor to call sync get efficiently
|
||||
# No outer asyncio.wait_for wrapper = no double timeout issue!
|
||||
loop = asyncio.get_event_loop()
|
||||
item = await loop.run_in_executor(
|
||||
executor,
|
||||
lambda: self.get(block=True, timeout=timeout)
|
||||
)
|
||||
|
||||
# Get highest priority item
|
||||
with self._lock:
|
||||
if not self._priority_items:
|
||||
logger.critical(f"CRITICAL: Async queue notification received but no priority items available")
|
||||
raise Exception("Priority queue inconsistency")
|
||||
item = heapq.heappop(self._priority_items)
|
||||
|
||||
# Emit signals
|
||||
self._emit_get_signals()
|
||||
|
||||
logger.debug(f"Successfully async retrieved item: {self._get_item_uuid(item)}")
|
||||
logger.trace(f"RecheckQueue.async_get() successfully retrieved item: {self._get_item_uuid(item)}")
|
||||
return item
|
||||
|
||||
except queue.Empty:
|
||||
logger.trace(f"RecheckQueue.async_get() timed out - queue is empty")
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.critical(f"CRITICAL: Failed to async get item from queue: {str(e)}")
|
||||
logger.critical(f"CRITICAL: Failed to async get item from queue: {type(e).__name__}: {str(e)}")
|
||||
raise
|
||||
|
||||
# UTILITY METHODS
|
||||
@@ -186,10 +224,35 @@ class RecheckPriorityQueue:
|
||||
logger.critical(f"CRITICAL: Failed to get queued UUIDs: {str(e)}")
|
||||
return []
|
||||
|
||||
def close(self):
|
||||
"""Close the janus queue"""
|
||||
def clear(self):
|
||||
"""Clear all items from both priority storage and notification queue"""
|
||||
try:
|
||||
self._janus_queue.close()
|
||||
with self._lock:
|
||||
# Clear priority items
|
||||
self._priority_items.clear()
|
||||
|
||||
# Drain all notifications to prevent stale notifications
|
||||
# This is critical for test cleanup to prevent queue desynchronization
|
||||
drained = 0
|
||||
while not self._notification_queue.empty():
|
||||
try:
|
||||
self._notification_queue.get_nowait()
|
||||
drained += 1
|
||||
except queue.Empty:
|
||||
break
|
||||
|
||||
if drained > 0:
|
||||
logger.debug(f"Cleared queue: removed {drained} notifications")
|
||||
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.critical(f"CRITICAL: Failed to clear queue: {str(e)}")
|
||||
return False
|
||||
|
||||
def close(self):
|
||||
"""Close the queue"""
|
||||
try:
|
||||
# Nothing to close for threading.Queue
|
||||
logger.debug("RecheckPriorityQueue closed successfully")
|
||||
except Exception as e:
|
||||
logger.critical(f"CRITICAL: Failed to close RecheckPriorityQueue: {str(e)}")
|
||||
@@ -321,7 +384,7 @@ class RecheckPriorityQueue:
|
||||
except Exception:
|
||||
pass
|
||||
return 'unknown'
|
||||
|
||||
|
||||
def _emit_put_signals(self, item):
|
||||
"""Emit signals when item is added"""
|
||||
try:
|
||||
@@ -330,14 +393,14 @@ class RecheckPriorityQueue:
|
||||
watch_check_update = signal('watch_check_update')
|
||||
if watch_check_update:
|
||||
watch_check_update.send(watch_uuid=item.item['uuid'])
|
||||
|
||||
|
||||
# Queue length signal
|
||||
if self.queue_length_signal:
|
||||
self.queue_length_signal.send(length=self.qsize())
|
||||
|
||||
|
||||
except Exception as e:
|
||||
logger.critical(f"CRITICAL: Failed to emit put signals: {str(e)}")
|
||||
|
||||
|
||||
def _emit_get_signals(self):
|
||||
"""Emit signals when item is removed"""
|
||||
try:
|
||||
@@ -363,12 +426,11 @@ class NotificationQueue:
|
||||
|
||||
def __init__(self, maxsize: int = 0, datastore=None):
|
||||
try:
|
||||
self._janus_queue = janus.Queue(maxsize=maxsize)
|
||||
# BOTH interfaces required - see class docstring for why
|
||||
self.sync_q = self._janus_queue.sync_q # Flask routes, threads
|
||||
self.async_q = self._janus_queue.async_q # Async workers
|
||||
# Use pure threading.Queue to avoid event loop binding issues
|
||||
self._notification_queue = queue.Queue(maxsize=maxsize if maxsize > 0 else 0)
|
||||
self.notification_event_signal = signal('notification_event')
|
||||
self.datastore = datastore # For checking all_muted setting
|
||||
self._lock = threading.RLock()
|
||||
logger.debug("NotificationQueue initialized successfully")
|
||||
except Exception as e:
|
||||
logger.critical(f"CRITICAL: Failed to initialize NotificationQueue: {str(e)}")
|
||||
@@ -380,72 +442,97 @@ class NotificationQueue:
|
||||
|
||||
def put(self, item: Dict[str, Any], block: bool = True, timeout: Optional[float] = None):
|
||||
"""Thread-safe sync put with signal emission"""
|
||||
logger.trace(f"NotificationQueue.put() called for item: {item.get('uuid', 'unknown')}, block={block}, timeout={timeout}")
|
||||
try:
|
||||
# Check if all notifications are muted
|
||||
if self.datastore and self.datastore.data['settings']['application'].get('all_muted', False):
|
||||
logger.debug(f"Notification blocked - all notifications are muted: {item.get('uuid', 'unknown')}")
|
||||
return False
|
||||
|
||||
self.sync_q.put(item, block=block, timeout=timeout)
|
||||
with self._lock:
|
||||
self._notification_queue.put(item, block=block, timeout=timeout)
|
||||
self._emit_notification_signal(item)
|
||||
logger.debug(f"Successfully queued notification: {item.get('uuid', 'unknown')}")
|
||||
logger.trace(f"NotificationQueue.put() successfully queued notification: {item.get('uuid', 'unknown')}")
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.critical(f"CRITICAL: Failed to put notification {item.get('uuid', 'unknown')}: {str(e)}")
|
||||
return False
|
||||
|
||||
async def async_put(self, item: Dict[str, Any]):
|
||||
"""Pure async put with signal emission"""
|
||||
async def async_put(self, item: Dict[str, Any], executor=None):
|
||||
"""Async put with signal emission - uses thread pool
|
||||
|
||||
Args:
|
||||
item: Notification item to queue
|
||||
executor: Optional ThreadPoolExecutor
|
||||
"""
|
||||
logger.trace(f"NotificationQueue.async_put() called for item: {item.get('uuid', 'unknown')}, executor={executor}")
|
||||
import asyncio
|
||||
try:
|
||||
# Check if all notifications are muted
|
||||
if self.datastore and self.datastore.data['settings']['application'].get('all_muted', False):
|
||||
logger.debug(f"Notification blocked - all notifications are muted: {item.get('uuid', 'unknown')}")
|
||||
return False
|
||||
|
||||
await self.async_q.put(item)
|
||||
self._emit_notification_signal(item)
|
||||
logger.debug(f"Successfully async queued notification: {item.get('uuid', 'unknown')}")
|
||||
loop = asyncio.get_event_loop()
|
||||
await loop.run_in_executor(executor, lambda: self.put(item, block=True, timeout=5.0))
|
||||
logger.trace(f"NotificationQueue.async_put() successfully queued notification: {item.get('uuid', 'unknown')}")
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.critical(f"CRITICAL: Failed to async put notification {item.get('uuid', 'unknown')}: {str(e)}")
|
||||
return False
|
||||
|
||||
|
||||
def get(self, block: bool = True, timeout: Optional[float] = None):
|
||||
"""Thread-safe sync get"""
|
||||
logger.trace(f"NotificationQueue.get() called, block={block}, timeout={timeout}")
|
||||
try:
|
||||
return self.sync_q.get(block=block, timeout=timeout)
|
||||
with self._lock:
|
||||
item = self._notification_queue.get(block=block, timeout=timeout)
|
||||
logger.trace(f"NotificationQueue.get() retrieved item: {item.get('uuid', 'unknown') if isinstance(item, dict) else 'unknown'}")
|
||||
return item
|
||||
except queue.Empty as e:
|
||||
logger.trace(f"NotificationQueue.get() timed out - queue is empty (timeout={timeout})")
|
||||
raise e
|
||||
except Exception as e:
|
||||
logger.critical(f"CRITICAL: Failed to get notification: {str(e)}")
|
||||
logger.critical(f"CRITICAL: Failed to get notification: {type(e).__name__}: {str(e)}")
|
||||
raise e
|
||||
|
||||
async def async_get(self):
|
||||
"""Pure async get"""
|
||||
|
||||
async def async_get(self, executor=None):
|
||||
"""Async get - uses thread pool
|
||||
|
||||
Args:
|
||||
executor: Optional ThreadPoolExecutor
|
||||
"""
|
||||
logger.trace(f"NotificationQueue.async_get() called, executor={executor}")
|
||||
import asyncio
|
||||
try:
|
||||
return await self.async_q.get()
|
||||
loop = asyncio.get_event_loop()
|
||||
item = await loop.run_in_executor(executor, lambda: self.get(block=True, timeout=1.0))
|
||||
logger.trace(f"NotificationQueue.async_get() retrieved item: {item.get('uuid', 'unknown') if isinstance(item, dict) else 'unknown'}")
|
||||
return item
|
||||
except queue.Empty as e:
|
||||
logger.trace(f"NotificationQueue.async_get() timed out - queue is empty")
|
||||
raise e
|
||||
except Exception as e:
|
||||
logger.critical(f"CRITICAL: Failed to async get notification: {str(e)}")
|
||||
logger.critical(f"CRITICAL: Failed to async get notification: {type(e).__name__}: {str(e)}")
|
||||
raise e
|
||||
|
||||
def qsize(self) -> int:
|
||||
"""Get current queue size"""
|
||||
try:
|
||||
return self.sync_q.qsize()
|
||||
with self._lock:
|
||||
return self._notification_queue.qsize()
|
||||
except Exception as e:
|
||||
logger.critical(f"CRITICAL: Failed to get notification queue size: {str(e)}")
|
||||
return 0
|
||||
|
||||
|
||||
def empty(self) -> bool:
|
||||
"""Check if queue is empty"""
|
||||
return self.qsize() == 0
|
||||
|
||||
|
||||
def close(self):
|
||||
"""Close the janus queue"""
|
||||
"""Close the queue"""
|
||||
try:
|
||||
self._janus_queue.close()
|
||||
# Nothing to close for threading.Queue
|
||||
logger.debug("NotificationQueue closed successfully")
|
||||
except Exception as e:
|
||||
logger.critical(f"CRITICAL: Failed to close NotificationQueue: {str(e)}")
|
||||
|
||||
@@ -37,9 +37,9 @@ def register_watch_operation_handlers(socketio, datastore):
|
||||
# Import here to avoid circular imports
|
||||
from changedetectionio.flask_app import update_q
|
||||
from changedetectionio import queuedWatchMetaData
|
||||
from changedetectionio import worker_handler
|
||||
from changedetectionio import worker_pool
|
||||
|
||||
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
worker_pool.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||
logger.info(f"Socket.IO: Queued recheck for watch {uuid}")
|
||||
else:
|
||||
emit('operation_result', {'success': False, 'error': f'Unknown operation: {op}'})
|
||||
|
||||
@@ -145,10 +145,10 @@ def handle_watch_update(socketio, **kwargs):
|
||||
# Emit the watch update to all connected clients
|
||||
from changedetectionio.flask_app import update_q
|
||||
from changedetectionio.flask_app import _jinja2_filter_datetime
|
||||
from changedetectionio import worker_handler
|
||||
from changedetectionio import worker_pool
|
||||
|
||||
# Get list of watches that are currently running
|
||||
running_uuids = worker_handler.get_running_uuids()
|
||||
running_uuids = worker_pool.get_running_uuids()
|
||||
|
||||
# Get list of watches in the queue (efficient single-lock method)
|
||||
queue_list = update_q.get_queued_uuids()
|
||||
@@ -252,7 +252,7 @@ def init_socketio(app, datastore):
|
||||
def event_checkbox_operations(data):
|
||||
from changedetectionio.blueprint.ui import _handle_operations
|
||||
from changedetectionio import queuedWatchMetaData
|
||||
from changedetectionio import worker_handler
|
||||
from changedetectionio import worker_pool
|
||||
from changedetectionio.flask_app import update_q, watch_check_update
|
||||
import threading
|
||||
|
||||
@@ -268,7 +268,7 @@ def init_socketio(app, datastore):
|
||||
uuids=data.get('uuids'),
|
||||
datastore=datastore,
|
||||
extra_data=data.get('extra_data'),
|
||||
worker_handler=worker_handler,
|
||||
worker_pool=worker_pool,
|
||||
update_q=update_q,
|
||||
queuedWatchMetaData=queuedWatchMetaData,
|
||||
watch_check_update=watch_check_update,
|
||||
|
||||
@@ -10,6 +10,7 @@
|
||||
set -e
|
||||
|
||||
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
|
||||
rm tests/logs/* -f
|
||||
|
||||
# Since theres no curl installed lets roll with python3
|
||||
check_sanity() {
|
||||
@@ -64,18 +65,21 @@ data_sanity_test
|
||||
echo "-------------------- Running rest of tests in parallel -------------------------------"
|
||||
|
||||
# REMOVE_REQUESTS_OLD_SCREENSHOTS disabled so that we can write a screenshot and send it in test_notifications.py without a real browser
|
||||
REMOVE_REQUESTS_OLD_SCREENSHOTS=false \
|
||||
FETCH_WORKERS=2 REMOVE_REQUESTS_OLD_SCREENSHOTS=false \
|
||||
pytest tests/test_*.py \
|
||||
-n 30 \
|
||||
-n 18 \
|
||||
--dist=load \
|
||||
-vvv \
|
||||
-s \
|
||||
--capture=no \
|
||||
-k "not test_queue_system" \
|
||||
--log-cli-level=DEBUG \
|
||||
--log-cli-format="%(asctime)s [%(process)d] [%(levelname)s] %(name)s: %(message)s"
|
||||
|
||||
echo "---------------------------- DONE parallel test ---------------------------------------"
|
||||
|
||||
FETCH_WORKERS=20 pytest -vvv -s tests/test_queue_handler.py
|
||||
|
||||
echo "RUNNING WITH BASE_URL SET"
|
||||
|
||||
# Now re-run some tests with BASE_URL enabled
|
||||
|
||||
@@ -166,6 +166,9 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
|
||||
"""
|
||||
logger.info(f"Datastore path is '{datastore_path}'")
|
||||
|
||||
# CRITICAL: Update datastore_path (was using old path from __init__)
|
||||
self.datastore_path = datastore_path
|
||||
|
||||
# Initialize data structure
|
||||
self.__data = App.model()
|
||||
self.json_store_path = os.path.join(self.datastore_path, "changedetection.json")
|
||||
@@ -604,6 +607,19 @@ class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
|
||||
|
||||
return None
|
||||
|
||||
# Check PAGE_WATCH_LIMIT if set
|
||||
page_watch_limit = os.getenv('PAGE_WATCH_LIMIT')
|
||||
if page_watch_limit:
|
||||
try:
|
||||
page_watch_limit = int(page_watch_limit)
|
||||
current_watch_count = len(self.__data['watching'])
|
||||
if current_watch_count >= page_watch_limit:
|
||||
logger.error(f"Watch limit reached: {current_watch_count}/{page_watch_limit} watches. Cannot add {url}")
|
||||
flash(gettext("Watch limit reached ({}/{} watches). Cannot add more watches.").format(current_watch_count, page_watch_limit), 'error')
|
||||
return None
|
||||
except ValueError:
|
||||
logger.warning(f"Invalid PAGE_WATCH_LIMIT value: {page_watch_limit}, ignoring limit check")
|
||||
|
||||
if tag and type(tag) == str:
|
||||
# Then it's probably a string of the actual tag by name, split and add it
|
||||
for t in tag.split(','):
|
||||
|
||||
@@ -9,6 +9,11 @@ from changedetectionio import store
|
||||
import os
|
||||
import sys
|
||||
|
||||
# CRITICAL: Set short timeout for tests to prevent 45-second hangs
|
||||
# When test server is slow/unresponsive, workers fail fast instead of holding UUIDs for 45s
|
||||
# This prevents exponential priority growth from repeated deferrals (priority × 10 each defer)
|
||||
os.environ['DEFAULT_SETTINGS_REQUESTS_TIMEOUT'] = '5'
|
||||
|
||||
from changedetectionio.flask_app import init_app_secret, changedetection_app
|
||||
from changedetectionio.tests.util import live_server_setup, new_live_server_setup
|
||||
|
||||
@@ -29,6 +34,93 @@ def reportlog(pytestconfig):
|
||||
logger.remove(handler_id)
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def per_test_log_file(request):
|
||||
"""Create a separate log file for each test function with pytest output."""
|
||||
import re
|
||||
|
||||
# Create logs directory if it doesn't exist
|
||||
log_dir = os.path.join(os.path.dirname(__file__), "logs")
|
||||
os.makedirs(log_dir, exist_ok=True)
|
||||
|
||||
# Generate log filename from test name and worker ID (for parallel runs)
|
||||
test_name = request.node.name
|
||||
|
||||
# Sanitize test name - replace unsafe characters with underscores
|
||||
# Keep only alphanumeric, dash, underscore, and period
|
||||
safe_test_name = re.sub(r'[^\w\-.]', '_', test_name)
|
||||
|
||||
# Limit length to avoid filesystem issues (max 200 chars)
|
||||
if len(safe_test_name) > 200:
|
||||
# Keep first 150 chars + hash of full name + last 30 chars
|
||||
import hashlib
|
||||
name_hash = hashlib.md5(test_name.encode()).hexdigest()[:8]
|
||||
safe_test_name = f"{safe_test_name[:150]}_{name_hash}_{safe_test_name[-30:]}"
|
||||
|
||||
worker_id = os.environ.get('PYTEST_XDIST_WORKER', 'master')
|
||||
log_file = os.path.join(log_dir, f"{safe_test_name}_{worker_id}.log")
|
||||
|
||||
# Add file handler for this test with TRACE level
|
||||
handler_id = logger.add(
|
||||
log_file,
|
||||
format="{time:YYYY-MM-DD HH:mm:ss.SSS} | {level: <8} | {process} | {name}:{function}:{line} - {message}",
|
||||
level="TRACE",
|
||||
mode="w", # Overwrite if exists
|
||||
enqueue=True # Thread-safe
|
||||
)
|
||||
|
||||
logger.info(f"=== Starting test: {test_name} (worker: {worker_id}) ===")
|
||||
logger.info(f"Test location: {request.node.nodeid}")
|
||||
|
||||
yield
|
||||
|
||||
# Capture test outcome (PASSED/FAILED/SKIPPED/ERROR)
|
||||
outcome = "UNKNOWN"
|
||||
exc_info = None
|
||||
stdout = None
|
||||
stderr = None
|
||||
|
||||
if hasattr(request.node, 'rep_call'):
|
||||
outcome = request.node.rep_call.outcome.upper()
|
||||
if request.node.rep_call.failed:
|
||||
exc_info = request.node.rep_call.longreprtext
|
||||
# Capture stdout/stderr from call phase
|
||||
if hasattr(request.node.rep_call, 'sections'):
|
||||
for section_name, section_content in request.node.rep_call.sections:
|
||||
if 'stdout' in section_name.lower():
|
||||
stdout = section_content
|
||||
elif 'stderr' in section_name.lower():
|
||||
stderr = section_content
|
||||
elif hasattr(request.node, 'rep_setup'):
|
||||
if request.node.rep_setup.failed:
|
||||
outcome = "SETUP_FAILED"
|
||||
exc_info = request.node.rep_setup.longreprtext
|
||||
|
||||
logger.info(f"=== Test Result: {outcome} ===")
|
||||
|
||||
if exc_info:
|
||||
logger.error(f"=== Test Failure Details ===\n{exc_info}")
|
||||
|
||||
if stdout:
|
||||
logger.info(f"=== Captured stdout ===\n{stdout}")
|
||||
|
||||
if stderr:
|
||||
logger.warning(f"=== Captured stderr ===\n{stderr}")
|
||||
|
||||
logger.info(f"=== Finished test: {test_name} ===")
|
||||
logger.remove(handler_id)
|
||||
|
||||
|
||||
@pytest.hookimpl(tryfirst=True, hookwrapper=True)
|
||||
def pytest_runtest_makereport(item, call):
|
||||
"""Hook to capture test results and attach to the test node."""
|
||||
outcome = yield
|
||||
rep = outcome.get_result()
|
||||
|
||||
# Store report on the test node for access in fixtures
|
||||
setattr(item, f"rep_{rep.when}", rep)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def environment(mocker):
|
||||
"""Mock arrow.now() to return a fixed datetime for testing jinja2 time extension."""
|
||||
@@ -165,6 +257,57 @@ def prepare_test_function(live_server, datastore_path):
|
||||
except:
|
||||
break
|
||||
|
||||
# Add test helper methods to the app for worker management
|
||||
def set_workers(count):
|
||||
"""Set the number of workers for testing - brutal shutdown, no delays"""
|
||||
from changedetectionio import worker_pool
|
||||
from changedetectionio.flask_app import update_q, notification_q
|
||||
|
||||
current_count = worker_pool.get_worker_count()
|
||||
|
||||
# Special case: Setting to 0 means shutdown all workers brutally
|
||||
if count == 0:
|
||||
logger.debug(f"Brutally shutting down all {current_count} workers")
|
||||
worker_pool.shutdown_workers()
|
||||
return {
|
||||
'status': 'success',
|
||||
'message': f'Shutdown all {current_count} workers',
|
||||
'previous_count': current_count,
|
||||
'current_count': 0
|
||||
}
|
||||
|
||||
# Adjust worker count (no delays, no verification)
|
||||
result = worker_pool.adjust_async_worker_count(
|
||||
count,
|
||||
update_q=update_q,
|
||||
notification_q=notification_q,
|
||||
app=live_server.app,
|
||||
datastore=datastore
|
||||
)
|
||||
|
||||
return result
|
||||
|
||||
def check_all_workers_alive(expected_count):
|
||||
"""Check that all expected workers are alive"""
|
||||
from changedetectionio import worker_pool
|
||||
from changedetectionio.flask_app import update_q, notification_q
|
||||
result = worker_pool.check_worker_health(
|
||||
expected_count,
|
||||
update_q=update_q,
|
||||
notification_q=notification_q,
|
||||
app=live_server.app,
|
||||
datastore=datastore
|
||||
)
|
||||
assert result['status'] == 'healthy', f"Workers not healthy: {result['message']}"
|
||||
return result
|
||||
|
||||
# Attach helper methods to app for easy test access
|
||||
live_server.app.set_workers = set_workers
|
||||
live_server.app.check_all_workers_alive = check_all_workers_alive
|
||||
|
||||
|
||||
|
||||
|
||||
# Prevent background thread from writing during cleanup/reload
|
||||
datastore.needs_write = False
|
||||
datastore.needs_write_urgent = False
|
||||
@@ -262,8 +405,8 @@ def app(request, datastore_path):
|
||||
|
||||
# Shutdown workers gracefully before loguru cleanup
|
||||
try:
|
||||
from changedetectionio import worker_handler
|
||||
worker_handler.shutdown_workers()
|
||||
from changedetectionio import worker_pool
|
||||
worker_pool.shutdown_workers()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
@@ -311,4 +454,3 @@ def app(request, datastore_path):
|
||||
yield app
|
||||
|
||||
|
||||
|
||||
|
||||
41
changedetectionio/tests/plugins/test_processor.py
Normal file
41
changedetectionio/tests/plugins/test_processor.py
Normal file
@@ -0,0 +1,41 @@
|
||||
import time
|
||||
|
||||
from flask import url_for
|
||||
|
||||
from changedetectionio.tests.util import wait_for_all_checks
|
||||
|
||||
|
||||
def test_check_plugin_processor(client, live_server, measure_memory_usage, datastore_path):
|
||||
# requires os-int intelligence plugin installed (first basic one we test with)
|
||||
|
||||
res = client.get(url_for("watchlist.index"))
|
||||
assert b'OSINT Reconnaissance' in res.data, "Must have the OSINT plugin installed at test time"
|
||||
assert b'<input checked id="processor-0" name="processor" type="radio" value="text_json_diff">' in res.data, "But the first text_json_diff processor should always be selected by default in quick watch form"
|
||||
|
||||
res = client.post(
|
||||
url_for("ui.ui_views.form_quick_watch_add"),
|
||||
data={"url": 'http://127.0.0.1', "tags": '', 'processor': 'osint_recon'},
|
||||
follow_redirects=True
|
||||
)
|
||||
assert b"Watch added" in res.data
|
||||
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
||||
|
||||
wait_for_all_checks(client)
|
||||
|
||||
res = client.get(
|
||||
url_for("ui.ui_preview.preview_page", uuid="first"),
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
assert b'Target: http://127.0.0.1' in res.data
|
||||
assert b'DNSKEY Records' in res.data
|
||||
wait_for_all_checks(client)
|
||||
|
||||
|
||||
# Now change it to something that doesnt exist
|
||||
uuid = next(iter(live_server.app.config['DATASTORE'].data['watching']))
|
||||
live_server.app.config['DATASTORE'].data['watching'][uuid]['processor'] = "now_missing"
|
||||
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
||||
wait_for_all_checks(client)
|
||||
res = client.get(url_for("watchlist.index"))
|
||||
assert b"Exception: Processor module" in res.data and b'now_missing' in res.data, f'Should register that the plugin is missing for {uuid}'
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
import time
|
||||
from flask import url_for
|
||||
from .util import live_server_setup, extract_UUID_from_client, wait_for_all_checks
|
||||
from .util import live_server_setup, extract_UUID_from_client, wait_for_all_checks, delete_all_watches
|
||||
import os
|
||||
|
||||
|
||||
@@ -116,7 +116,7 @@ def test_check_ldjson_price_autodetect(client, live_server, measure_memory_usage
|
||||
# And not this cause its not the ld-json
|
||||
assert b"So let's see what happens" not in res.data
|
||||
|
||||
client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
|
||||
delete_all_watches(client)
|
||||
|
||||
##########################################################################################
|
||||
# And we shouldnt see the offer
|
||||
@@ -131,7 +131,7 @@ def test_check_ldjson_price_autodetect(client, live_server, measure_memory_usage
|
||||
assert b'ldjson-price-track-offer' not in res.data
|
||||
|
||||
##########################################################################################
|
||||
client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
|
||||
delete_all_watches(client)
|
||||
|
||||
|
||||
def _test_runner_check_bad_format_ignored(live_server, client, has_ldjson_price_data):
|
||||
@@ -147,7 +147,7 @@ def _test_runner_check_bad_format_ignored(live_server, client, has_ldjson_price_
|
||||
|
||||
|
||||
##########################################################################################
|
||||
client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
|
||||
delete_all_watches(client)
|
||||
|
||||
|
||||
def test_bad_ldjson_is_correctly_ignored(client, live_server, measure_memory_usage, datastore_path):
|
||||
|
||||
@@ -414,4 +414,4 @@ def test_plaintext_even_if_xml_content_and_can_apply_filters(client, live_server
|
||||
assert b'Abonnementen bijwerken' in res.data
|
||||
assert b'<foobar' not in res.data
|
||||
|
||||
res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
|
||||
res = delete_all_watches(client)
|
||||
|
||||
@@ -6,7 +6,7 @@ from .util import (
|
||||
set_original_response,
|
||||
set_modified_response,
|
||||
live_server_setup,
|
||||
wait_for_all_checks
|
||||
wait_for_all_checks, delete_all_watches
|
||||
)
|
||||
from loguru import logger
|
||||
|
||||
@@ -104,7 +104,7 @@ def run_socketio_watch_update_test(client, live_server, password_mode="", datast
|
||||
assert watch.has_unviewed, "The watch was not marked as unviewed after content change"
|
||||
|
||||
# Clean up
|
||||
client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
|
||||
delete_all_watches(client)
|
||||
|
||||
def test_everything(live_server, client, measure_memory_usage, datastore_path):
|
||||
|
||||
|
||||
@@ -69,7 +69,7 @@ def test_conditions_with_text_and_number(client, live_server, measure_memory_usa
|
||||
# 1. The page filtered text must contain "5" (first digit of value)
|
||||
# 2. The extracted number should be >= 20 and <= 100
|
||||
res = client.post(
|
||||
url_for("ui.ui_edit.edit_page", uuid="first"),
|
||||
url_for("ui.ui_edit.edit_page", uuid=uuid),
|
||||
data={
|
||||
"url": test_url,
|
||||
"fetch_backend": "html_requests",
|
||||
@@ -110,25 +110,20 @@ def test_conditions_with_text_and_number(client, live_server, measure_memory_usa
|
||||
|
||||
wait_for_all_checks(client)
|
||||
client.get(url_for("ui.mark_all_viewed"), follow_redirects=True)
|
||||
time.sleep(0.2)
|
||||
|
||||
wait_for_all_checks(client)
|
||||
time.sleep(1)
|
||||
|
||||
# Case 1
|
||||
set_number_in_range_response(datastore_path=datastore_path, number="70.5")
|
||||
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
||||
wait_for_all_checks(client)
|
||||
|
||||
time.sleep(2)
|
||||
# 75 is > 20 and < 100 and contains "5"
|
||||
res = client.get(url_for("watchlist.index"))
|
||||
assert b'has-unread-changes' in res.data
|
||||
|
||||
|
||||
# Case 2: Change with one condition violated
|
||||
# Number out of range (150) but contains '5'
|
||||
client.get(url_for("ui.mark_all_viewed"), follow_redirects=True)
|
||||
time.sleep(0.2)
|
||||
|
||||
set_number_out_of_range_response(datastore_path=datastore_path, number="150.5")
|
||||
|
||||
@@ -154,7 +149,6 @@ def test_condition_validate_rule_row(client, live_server, measure_memory_usage,
|
||||
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
||||
wait_for_all_checks(client)
|
||||
|
||||
uuid = next(iter(live_server.app.config['DATASTORE'].data['watching']))
|
||||
|
||||
# the front end submits the current form state which should override the watch in a temporary copy
|
||||
res = client.post(
|
||||
@@ -195,12 +189,8 @@ def test_condition_validate_rule_row(client, live_server, measure_memory_usage,
|
||||
)
|
||||
assert res.status_code == 200
|
||||
assert b'false' in res.data
|
||||
# cleanup for the next
|
||||
client.get(
|
||||
url_for("ui.form_delete", uuid="all"),
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
delete_all_watches(client)
|
||||
|
||||
|
||||
# If there was only a change in the whitespacing, then we shouldnt have a change detected
|
||||
@@ -230,17 +220,12 @@ def test_wordcount_conditions_plugin(client, live_server, measure_memory_usage,
|
||||
|
||||
# Check it saved
|
||||
res = client.get(
|
||||
url_for("ui.ui_edit.edit_page", uuid="first"),
|
||||
url_for("ui.ui_edit.edit_page", uuid=uuid),
|
||||
)
|
||||
|
||||
# Assert the word count is counted correctly
|
||||
assert b'<td>13</td>' in res.data
|
||||
|
||||
# cleanup for the next
|
||||
client.get(
|
||||
url_for("ui.form_delete", uuid="all"),
|
||||
follow_redirects=True
|
||||
)
|
||||
delete_all_watches(client)
|
||||
|
||||
# If there was only a change in the whitespacing, then we shouldnt have a change detected
|
||||
def test_lev_conditions_plugin(client, live_server, measure_memory_usage, datastore_path):
|
||||
|
||||
@@ -64,6 +64,7 @@ def test_DNS_errors(client, live_server, measure_memory_usage, datastore_path):
|
||||
follow_redirects=True
|
||||
)
|
||||
assert b"1 Imported" in res.data
|
||||
res = client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
||||
|
||||
# Give the thread time to pick it up
|
||||
wait_for_all_checks(client)
|
||||
@@ -79,7 +80,7 @@ def test_DNS_errors(client, live_server, measure_memory_usage, datastore_path):
|
||||
)
|
||||
assert found_name_resolution_error
|
||||
# Should always record that we tried
|
||||
assert bytes("just now".encode('utf-8')) in res.data
|
||||
assert "just now".encode('utf-8') in res.data or 'seconds ago'.encode('utf-8') in res.data
|
||||
delete_all_watches(client)
|
||||
|
||||
# Re 1513
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import os
|
||||
import time
|
||||
from flask import url_for
|
||||
from .util import set_original_response, wait_for_all_checks, wait_for_notification_endpoint_output
|
||||
from .util import set_original_response, wait_for_all_checks, wait_for_notification_endpoint_output, delete_all_watches
|
||||
from ..notification import valid_notification_formats
|
||||
|
||||
|
||||
@@ -118,8 +118,10 @@ def run_filter_test(client, live_server, content_filter, app_notification_format
|
||||
res = client.get(url_for("watchlist.index"))
|
||||
assert b'Warning, no filters were found' in res.data
|
||||
assert not os.path.isfile(notification_file)
|
||||
time.sleep(1)
|
||||
time.sleep(2)
|
||||
wait_for_all_checks(client)
|
||||
|
||||
wait_for_all_checks(client)
|
||||
assert live_server.app.config['DATASTORE'].data['watching'][uuid]['consecutive_filter_failures'] == 5
|
||||
|
||||
time.sleep(2)
|
||||
@@ -178,6 +180,7 @@ def run_filter_test(client, live_server, content_filter, app_notification_format
|
||||
follow_redirects=True
|
||||
)
|
||||
os.unlink(notification_file)
|
||||
delete_all_watches(client)
|
||||
|
||||
|
||||
def test_check_include_filters_failure_notification(client, live_server, measure_memory_usage, datastore_path):
|
||||
@@ -185,10 +188,12 @@ def test_check_include_filters_failure_notification(client, live_server, measure
|
||||
run_filter_test(client=client, live_server=live_server, content_filter='#nope-doesnt-exist', app_notification_format=valid_notification_formats.get('htmlcolor'), datastore_path=datastore_path)
|
||||
# Check markup send conversion didnt affect plaintext preference
|
||||
run_filter_test(client=client, live_server=live_server, content_filter='#nope-doesnt-exist', app_notification_format=valid_notification_formats.get('text'), datastore_path=datastore_path)
|
||||
delete_all_watches(client)
|
||||
|
||||
def test_check_xpath_filter_failure_notification(client, live_server, measure_memory_usage, datastore_path):
|
||||
# # live_server_setup(live_server) # Setup on conftest per function
|
||||
run_filter_test(client=client, live_server=live_server, content_filter='//*[@id="nope-doesnt-exist"]', app_notification_format=valid_notification_formats.get('htmlcolor'), datastore_path=datastore_path)
|
||||
delete_all_watches(client)
|
||||
|
||||
# Test that notification is never sent
|
||||
|
||||
@@ -197,3 +202,4 @@ def test_basic_markup_from_text(client, live_server, measure_memory_usage, datas
|
||||
from ..notification.handler import markup_text_links_to_html
|
||||
x = markup_text_links_to_html("hello https://google.com")
|
||||
assert 'a href' in x
|
||||
delete_all_watches(client)
|
||||
|
||||
@@ -166,7 +166,8 @@ def test_tag_add_in_ui(client, live_server, measure_memory_usage, datastore_path
|
||||
delete_all_watches(client)
|
||||
|
||||
def test_group_tag_notification(client, live_server, measure_memory_usage, datastore_path):
|
||||
|
||||
delete_all_watches(client)
|
||||
|
||||
set_original_response(datastore_path=datastore_path)
|
||||
|
||||
test_url = url_for('test_endpoint', _external=True)
|
||||
|
||||
@@ -142,6 +142,8 @@ def test_consistent_history(client, live_server, measure_memory_usage, datastore
|
||||
assert '"default"' not in f.read(), "'default' probably shouldnt be here, it came from when the 'default' Watch vars were accidently being saved"
|
||||
|
||||
|
||||
delete_all_watches(client)
|
||||
|
||||
def test_check_text_history_view(client, live_server, measure_memory_usage, datastore_path):
|
||||
|
||||
with open(os.path.join(datastore_path, "endpoint-content.txt"), "w") as f:
|
||||
@@ -162,7 +164,7 @@ def test_check_text_history_view(client, live_server, measure_memory_usage, data
|
||||
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
||||
wait_for_all_checks(client)
|
||||
|
||||
res = client.get(url_for("ui.ui_diff.diff_history_page", uuid="first"))
|
||||
res = client.get(url_for("ui.ui_diff.diff_history_page", uuid=uuid))
|
||||
assert b'test-one' in res.data
|
||||
assert b'test-two' in res.data
|
||||
|
||||
|
||||
@@ -40,10 +40,7 @@ def set_some_changed_response(datastore_path):
|
||||
|
||||
|
||||
def test_normal_page_check_works_with_ignore_status_code(client, live_server, measure_memory_usage, datastore_path):
|
||||
|
||||
|
||||
# Give the endpoint time to spin up
|
||||
time.sleep(1)
|
||||
from loguru import logger
|
||||
|
||||
set_original_response(datastore_path=datastore_path)
|
||||
|
||||
@@ -62,20 +59,41 @@ def test_normal_page_check_works_with_ignore_status_code(client, live_server, me
|
||||
# Add our URL to the import page
|
||||
test_url = url_for('test_endpoint', _external=True)
|
||||
uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
|
||||
|
||||
logger.info(f"TEST: First check - queuing UUID {uuid}")
|
||||
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
||||
|
||||
wait_for_all_checks(client)
|
||||
logger.info(f"TEST: Waiting for first check to complete")
|
||||
wait_result = wait_for_all_checks(client)
|
||||
logger.info(f"TEST: First check wait completed: {wait_result}")
|
||||
|
||||
# Check history after first check
|
||||
watch = client.application.config.get('DATASTORE').data['watching'][uuid]
|
||||
logger.info(f"TEST: After first check - history count: {len(watch.history.keys())}")
|
||||
|
||||
set_some_changed_response(datastore_path=datastore_path)
|
||||
wait_for_all_checks(client)
|
||||
|
||||
# Trigger a check
|
||||
logger.info(f"TEST: Second check - queuing UUID {uuid}")
|
||||
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
||||
|
||||
# Give the thread time to pick it up
|
||||
wait_for_all_checks(client)
|
||||
logger.info(f"TEST: Waiting for second check to complete")
|
||||
wait_result = wait_for_all_checks(client)
|
||||
logger.info(f"TEST: Second check wait completed: {wait_result}")
|
||||
|
||||
# Check history after second check
|
||||
watch = client.application.config.get('DATASTORE').data['watching'][uuid]
|
||||
logger.info(f"TEST: After second check - history count: {len(watch.history.keys())}")
|
||||
logger.info(f"TEST: Watch history keys: {list(watch.history.keys())}")
|
||||
|
||||
# It should report nothing found (no new 'has-unread-changes' class)
|
||||
res = client.get(url_for("watchlist.index"))
|
||||
|
||||
if b'has-unread-changes' not in res.data:
|
||||
logger.error(f"TEST FAILED: has-unread-changes not found in response")
|
||||
logger.error(f"TEST: Watch last_error: {watch.get('last_error')}")
|
||||
logger.error(f"TEST: Watch last_checked: {watch.get('last_checked')}")
|
||||
|
||||
assert b'has-unread-changes' in res.data
|
||||
assert b'/test-endpoint' in res.data
|
||||
|
||||
|
||||
@@ -82,7 +82,7 @@ def test_import_distillio(client, live_server, measure_memory_usage, datastore_p
|
||||
|
||||
# Give the endpoint time to spin up
|
||||
time.sleep(1)
|
||||
client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
|
||||
delete_all_watches(client)
|
||||
res = client.post(
|
||||
url_for("imports.import_page"),
|
||||
data={
|
||||
|
||||
@@ -224,6 +224,7 @@ def check_json_filter(json_filter, client, live_server, datastore_path):
|
||||
set_original_response(datastore_path=datastore_path)
|
||||
|
||||
|
||||
delete_all_watches(client)
|
||||
# Add our URL to the import page
|
||||
test_url = url_for('test_endpoint', content_type="application/json", _external=True)
|
||||
uuid = client.application.config.get('DATASTORE').add_watch(url=test_url, extras={"include_filters": json_filter.splitlines()})
|
||||
@@ -297,14 +298,17 @@ def check_json_filter_bool_val(json_filter, client, live_server, datastore_path)
|
||||
|
||||
def test_check_jsonpath_filter_bool_val(client, live_server, measure_memory_usage, datastore_path):
|
||||
check_json_filter_bool_val("json:$['available']", client, live_server, datastore_path=datastore_path)
|
||||
delete_all_watches(client)
|
||||
|
||||
def test_check_jq_filter_bool_val(client, live_server, measure_memory_usage, datastore_path):
|
||||
if jq_support:
|
||||
check_json_filter_bool_val("jq:.available", client, live_server, datastore_path=datastore_path)
|
||||
delete_all_watches(client)
|
||||
|
||||
def test_check_jqraw_filter_bool_val(client, live_server, measure_memory_usage, datastore_path):
|
||||
if jq_support:
|
||||
check_json_filter_bool_val("jq:.available", client, live_server, datastore_path=datastore_path)
|
||||
delete_all_watches(client)
|
||||
|
||||
# Re #265 - Extended JSON selector test
|
||||
# Stuff to consider here
|
||||
@@ -452,14 +456,17 @@ def test_correct_header_detect(client, live_server, measure_memory_usage, datast
|
||||
|
||||
def test_check_jsonpath_ext_filter(client, live_server, measure_memory_usage, datastore_path):
|
||||
check_json_ext_filter('json:$[?(@.status==Sold)]', client, live_server, datastore_path=datastore_path)
|
||||
delete_all_watches(client)
|
||||
|
||||
def test_check_jq_ext_filter(client, live_server, measure_memory_usage, datastore_path):
|
||||
if jq_support:
|
||||
check_json_ext_filter('jq:.[] | select(.status | contains("Sold"))', client, live_server, datastore_path=datastore_path)
|
||||
delete_all_watches(client)
|
||||
|
||||
def test_check_jqraw_ext_filter(client, live_server, measure_memory_usage, datastore_path):
|
||||
if jq_support:
|
||||
check_json_ext_filter('jq:.[] | select(.status | contains("Sold"))', client, live_server, datastore_path=datastore_path)
|
||||
delete_all_watches(client)
|
||||
|
||||
def test_jsonpath_BOM_utf8(client, live_server, measure_memory_usage, datastore_path):
|
||||
from .. import html_tools
|
||||
@@ -470,5 +477,6 @@ def test_jsonpath_BOM_utf8(client, live_server, measure_memory_usage, datastore_
|
||||
# See that we can find the second <script> one, which is not broken, and matches our filter
|
||||
text = html_tools.extract_json_as_string(json_str, "json:$.name")
|
||||
assert text == '"José"'
|
||||
delete_all_watches(client)
|
||||
|
||||
|
||||
|
||||
@@ -313,14 +313,8 @@ def test_notification_custom_endpoint_and_jinja2(client, live_server, measure_me
|
||||
|
||||
# Add a watch and trigger a HTTP POST
|
||||
test_url = url_for('test_endpoint', _external=True)
|
||||
res = client.post(
|
||||
url_for("ui.ui_views.form_quick_watch_add"),
|
||||
data={"url": test_url, "tags": 'nice one'},
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
assert b"Watch added" in res.data
|
||||
watch_uuid = next(iter(live_server.app.config['DATASTORE'].data['watching']))
|
||||
watch_uuid = client.application.config.get('DATASTORE').add_watch(url=test_url, tag="nice one")
|
||||
res = client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
||||
|
||||
wait_for_all_checks(client)
|
||||
set_modified_response(datastore_path=datastore_path)
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import os
|
||||
import time
|
||||
from flask import url_for
|
||||
from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks
|
||||
from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks, delete_all_watches
|
||||
import logging
|
||||
|
||||
def test_check_notification_error_handling(client, live_server, measure_memory_usage, datastore_path):
|
||||
@@ -81,4 +81,4 @@ def test_check_notification_error_handling(client, live_server, measure_memory_u
|
||||
os.unlink(os.path.join(datastore_path, "notification.txt"))
|
||||
assert 'xxxxx' in notification_submission
|
||||
|
||||
client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
|
||||
delete_all_watches(client)
|
||||
|
||||
52
changedetectionio/tests/test_queue_handler.py
Normal file
52
changedetectionio/tests/test_queue_handler.py
Normal file
@@ -0,0 +1,52 @@
|
||||
import os
|
||||
import time
|
||||
from flask import url_for
|
||||
from .util import set_original_response, wait_for_all_checks, wait_for_notification_endpoint_output
|
||||
from ..notification import valid_notification_formats
|
||||
from loguru import logger
|
||||
|
||||
def test_queue_system(client, live_server, measure_memory_usage, datastore_path):
|
||||
"""Test that multiple workers can process queue concurrently without blocking each other"""
|
||||
# (pytest) Werkzeug's threaded server uses ThreadPoolExecutor with a default limit of around 40 threads (or min(32, os.cpu_count() + 4)).
|
||||
items = os.cpu_count() +3
|
||||
delay = 10
|
||||
# Auto-queue is off here.
|
||||
live_server.app.config['DATASTORE'].data['settings']['application']['all_paused'] = True
|
||||
|
||||
test_urls = [
|
||||
f"{url_for('test_endpoint', _external=True)}?delay={delay}&id={i}&content=hello+test+content+{i}"
|
||||
for i in range(0, items)
|
||||
]
|
||||
|
||||
# Import 30 URLs to queue
|
||||
res = client.post(
|
||||
url_for("imports.import_page"),
|
||||
data={"urls": "\r\n".join(test_urls)},
|
||||
follow_redirects=True
|
||||
)
|
||||
assert f"{items} Imported".encode('utf-8') in res.data
|
||||
|
||||
client.application.set_workers(items)
|
||||
|
||||
start = time.time()
|
||||
res = client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
||||
time.sleep(delay/2)
|
||||
|
||||
# Verify all workers are idle (no UUIDs being processed)
|
||||
from changedetectionio import worker_pool
|
||||
running_uuids = worker_pool.get_running_uuids()
|
||||
logger.debug( f"Should be atleast some workers running - {len(running_uuids)} UUIDs still being processed: {running_uuids}")
|
||||
assert len(running_uuids) != 0, f"Should be atleast some workers running - {len(running_uuids)} UUIDs still being processed: {running_uuids}"
|
||||
|
||||
wait_for_all_checks(client)
|
||||
|
||||
# all workers should be done in less than say 10 seconds (they take time to 'see' something is in the queue too)
|
||||
total_time = (time.time() - start)
|
||||
logger.debug(f"All workers finished {items} items in less than {delay} seconds per job. {total_time}s total")
|
||||
# if there was a bug in queue handler not running parallel, this would blow out to items*delay seconds
|
||||
assert total_time < delay + 10, f"All workers finished {items} items in less than {delay} seconds per job, total time {total_time}s"
|
||||
|
||||
# Verify all workers are idle (no UUIDs being processed)
|
||||
from changedetectionio import worker_pool
|
||||
running_uuids = worker_pool.get_running_uuids()
|
||||
assert len(running_uuids) == 0, f"Expected all workers to be idle, but {len(running_uuids)} UUIDs still being processed: {running_uuids}"
|
||||
@@ -17,12 +17,12 @@ def test_headers_in_request(client, live_server, measure_memory_usage, datastore
|
||||
test_url = test_url.replace('localhost', 'changedet')
|
||||
|
||||
# Add the test URL twice, we will check
|
||||
uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
|
||||
uuidA = client.application.config.get('DATASTORE').add_watch(url=test_url)
|
||||
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
||||
|
||||
wait_for_all_checks(client)
|
||||
|
||||
uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
|
||||
uuidB = client.application.config.get('DATASTORE').add_watch(url=test_url)
|
||||
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
||||
|
||||
wait_for_all_checks(client)
|
||||
@@ -31,7 +31,7 @@ def test_headers_in_request(client, live_server, measure_memory_usage, datastore
|
||||
|
||||
# Add some headers to a request
|
||||
res = client.post(
|
||||
url_for("ui.ui_edit.edit_page", uuid="first"),
|
||||
url_for("ui.ui_edit.edit_page", uuid=uuidA),
|
||||
data={
|
||||
"url": test_url,
|
||||
"tags": "",
|
||||
@@ -42,13 +42,14 @@ def test_headers_in_request(client, live_server, measure_memory_usage, datastore
|
||||
)
|
||||
assert b"Updated watch." in res.data
|
||||
|
||||
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
||||
|
||||
# Give the thread time to pick up the first version
|
||||
wait_for_all_checks(client)
|
||||
|
||||
# The service should echo back the request headers
|
||||
res = client.get(
|
||||
url_for("ui.ui_preview.preview_page", uuid="first"),
|
||||
url_for("ui.ui_preview.preview_page", uuid=uuidA),
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
@@ -92,7 +93,7 @@ def test_body_in_request(client, live_server, measure_memory_usage, datastore_pa
|
||||
|
||||
# add the first 'version'
|
||||
res = client.post(
|
||||
url_for("ui.ui_edit.edit_page", uuid="first"),
|
||||
url_for("ui.ui_edit.edit_page", uuid=uuid),
|
||||
data={
|
||||
"url": test_url,
|
||||
"tags": "",
|
||||
@@ -110,7 +111,7 @@ def test_body_in_request(client, live_server, measure_memory_usage, datastore_pa
|
||||
body_value = 'Test Body Value {{ 1+1 }}'
|
||||
body_value_formatted = 'Test Body Value 2'
|
||||
res = client.post(
|
||||
url_for("ui.ui_edit.edit_page", uuid="first"),
|
||||
url_for("ui.ui_edit.edit_page", uuid=uuid),
|
||||
data={
|
||||
"url": test_url,
|
||||
"tags": "",
|
||||
@@ -126,7 +127,7 @@ def test_body_in_request(client, live_server, measure_memory_usage, datastore_pa
|
||||
|
||||
# The service should echo back the body
|
||||
res = client.get(
|
||||
url_for("ui.ui_preview.preview_page", uuid="first"),
|
||||
url_for("ui.ui_preview.preview_page", uuid=uuid),
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
@@ -157,7 +158,7 @@ def test_body_in_request(client, live_server, measure_memory_usage, datastore_pa
|
||||
|
||||
# Attempt to add a body with a GET method
|
||||
res = client.post(
|
||||
url_for("ui.ui_edit.edit_page", uuid="first"),
|
||||
url_for("ui.ui_edit.edit_page", uuid=uuid),
|
||||
data={
|
||||
"url": test_url,
|
||||
"tags": "",
|
||||
|
||||
@@ -236,6 +236,7 @@ def test_restock_itemprop_with_tag(client, live_server, measure_memory_usage, da
|
||||
}
|
||||
|
||||
_run_test_minmax_limit(client, extra_watch_edit_form=extras,datastore_path=datastore_path)
|
||||
delete_all_watches(client)
|
||||
|
||||
|
||||
|
||||
@@ -388,9 +389,10 @@ def test_change_with_notification_values(client, live_server, measure_memory_usa
|
||||
os.unlink(os.path.join(datastore_path, "notification.txt"))
|
||||
uuid = next(iter(live_server.app.config['DATASTORE'].data['watching']))
|
||||
res = client.post(url_for("ui.ui_notification.ajax_callback_send_notification_test", watch_uuid=uuid), data={}, follow_redirects=True)
|
||||
time.sleep(5)
|
||||
wait_for_notification_endpoint_output(datastore_path=datastore_path)
|
||||
assert os.path.isfile(os.path.join(datastore_path, "notification.txt")), "Notification received"
|
||||
|
||||
delete_all_watches(client)
|
||||
|
||||
def test_data_sanity(client, live_server, measure_memory_usage, datastore_path):
|
||||
|
||||
@@ -406,6 +408,7 @@ def test_data_sanity(client, live_server, measure_memory_usage, datastore_path):
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
||||
|
||||
wait_for_all_checks(client)
|
||||
res = client.get(url_for("watchlist.index"))
|
||||
@@ -417,6 +420,7 @@ def test_data_sanity(client, live_server, measure_memory_usage, datastore_path):
|
||||
data={"url": test_url2, "tags": 'restock tests', 'processor': 'restock_diff'},
|
||||
follow_redirects=True
|
||||
)
|
||||
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
||||
wait_for_all_checks(client)
|
||||
res = client.get(url_for("watchlist.index"))
|
||||
assert str(res.data.decode()).count("950.95") == 1, "Price should only show once (for the watch added, no other watches yet)"
|
||||
@@ -462,3 +466,4 @@ def test_special_prop_examples(client, live_server, measure_memory_usage, datast
|
||||
assert b'ception' not in res.data
|
||||
assert b'155.55' in res.data
|
||||
|
||||
delete_all_watches(client)
|
||||
|
||||
@@ -107,7 +107,7 @@ def test_rss_and_token(client, live_server, measure_memory_usage, datastore_path
|
||||
assert b"Access denied, bad token" not in res.data
|
||||
assert b"Random content" in res.data
|
||||
|
||||
client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
|
||||
delete_all_watches(client)
|
||||
|
||||
def test_basic_cdata_rss_markup(client, live_server, measure_memory_usage, datastore_path):
|
||||
|
||||
|
||||
@@ -23,6 +23,7 @@ def test_rss_feed_empty(client, live_server, measure_memory_usage, datastore_pat
|
||||
)
|
||||
assert res.status_code == 400
|
||||
assert b'does not have enough history snapshots to show' in res.data
|
||||
delete_all_watches(client)
|
||||
|
||||
def test_rss_single_watch_order(client, live_server, measure_memory_usage, datastore_path):
|
||||
"""
|
||||
|
||||
@@ -24,20 +24,20 @@ def test_share_watch(client, live_server, measure_memory_usage, datastore_path):
|
||||
# Goto the edit page, add our ignore text
|
||||
# Add our URL to the import page
|
||||
res = client.post(
|
||||
url_for("ui.ui_edit.edit_page", uuid="first"),
|
||||
url_for("ui.ui_edit.edit_page", uuid=uuid),
|
||||
data={"include_filters": include_filters, "url": test_url, "tags": "", "headers": "", 'fetch_backend': "html_requests", "time_between_check_use_default": "y"},
|
||||
follow_redirects=True
|
||||
)
|
||||
assert b"Updated watch." in res.data
|
||||
# Check it saved
|
||||
res = client.get(
|
||||
url_for("ui.ui_edit.edit_page", uuid="first"),
|
||||
url_for("ui.ui_edit.edit_page", uuid=uuid),
|
||||
)
|
||||
assert bytes(include_filters.encode('utf-8')) in res.data
|
||||
|
||||
# click share the link
|
||||
res = client.get(
|
||||
url_for("ui.form_share_put_watch", uuid="first"),
|
||||
url_for("ui.form_share_put_watch", uuid=uuid),
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
@@ -63,13 +63,16 @@ def test_share_watch(client, live_server, measure_memory_usage, datastore_path):
|
||||
|
||||
# Now hit edit, we should see what we expect
|
||||
# that the import fetched the meta-data
|
||||
|
||||
uuids = list(client.application.config.get('DATASTORE').data['watching'])
|
||||
assert uuids, "It saved/imported and created a new URL from the share"
|
||||
# Check it saved
|
||||
res = client.get(
|
||||
url_for("ui.ui_edit.edit_page", uuid="first"),
|
||||
url_for("ui.ui_edit.edit_page", uuid=uuids[0]),
|
||||
)
|
||||
assert bytes(include_filters.encode('utf-8')) in res.data
|
||||
|
||||
# Check it saved the URL
|
||||
res = client.get(url_for("watchlist.index"))
|
||||
assert bytes(test_url.encode('utf-8')) in res.data
|
||||
|
||||
delete_all_watches(client)
|
||||
@@ -25,6 +25,7 @@ def test_recheck_time_field_validation_global_settings(client, live_server, meas
|
||||
|
||||
|
||||
assert REQUIRE_ATLEAST_ONE_TIME_PART_MESSAGE_DEFAULT.encode('utf-8') in res.data
|
||||
delete_all_watches(client)
|
||||
|
||||
|
||||
def test_recheck_time_field_validation_single_watch(client, live_server, measure_memory_usage, datastore_path):
|
||||
@@ -94,6 +95,7 @@ def test_recheck_time_field_validation_single_watch(client, live_server, measure
|
||||
|
||||
assert b"Updated watch." in res.data
|
||||
assert REQUIRE_ATLEAST_ONE_TIME_PART_WHEN_NOT_GLOBAL_DEFAULT.encode('utf-8') not in res.data
|
||||
delete_all_watches(client)
|
||||
|
||||
def test_checkbox_open_diff_in_new_tab(client, live_server, measure_memory_usage, datastore_path):
|
||||
|
||||
@@ -242,6 +244,7 @@ def test_page_title_listing_behaviour(client, live_server, measure_memory_usage,
|
||||
# No page title description, and 'use_page_title_in_list' is on, it should show the <title>
|
||||
res = client.get(url_for("watchlist.index"))
|
||||
assert b"head titlecustom html" in res.data
|
||||
delete_all_watches(client)
|
||||
|
||||
|
||||
def test_ui_viewed_unread_flag(client, live_server, measure_memory_usage, datastore_path):
|
||||
@@ -283,4 +286,5 @@ def test_ui_viewed_unread_flag(client, live_server, measure_memory_usage, datast
|
||||
client.get(url_for("ui.mark_all_viewed"), follow_redirects=True)
|
||||
time.sleep(0.2)
|
||||
res = client.get(url_for("watchlist.index"))
|
||||
assert b'<span id="unread-tab-counter">0</span>' in res.data
|
||||
assert b'<span id="unread-tab-counter">0</span>' in res.data
|
||||
delete_all_watches(client)
|
||||
|
||||
@@ -366,7 +366,7 @@ def test_check_with_prefix_include_filters(client, live_server, measure_memory_u
|
||||
assert b"Some text thats the same" in res.data # in selector
|
||||
assert b"Some text that will change" not in res.data # not in selector
|
||||
|
||||
client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
|
||||
delete_all_watches(client)
|
||||
|
||||
|
||||
def test_various_rules(client, live_server, measure_memory_usage, datastore_path):
|
||||
@@ -423,7 +423,7 @@ def test_xpath_20(client, live_server, measure_memory_usage, datastore_path):
|
||||
|
||||
test_url = url_for('test_endpoint', _external=True)
|
||||
res = client.post(
|
||||
url_for("ui.ui_edit.edit_page", uuid="first"),
|
||||
url_for("ui.ui_edit.edit_page", uuid=uuid),
|
||||
data={"include_filters": "//*[contains(@class, 'sametext')]|//*[contains(@class, 'changetext')]",
|
||||
"url": test_url,
|
||||
"tags": "",
|
||||
@@ -437,14 +437,14 @@ def test_xpath_20(client, live_server, measure_memory_usage, datastore_path):
|
||||
wait_for_all_checks(client)
|
||||
|
||||
res = client.get(
|
||||
url_for("ui.ui_preview.preview_page", uuid="first"),
|
||||
url_for("ui.ui_preview.preview_page", uuid=uuid),
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
assert b"Some text thats the same" in res.data # in selector
|
||||
assert b"Some text that will change" in res.data # in selector
|
||||
|
||||
client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
|
||||
delete_all_watches(client)
|
||||
|
||||
|
||||
def test_xpath_20_function_count(client, live_server, measure_memory_usage, datastore_path):
|
||||
@@ -477,7 +477,7 @@ def test_xpath_20_function_count(client, live_server, measure_memory_usage, data
|
||||
|
||||
assert b"246913579975308642" in res.data # in selector
|
||||
|
||||
client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
|
||||
delete_all_watches(client)
|
||||
|
||||
|
||||
def test_xpath_20_function_count2(client, live_server, measure_memory_usage, datastore_path):
|
||||
@@ -501,6 +501,8 @@ def test_xpath_20_function_count2(client, live_server, measure_memory_usage, dat
|
||||
)
|
||||
|
||||
assert b"Updated watch." in res.data
|
||||
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
||||
|
||||
wait_for_all_checks(client)
|
||||
|
||||
res = client.get(
|
||||
@@ -510,7 +512,7 @@ def test_xpath_20_function_count2(client, live_server, measure_memory_usage, dat
|
||||
|
||||
assert b"246913579975308642" in res.data # in selector
|
||||
|
||||
client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
|
||||
delete_all_watches(client)
|
||||
|
||||
|
||||
def test_xpath_20_function_string_join_matches(client, live_server, measure_memory_usage, datastore_path):
|
||||
@@ -544,7 +546,7 @@ def test_xpath_20_function_string_join_matches(client, live_server, measure_memo
|
||||
|
||||
assert b"Some text thats the samespecialconjunctionSome text that will change" in res.data # in selector
|
||||
|
||||
client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
|
||||
delete_all_watches(client)
|
||||
|
||||
|
||||
def _subtest_xpath_rss(client, datastore_path, content_type='text/html'):
|
||||
@@ -582,7 +584,7 @@ def _subtest_xpath_rss(client, datastore_path, content_type='text/html'):
|
||||
assert b"Lets go discount" in res.data, f"When testing for Lets go discount called with content type '{content_type}'"
|
||||
assert b"Events and Announcements" not in res.data, f"When testing for Lets go discount called with content type '{content_type}'" # It should not be here because thats not our selector target
|
||||
|
||||
client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
|
||||
delete_all_watches(client)
|
||||
|
||||
# Be sure all-in-the-wild types of RSS feeds work with xpath
|
||||
def test_rss_xpath(client, live_server, measure_memory_usage, datastore_path):
|
||||
|
||||
@@ -6,6 +6,42 @@ from flask import url_for
|
||||
import logging
|
||||
import time
|
||||
import os
|
||||
import threading
|
||||
|
||||
# Thread-safe global storage for test endpoint content
|
||||
# Avoids filesystem cache issues in parallel tests
|
||||
_test_endpoint_content_lock = threading.Lock()
|
||||
_test_endpoint_content = {}
|
||||
|
||||
def write_test_file_and_sync(filepath, content, mode='w'):
|
||||
"""
|
||||
Write test data to file and ensure it's synced to disk.
|
||||
Also stores in thread-safe global dict to bypass filesystem cache.
|
||||
|
||||
Critical for parallel tests where workers may read files immediately after write.
|
||||
Without fsync(), data may still be in OS buffers when workers try to read,
|
||||
causing race conditions where old data is seen.
|
||||
|
||||
Args:
|
||||
filepath: Full path to file
|
||||
content: Content to write (str or bytes)
|
||||
mode: File mode ('w' for text, 'wb' for binary)
|
||||
"""
|
||||
# Convert content to bytes if needed
|
||||
if isinstance(content, str):
|
||||
content_bytes = content.encode('utf-8')
|
||||
else:
|
||||
content_bytes = content
|
||||
|
||||
# Store in thread-safe global dict for instant access
|
||||
with _test_endpoint_content_lock:
|
||||
_test_endpoint_content[os.path.basename(filepath)] = content_bytes
|
||||
|
||||
# Also write to file for compatibility
|
||||
with open(filepath, mode) as f:
|
||||
f.write(content)
|
||||
f.flush() # Flush Python buffer to OS
|
||||
os.fsync(f.fileno()) # Force OS to write to disk
|
||||
|
||||
def set_original_response(datastore_path, extra_title=''):
|
||||
test_return_data = f"""<html>
|
||||
@@ -20,8 +56,7 @@ def set_original_response(datastore_path, extra_title=''):
|
||||
</html>
|
||||
"""
|
||||
|
||||
with open(os.path.join(datastore_path, "endpoint-content.txt"), "w") as f:
|
||||
f.write(test_return_data)
|
||||
write_test_file_and_sync(os.path.join(datastore_path, "endpoint-content.txt"), test_return_data)
|
||||
return None
|
||||
|
||||
def set_modified_response(datastore_path):
|
||||
@@ -36,9 +71,7 @@ def set_modified_response(datastore_path):
|
||||
</html>
|
||||
"""
|
||||
|
||||
with open(os.path.join(datastore_path, "endpoint-content.txt"), "w") as f:
|
||||
f.write(test_return_data)
|
||||
|
||||
write_test_file_and_sync(os.path.join(datastore_path, "endpoint-content.txt"), test_return_data)
|
||||
return None
|
||||
def set_longer_modified_response(datastore_path):
|
||||
test_return_data = """<html>
|
||||
@@ -55,9 +88,7 @@ def set_longer_modified_response(datastore_path):
|
||||
</html>
|
||||
"""
|
||||
|
||||
with open(os.path.join(datastore_path, "endpoint-content.txt"), "w") as f:
|
||||
f.write(test_return_data)
|
||||
|
||||
write_test_file_and_sync(os.path.join(datastore_path, "endpoint-content.txt"), test_return_data)
|
||||
return None
|
||||
|
||||
def set_more_modified_response(datastore_path):
|
||||
@@ -73,17 +104,14 @@ def set_more_modified_response(datastore_path):
|
||||
</html>
|
||||
"""
|
||||
|
||||
with open(os.path.join(datastore_path, "endpoint-content.txt"), "w") as f:
|
||||
f.write(test_return_data)
|
||||
|
||||
write_test_file_and_sync(os.path.join(datastore_path, "endpoint-content.txt"), test_return_data)
|
||||
return None
|
||||
|
||||
|
||||
def set_empty_text_response(datastore_path):
|
||||
test_return_data = """<html><body></body></html>"""
|
||||
|
||||
with open(os.path.join(datastore_path, "endpoint-content.txt"), "w") as f:
|
||||
f.write(test_return_data)
|
||||
write_test_file_and_sync(os.path.join(datastore_path, "endpoint-content.txt"), test_return_data)
|
||||
|
||||
return None
|
||||
|
||||
@@ -132,21 +160,40 @@ def extract_UUID_from_client(client):
|
||||
return uuid.strip()
|
||||
|
||||
def delete_all_watches(client=None):
|
||||
|
||||
# Change tracking
|
||||
client.application.config.get('DATASTORE')._dirty_watches = set() # Watch UUIDs that need saving
|
||||
client.application.config.get('DATASTORE')._dirty_settings = False # Settings changed
|
||||
client.application.config.get('DATASTORE')._watch_hashes = {} # UUID -> SHA256 hash for change detection
|
||||
|
||||
uuids = list(client.application.config.get('DATASTORE').data['watching'])
|
||||
for uuid in uuids:
|
||||
client.application.config.get('DATASTORE').delete(uuid)
|
||||
from changedetectionio.flask_app import update_q
|
||||
|
||||
# Clear the queue to prevent leakage to next test
|
||||
# Use clear() method to ensure both priority_items and notification_queue are drained
|
||||
if hasattr(update_q, 'clear'):
|
||||
update_q.clear()
|
||||
else:
|
||||
# Fallback for old implementation
|
||||
while not update_q.empty():
|
||||
try:
|
||||
update_q.get_nowait()
|
||||
except:
|
||||
break
|
||||
|
||||
time.sleep(0.2)
|
||||
|
||||
def wait_for_all_checks(client=None):
|
||||
"""
|
||||
Waits until the queue is empty and workers are idle.
|
||||
Delegates to worker_handler.wait_for_all_checks for shared logic.
|
||||
Delegates to worker_pool.wait_for_all_checks for shared logic.
|
||||
"""
|
||||
from changedetectionio.flask_app import update_q as global_update_q
|
||||
from changedetectionio import worker_handler
|
||||
time.sleep(0.05)
|
||||
# Use the shared wait logic from worker_handler
|
||||
return worker_handler.wait_for_all_checks(global_update_q, timeout=150)
|
||||
from changedetectionio import worker_pool
|
||||
return worker_pool.wait_for_all_checks(global_update_q, timeout=150)
|
||||
|
||||
|
||||
def wait_for_watch_history(client, min_history_count=2, timeout=10):
|
||||
"""
|
||||
@@ -195,8 +242,11 @@ def new_live_server_setup(live_server):
|
||||
|
||||
@live_server.app.route('/test-endpoint')
|
||||
def test_endpoint():
|
||||
from loguru import logger
|
||||
logger.debug(f"/test-endpoint hit {request}")
|
||||
# REMOVED: logger.debug() causes file locking between test process and Flask server process
|
||||
# Flask server runs in separate multiprocessing.Process and inherited loguru tries to
|
||||
# write to same log files, causing request handlers to block on file locks
|
||||
# from loguru import logger
|
||||
# logger.debug(f"/test-endpoint hit {request}")
|
||||
ctype = request.args.get('content_type')
|
||||
status_code = request.args.get('status_code')
|
||||
content = request.args.get('content') or None
|
||||
@@ -218,15 +268,35 @@ def new_live_server_setup(live_server):
|
||||
resp.headers['Content-Type'] = ctype if ctype else 'text/html'
|
||||
return resp
|
||||
|
||||
# Tried using a global var here but didn't seem to work, so reading from a file instead.
|
||||
datastore_path = current_app.config.get('TEST_DATASTORE_PATH', 'test-datastore')
|
||||
with open(os.path.join(datastore_path, "endpoint-content.txt"), "rb") as f:
|
||||
resp = make_response(f.read(), status_code)
|
||||
if uppercase_headers:
|
||||
resp.headers['CONTENT-TYPE'] = ctype if ctype else 'text/html'
|
||||
else:
|
||||
resp.headers['Content-Type'] = ctype if ctype else 'text/html'
|
||||
return resp
|
||||
# Check thread-safe global dict first (instant, no cache issues)
|
||||
# Fall back to file if not in dict (for tests that write directly)
|
||||
with _test_endpoint_content_lock:
|
||||
content_data = _test_endpoint_content.get("endpoint-content.txt")
|
||||
|
||||
if content_data is None:
|
||||
# Not in global dict, read from file
|
||||
datastore_path = current_app.config.get('TEST_DATASTORE_PATH', 'test-datastore')
|
||||
filepath = os.path.join(datastore_path, "endpoint-content.txt")
|
||||
|
||||
# REMOVED: os.sync() was blocking for many seconds during parallel tests
|
||||
# With -n 6+ parallel tests, heavy I/O causes os.sync() to wait for ALL
|
||||
# system writes to complete, causing "Read timed out" errors
|
||||
# File writes from test code are already flushed by the time workers fetch
|
||||
|
||||
try:
|
||||
with open(filepath, "rb") as f:
|
||||
content_data = f.read()
|
||||
except Exception as e:
|
||||
# REMOVED: logger.error() causes file locking in multiprocess context
|
||||
# Just raise the exception directly for debugging
|
||||
raise
|
||||
|
||||
resp = make_response(content_data, status_code)
|
||||
if uppercase_headers:
|
||||
resp.headers['CONTENT-TYPE'] = ctype if ctype else 'text/html'
|
||||
else:
|
||||
resp.headers['Content-Type'] = ctype if ctype else 'text/html'
|
||||
return resp
|
||||
except FileNotFoundError:
|
||||
return make_response('', status_code)
|
||||
|
||||
@@ -301,6 +371,12 @@ def new_live_server_setup(live_server):
|
||||
def test_pdf_endpoint():
|
||||
datastore_path = current_app.config.get('TEST_DATASTORE_PATH', 'test-datastore')
|
||||
|
||||
# Force filesystem sync before reading to ensure fresh data
|
||||
try:
|
||||
os.sync()
|
||||
except (AttributeError, PermissionError):
|
||||
pass
|
||||
|
||||
# Tried using a global var here but didn't seem to work, so reading from a file instead.
|
||||
with open(os.path.join(datastore_path, "endpoint-test.pdf"), "rb") as f:
|
||||
resp = make_response(f.read(), 200)
|
||||
|
||||
@@ -3,7 +3,9 @@ from .processors.exceptions import ProcessorException
|
||||
import changedetectionio.content_fetchers.exceptions as content_fetchers_exceptions
|
||||
from changedetectionio.processors.text_json_diff.processor import FilterNotFoundInResponse
|
||||
from changedetectionio import html_tools
|
||||
from changedetectionio import worker_pool
|
||||
from changedetectionio.flask_app import watch_check_update
|
||||
from changedetectionio.queuedWatchMetaData import PrioritizedItem
|
||||
|
||||
import asyncio
|
||||
import importlib
|
||||
@@ -46,19 +48,33 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
|
||||
jobs_processed = 0
|
||||
start_time = time.time()
|
||||
|
||||
logger.info(f"Starting async worker {worker_id} (max_jobs={max_jobs}, max_runtime={max_runtime_seconds}s)")
|
||||
# Log thread name for debugging
|
||||
import threading
|
||||
thread_name = threading.current_thread().name
|
||||
logger.info(f"Starting async worker {worker_id} on thread '{thread_name}' (max_jobs={max_jobs}, max_runtime={max_runtime_seconds}s)")
|
||||
|
||||
while not app.config.exit.is_set():
|
||||
update_handler = None
|
||||
watch = None
|
||||
|
||||
try:
|
||||
# Use sync interface via run_in_executor since each worker has its own event loop
|
||||
loop = asyncio.get_event_loop()
|
||||
queued_item_data = await asyncio.wait_for(
|
||||
loop.run_in_executor(executor, q.get, True, 1.0), # block=True, timeout=1.0
|
||||
timeout=1.5
|
||||
)
|
||||
# Efficient blocking via run_in_executor (no polling overhead!)
|
||||
# Worker blocks in threading.Queue.get() which uses Condition.wait()
|
||||
# Executor must be sized to match worker count (see worker_pool.py: 50 threads default)
|
||||
# Single timeout (no double-timeout wrapper) = no race condition
|
||||
queued_item_data = await q.async_get(executor=executor, timeout=1.0)
|
||||
|
||||
# CRITICAL: Claim UUID immediately after getting from queue to prevent race condition
|
||||
# in wait_for_all_checks() which checks qsize() and running_uuids separately
|
||||
uuid = queued_item_data.item.get('uuid')
|
||||
if not worker_pool.claim_uuid_for_processing(uuid, worker_id):
|
||||
# Already being processed - re-queue and continue
|
||||
logger.trace(f"Worker {worker_id} detected UUID {uuid} already processing during claim - deferring")
|
||||
await asyncio.sleep(DEFER_SLEEP_TIME_ALREADY_QUEUED)
|
||||
deferred_priority = max(1000, queued_item_data.priority * 10)
|
||||
deferred_item = PrioritizedItem(priority=deferred_priority, item=queued_item_data.item)
|
||||
worker_pool.queue_item_async_safe(q, deferred_item, silent=True)
|
||||
continue
|
||||
|
||||
except asyncio.TimeoutError:
|
||||
# No jobs available - check if we should restart based on time while idle
|
||||
@@ -67,6 +83,17 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
|
||||
logger.info(f"Worker {worker_id} idle and reached max runtime ({runtime:.0f}s), restarting")
|
||||
return "restart"
|
||||
continue
|
||||
except RuntimeError as e:
|
||||
# Handle executor shutdown gracefully - this is expected during shutdown
|
||||
if "cannot schedule new futures after shutdown" in str(e):
|
||||
# Executor shut down - exit gracefully without logging in pytest
|
||||
if not IN_PYTEST:
|
||||
logger.debug(f"Worker {worker_id} detected executor shutdown, exiting")
|
||||
break
|
||||
# Other RuntimeError - log and continue
|
||||
logger.error(f"Worker {worker_id} runtime error: {e}")
|
||||
await asyncio.sleep(0.1)
|
||||
continue
|
||||
except Exception as e:
|
||||
# Handle expected Empty exception from queue timeout
|
||||
import queue
|
||||
@@ -88,26 +115,8 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
|
||||
await asyncio.sleep(0.1)
|
||||
continue
|
||||
|
||||
uuid = queued_item_data.item.get('uuid')
|
||||
|
||||
# RACE CONDITION FIX: Atomically claim this UUID for processing
|
||||
from changedetectionio import worker_handler
|
||||
from changedetectionio.queuedWatchMetaData import PrioritizedItem
|
||||
|
||||
# Try to claim the UUID atomically - prevents duplicate processing
|
||||
if not worker_handler.claim_uuid_for_processing(uuid, worker_id):
|
||||
# Already being processed by another worker
|
||||
logger.trace(f"Worker {worker_id} detected UUID {uuid} already being processed - deferring")
|
||||
|
||||
# Sleep to avoid tight loop and give the other worker time to finish
|
||||
await asyncio.sleep(DEFER_SLEEP_TIME_ALREADY_QUEUED)
|
||||
|
||||
# Re-queue with lower priority so it gets checked again after current processing finishes
|
||||
deferred_priority = max(1000, queued_item_data.priority * 10)
|
||||
deferred_item = PrioritizedItem(priority=deferred_priority, item=queued_item_data.item)
|
||||
worker_handler.queue_item_async_safe(q, deferred_item, silent=True)
|
||||
logger.debug(f"Worker {worker_id} re-queued UUID {uuid} for subsequent check")
|
||||
continue
|
||||
# UUID already claimed above immediately after getting from queue
|
||||
# to prevent race condition with wait_for_all_checks()
|
||||
|
||||
fetch_start_time = round(time.time())
|
||||
|
||||
@@ -133,11 +142,14 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
|
||||
processor = watch.get('processor', 'text_json_diff')
|
||||
|
||||
# Init a new 'difference_detection_processor'
|
||||
try:
|
||||
processor_module = importlib.import_module(f"changedetectionio.processors.{processor}.processor")
|
||||
except ModuleNotFoundError as e:
|
||||
print(f"Processor module '{processor}' not found.")
|
||||
raise e
|
||||
# Use get_processor_module() to support both built-in and plugin processors
|
||||
from changedetectionio.processors import get_processor_module
|
||||
processor_module = get_processor_module(processor)
|
||||
|
||||
if not processor_module:
|
||||
error_msg = f"Processor module '{processor}' not found."
|
||||
logger.error(error_msg)
|
||||
raise ModuleNotFoundError(error_msg)
|
||||
|
||||
update_handler = processor_module.perform_site_check(datastore=datastore,
|
||||
watch_uuid=uuid)
|
||||
@@ -224,6 +236,7 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
|
||||
except FilterNotFoundInResponse as e:
|
||||
if not datastore.data['watching'].get(uuid):
|
||||
continue
|
||||
logger.debug(f"Received FilterNotFoundInResponse exception for {uuid}")
|
||||
|
||||
err_text = "Warning, no filters were found, no change detection ran - Did the page change layout? update your Visual Filter if necessary."
|
||||
datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text})
|
||||
@@ -243,17 +256,19 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
|
||||
c += 1
|
||||
# Send notification if we reached the threshold?
|
||||
threshold = datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts', 0)
|
||||
logger.debug(f"Filter for {uuid} not found, consecutive_filter_failures: {c} of threshold {threshold}")
|
||||
logger.debug(f"FilterNotFoundInResponse - Filter for {uuid} not found, consecutive_filter_failures: {c} of threshold {threshold}")
|
||||
if c >= threshold:
|
||||
if not watch.get('notification_muted'):
|
||||
logger.debug(f"Sending filter failed notification for {uuid}")
|
||||
logger.debug(f"FilterNotFoundInResponse - Sending filter failed notification for {uuid}")
|
||||
await send_filter_failure_notification(uuid, notification_q, datastore)
|
||||
c = 0
|
||||
logger.debug(f"Reset filter failure count back to zero")
|
||||
logger.debug(f"FilterNotFoundInResponse - Reset filter failure count back to zero")
|
||||
else:
|
||||
logger.debug(f"FilterNotFoundInResponse - {c} of threshold {threshold}..")
|
||||
|
||||
datastore.update_watch(uuid=uuid, update_obj={'consecutive_filter_failures': c})
|
||||
else:
|
||||
logger.trace(f"{uuid} - filter_failure_notification_send not enabled, skipping")
|
||||
logger.trace(f"FilterNotFoundInResponse - {uuid} - filter_failure_notification_send not enabled, skipping")
|
||||
|
||||
process_changedetection_results = False
|
||||
|
||||
@@ -353,8 +368,10 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
|
||||
logger.error(f"Exception (BrowserStepsInUnsupportedFetcher) reached processing watch UUID: {uuid}")
|
||||
|
||||
except Exception as e:
|
||||
import traceback
|
||||
logger.error(f"Worker {worker_id} exception processing watch UUID: {uuid}")
|
||||
logger.error(str(e))
|
||||
logger.error(traceback.format_exc())
|
||||
datastore.update_watch(uuid=uuid, update_obj={'last_error': "Exception: " + str(e)})
|
||||
process_changedetection_results = False
|
||||
|
||||
@@ -373,8 +390,8 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
|
||||
if not datastore.data['watching'].get(uuid):
|
||||
continue
|
||||
|
||||
logger.debug(f"Processing watch UUID: {uuid} - xpath_data length returned {len(update_handler.xpath_data) if update_handler.xpath_data else 'empty.'}")
|
||||
if process_changedetection_results:
|
||||
logger.debug(f"Processing watch UUID: {uuid} - xpath_data length returned {len(update_handler.xpath_data) if update_handler and update_handler.xpath_data else 'empty.'}")
|
||||
if update_handler and process_changedetection_results:
|
||||
try:
|
||||
datastore.update_watch(uuid=uuid, update_obj=update_obj)
|
||||
|
||||
@@ -424,55 +441,53 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
|
||||
|
||||
# Always record attempt count
|
||||
count = watch.get('check_count', 0) + 1
|
||||
if update_handler: # Could be none or empty if the processor was not found
|
||||
# Always record page title (used in notifications, and can change even when the content is the same)
|
||||
if update_obj.get('content-type') and 'html' in update_obj.get('content-type'):
|
||||
try:
|
||||
page_title = html_tools.extract_title(data=update_handler.fetcher.content)
|
||||
if page_title:
|
||||
page_title = page_title.strip()[:2000]
|
||||
logger.debug(f"UUID: {uuid} Page <title> is '{page_title}'")
|
||||
datastore.update_watch(uuid=uuid, update_obj={'page_title': page_title})
|
||||
except Exception as e:
|
||||
logger.warning(f"UUID: {uuid} Exception when extracting <title> - {str(e)}")
|
||||
|
||||
# Always record page title (used in notifications, and can change even when the content is the same)
|
||||
if update_obj.get('content-type') and 'html' in update_obj.get('content-type'):
|
||||
# Record server header
|
||||
try:
|
||||
page_title = html_tools.extract_title(data=update_handler.fetcher.content)
|
||||
if page_title:
|
||||
page_title = page_title.strip()[:2000]
|
||||
logger.debug(f"UUID: {uuid} Page <title> is '{page_title}'")
|
||||
datastore.update_watch(uuid=uuid, update_obj={'page_title': page_title})
|
||||
server_header = update_handler.fetcher.headers.get('server', '').strip().lower()[:255]
|
||||
datastore.update_watch(uuid=uuid, update_obj={'remote_server_reply': server_header})
|
||||
except Exception as e:
|
||||
logger.warning(f"UUID: {uuid} Exception when extracting <title> - {str(e)}")
|
||||
pass
|
||||
|
||||
# Record server header
|
||||
try:
|
||||
server_header = update_handler.fetcher.headers.get('server', '').strip().lower()[:255]
|
||||
datastore.update_watch(uuid=uuid, update_obj={'remote_server_reply': server_header})
|
||||
except Exception as e:
|
||||
pass
|
||||
# Store favicon if necessary
|
||||
if update_handler.fetcher.favicon_blob and update_handler.fetcher.favicon_blob.get('base64'):
|
||||
watch.bump_favicon(url=update_handler.fetcher.favicon_blob.get('url'),
|
||||
favicon_base_64=update_handler.fetcher.favicon_blob.get('base64')
|
||||
)
|
||||
|
||||
# Store favicon if necessary
|
||||
if update_handler.fetcher.favicon_blob and update_handler.fetcher.favicon_blob.get('base64'):
|
||||
watch.bump_favicon(url=update_handler.fetcher.favicon_blob.get('url'),
|
||||
favicon_base_64=update_handler.fetcher.favicon_blob.get('base64')
|
||||
)
|
||||
datastore.update_watch(uuid=uuid, update_obj={'fetch_time': round(time.time() - fetch_start_time, 3),
|
||||
'check_count': count})
|
||||
|
||||
datastore.update_watch(uuid=uuid, update_obj={'fetch_time': round(time.time() - fetch_start_time, 3),
|
||||
'check_count': count})
|
||||
# NOW clear fetcher content - after all processing is complete
|
||||
# This is the last point where we need the fetcher data
|
||||
if update_handler and hasattr(update_handler, 'fetcher') and update_handler.fetcher:
|
||||
update_handler.fetcher.clear_content()
|
||||
logger.debug(f"Cleared fetcher content for UUID {uuid}")
|
||||
|
||||
# NOW clear fetcher content - after all processing is complete
|
||||
# This is the last point where we need the fetcher data
|
||||
if update_handler and hasattr(update_handler, 'fetcher') and update_handler.fetcher:
|
||||
update_handler.fetcher.clear_content()
|
||||
logger.debug(f"Cleared fetcher content for UUID {uuid}")
|
||||
# Explicitly delete update_handler to free all references
|
||||
if update_handler:
|
||||
del update_handler
|
||||
update_handler = None
|
||||
|
||||
# Explicitly delete update_handler to free all references
|
||||
if update_handler:
|
||||
del update_handler
|
||||
update_handler = None
|
||||
|
||||
# Force aggressive memory cleanup after clearing
|
||||
# Force garbage collection
|
||||
import gc
|
||||
gc.collect()
|
||||
try:
|
||||
import ctypes
|
||||
ctypes.CDLL('libc.so.6').malloc_trim(0)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
except Exception as e:
|
||||
import traceback
|
||||
logger.error(traceback.format_exc())
|
||||
|
||||
logger.error(f"Worker {worker_id} unexpected error processing {uuid}: {e}")
|
||||
logger.error(f"Worker {worker_id} traceback:", exc_info=True)
|
||||
|
||||
@@ -483,6 +498,7 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
|
||||
finally:
|
||||
# Always cleanup - this runs whether there was an exception or not
|
||||
if uuid:
|
||||
# Call quit() as backup (Puppeteer/Playwright have internal cleanup, but this acts as safety net)
|
||||
try:
|
||||
if update_handler and hasattr(update_handler, 'fetcher') and update_handler.fetcher:
|
||||
await update_handler.fetcher.quit(watch=watch)
|
||||
@@ -490,36 +506,26 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
|
||||
logger.error(f"Exception while cleaning/quit after calling browser: {e}")
|
||||
try:
|
||||
# Release UUID from processing (thread-safe)
|
||||
worker_handler.release_uuid_from_processing(uuid, worker_id=worker_id)
|
||||
|
||||
worker_pool.release_uuid_from_processing(uuid, worker_id=worker_id)
|
||||
|
||||
# Send completion signal
|
||||
if watch:
|
||||
#logger.info(f"Worker {worker_id} sending completion signal for UUID {watch['uuid']}")
|
||||
watch_check_update.send(watch_uuid=watch['uuid'])
|
||||
|
||||
# Explicitly clean up update_handler and all its references
|
||||
# Clean up all memory references BEFORE garbage collection
|
||||
if update_handler:
|
||||
# Clear fetcher content using the proper method
|
||||
if hasattr(update_handler, 'fetcher') and update_handler.fetcher:
|
||||
update_handler.fetcher.clear_content()
|
||||
|
||||
# Clear processor references
|
||||
if hasattr(update_handler, 'content_processor'):
|
||||
update_handler.content_processor = None
|
||||
|
||||
del update_handler
|
||||
update_handler = None
|
||||
|
||||
# Clear local contents variable if it still exists
|
||||
# Clear large content variables
|
||||
if 'contents' in locals():
|
||||
del contents
|
||||
|
||||
# Note: We don't set watch = None here because:
|
||||
# 1. watch is just a local reference to datastore.data['watching'][uuid]
|
||||
# 2. Setting it to None doesn't affect the datastore
|
||||
# 3. GC can't collect the object anyway (still referenced by datastore)
|
||||
# 4. It would just cause confusion
|
||||
|
||||
# Force garbage collection after cleanup
|
||||
# Force garbage collection after all references are cleared
|
||||
import gc
|
||||
gc.collect()
|
||||
|
||||
@@ -23,11 +23,14 @@ _uuid_processing_lock = threading.Lock() # Protects currently_processing_uuids
|
||||
USE_ASYNC_WORKERS = True
|
||||
|
||||
# Custom ThreadPoolExecutor for queue operations with named threads
|
||||
# Scale executor threads with FETCH_WORKERS to avoid bottleneck at high concurrency
|
||||
_max_executor_workers = max(50, int(os.getenv("FETCH_WORKERS", "10")))
|
||||
# Scale executor threads to match FETCH_WORKERS (no minimum, no maximum)
|
||||
# Thread naming: "QueueGetter-N" for easy debugging in thread dumps/traces
|
||||
# With FETCH_WORKERS=10: 10 workers + 10 executor threads = 20 threads total
|
||||
# With FETCH_WORKERS=500: 500 workers + 500 executor threads = 1000 threads total (acceptable on modern systems)
|
||||
_max_executor_workers = int(os.getenv("FETCH_WORKERS", "10"))
|
||||
queue_executor = ThreadPoolExecutor(
|
||||
max_workers=_max_executor_workers,
|
||||
thread_name_prefix="QueueGetter-"
|
||||
thread_name_prefix="QueueGetter-" # Shows in thread dumps as "QueueGetter-0", "QueueGetter-1", etc.
|
||||
)
|
||||
|
||||
|
||||
@@ -82,16 +85,17 @@ class WorkerThread:
|
||||
self.loop = None
|
||||
|
||||
def start(self):
|
||||
"""Start the worker thread"""
|
||||
"""Start the worker thread with descriptive name for debugging"""
|
||||
self.thread = threading.Thread(
|
||||
target=self.run,
|
||||
daemon=True,
|
||||
name=f"PageFetchAsyncUpdateWorker-{self.worker_id}"
|
||||
name=f"PageFetchAsyncUpdateWorker-{self.worker_id}" # Shows in thread dumps with worker ID
|
||||
)
|
||||
self.thread.start()
|
||||
|
||||
def stop(self):
|
||||
"""Stop the worker thread"""
|
||||
"""Stop the worker thread brutally - no waiting"""
|
||||
# Try to stop the event loop if it exists
|
||||
if self.loop and self.running:
|
||||
try:
|
||||
# Signal the loop to stop
|
||||
@@ -99,8 +103,7 @@ class WorkerThread:
|
||||
except RuntimeError:
|
||||
pass
|
||||
|
||||
if self.thread and self.thread.is_alive():
|
||||
self.thread.join(timeout=2.0)
|
||||
# Don't wait - thread is daemon and will die when needed
|
||||
|
||||
|
||||
def start_async_workers(n_workers, update_q, notification_q, app, datastore):
|
||||
@@ -125,7 +128,7 @@ def start_async_workers(n_workers, update_q, notification_q, app, datastore):
|
||||
|
||||
async def start_single_async_worker(worker_id, update_q, notification_q, app, datastore, executor=None):
|
||||
"""Start a single async worker with auto-restart capability"""
|
||||
from changedetectionio.async_update_worker import async_update_worker
|
||||
from changedetectionio.worker import async_update_worker
|
||||
|
||||
# Check if we're in pytest environment - if so, be more gentle with logging
|
||||
import os
|
||||
@@ -337,24 +340,36 @@ def queue_item_async_safe(update_q, item, silent=False):
|
||||
|
||||
|
||||
def shutdown_workers():
|
||||
"""Shutdown all async workers fast and aggressively"""
|
||||
global worker_threads
|
||||
"""Shutdown all async workers brutally - no delays, no waiting"""
|
||||
global worker_threads, queue_executor
|
||||
|
||||
# Check if we're in pytest environment - if so, be more gentle with logging
|
||||
import os
|
||||
in_pytest = "pytest" in os.sys.modules or "PYTEST_CURRENT_TEST" in os.environ
|
||||
|
||||
if not in_pytest:
|
||||
logger.info("Fast shutdown of async workers initiated...")
|
||||
logger.info("Brutal shutdown of async workers initiated...")
|
||||
|
||||
# Stop all worker threads
|
||||
# Stop all worker event loops
|
||||
for worker in worker_threads:
|
||||
worker.stop()
|
||||
|
||||
# Clear immediately - threads are daemon and will die
|
||||
worker_threads.clear()
|
||||
|
||||
# Shutdown the queue executor to prevent "cannot schedule new futures after shutdown" errors
|
||||
# This must happen AFTER workers are stopped to avoid race conditions
|
||||
if queue_executor:
|
||||
try:
|
||||
queue_executor.shutdown(wait=False)
|
||||
if not in_pytest:
|
||||
logger.debug("Queue executor shut down")
|
||||
except Exception as e:
|
||||
if not in_pytest:
|
||||
logger.warning(f"Error shutting down queue executor: {e}")
|
||||
|
||||
if not in_pytest:
|
||||
logger.info("Async workers fast shutdown complete")
|
||||
logger.info("Async workers brutal shutdown complete")
|
||||
|
||||
|
||||
|
||||
@@ -469,12 +484,14 @@ def wait_for_all_checks(update_q, timeout=150):
|
||||
elif time.time() - empty_since >= 0.3:
|
||||
# Add small buffer for filesystem operations to complete
|
||||
time.sleep(0.2)
|
||||
logger.trace("wait_for_all_checks: All checks complete (queue empty, workers idle)")
|
||||
return True
|
||||
else:
|
||||
empty_since = None
|
||||
|
||||
attempt += 1
|
||||
|
||||
logger.warning(f"wait_for_all_checks: Timeout after {timeout} attempts")
|
||||
return False # Timeout
|
||||
|
||||
|
||||
@@ -16,6 +16,13 @@ services:
|
||||
# Log output levels: TRACE, DEBUG(default), INFO, SUCCESS, WARNING, ERROR, CRITICAL
|
||||
# - LOGGER_LEVEL=TRACE
|
||||
#
|
||||
# Plugins! See https://changedetection.io/plugins for more plugins.
|
||||
# Install additional Python packages (processor plugins, etc.)
|
||||
# Example: Install the OSINT reconnaissance processor plugin
|
||||
# - EXTRA_PACKAGES=changedetection.io-osint-processor
|
||||
# Multiple packages can be installed by separating with spaces:
|
||||
# - EXTRA_PACKAGES=changedetection.io-osint-processor another-plugin
|
||||
#
|
||||
#
|
||||
# Uncomment below and the "sockpuppetbrowser" to use a real Chrome browser (It uses the "playwright" protocol)
|
||||
# - PLAYWRIGHT_DRIVER_URL=ws://browser-sockpuppet-chrome:3000
|
||||
|
||||
28
docker-entrypoint.sh
Executable file
28
docker-entrypoint.sh
Executable file
@@ -0,0 +1,28 @@
|
||||
#!/bin/bash
|
||||
set -e
|
||||
|
||||
# Install additional packages from EXTRA_PACKAGES env var
|
||||
# Uses a marker file to avoid reinstalling on every container restart
|
||||
INSTALLED_MARKER="/datastore/.extra_packages_installed"
|
||||
CURRENT_PACKAGES="$EXTRA_PACKAGES"
|
||||
|
||||
if [ -n "$EXTRA_PACKAGES" ]; then
|
||||
# Check if we need to install/update packages
|
||||
if [ ! -f "$INSTALLED_MARKER" ] || [ "$(cat $INSTALLED_MARKER 2>/dev/null)" != "$CURRENT_PACKAGES" ]; then
|
||||
echo "Installing extra packages: $EXTRA_PACKAGES"
|
||||
pip3 install --no-cache-dir $EXTRA_PACKAGES
|
||||
|
||||
if [ $? -eq 0 ]; then
|
||||
echo "$CURRENT_PACKAGES" > "$INSTALLED_MARKER"
|
||||
echo "Extra packages installed successfully"
|
||||
else
|
||||
echo "ERROR: Failed to install extra packages"
|
||||
exit 1
|
||||
fi
|
||||
else
|
||||
echo "Extra packages already installed: $EXTRA_PACKAGES"
|
||||
fi
|
||||
fi
|
||||
|
||||
# Execute the main command
|
||||
exec "$@"
|
||||
@@ -8,7 +8,7 @@ flask-paginate
|
||||
flask_expects_json~=1.7
|
||||
flask_restful
|
||||
flask_cors # For the Chrome extension to operate
|
||||
janus # Thread-safe async/sync queue bridge
|
||||
# janus # No longer needed - using pure threading.Queue for multi-loop support
|
||||
flask_wtf~=1.2
|
||||
flask~=3.1
|
||||
flask-socketio~=5.6.0
|
||||
@@ -51,9 +51,9 @@ linkify-it-py
|
||||
|
||||
# - Needed for apprise/spush, and maybe others? hopefully doesnt trigger a rust compile.
|
||||
# - Requires extra wheel for rPi, adds build time for arm/v8 which is not in piwheels
|
||||
# Pinned to 43.0.1 for ARM compatibility (45.x may not have pre-built ARM wheels)
|
||||
# Pinned to 44.x for ARM compatibility and sslyze compatibility (sslyze requires <45) and (45.x may not have pre-built ARM wheels)
|
||||
# Also pinned because dependabot wants specific versions
|
||||
cryptography==46.0.3
|
||||
cryptography==44.0.0
|
||||
|
||||
# apprise mqtt https://github.com/dgtlmoon/changedetection.io/issues/315
|
||||
# use any version other than 2.0.x due to https://github.com/eclipse/paho.mqtt.python/issues/814
|
||||
@@ -70,7 +70,7 @@ lxml >=4.8.0,!=5.2.0,!=5.2.1,<7
|
||||
|
||||
# XPath 2.0-3.1 support - 4.2.0 had issues, 4.1.5 stable
|
||||
# Consider updating to latest stable version periodically
|
||||
elementpath==5.1.0
|
||||
elementpath==5.1.1
|
||||
|
||||
# For fast image comparison in screenshot change detection
|
||||
# opencv-python-headless is OPTIONAL (excluded from requirements.txt)
|
||||
|
||||
Reference in New Issue
Block a user