mirror of
https://github.com/dgtlmoon/changedetection.io.git
synced 2026-02-03 21:06:00 +00:00
Compare commits
2 Commits
processor-
...
puppeteer-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c4a5e4a361 | ||
|
|
ee2a6fb77f |
@@ -138,15 +138,6 @@ ENV LOGGER_LEVEL="$LOGGER_LEVEL"
|
||||
ENV LC_ALL=en_US.UTF-8
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Copy and set up entrypoint script for installing extra packages
|
||||
COPY docker-entrypoint.sh /docker-entrypoint.sh
|
||||
RUN chmod +x /docker-entrypoint.sh
|
||||
|
||||
# Set entrypoint to handle EXTRA_PACKAGES env var
|
||||
ENTRYPOINT ["/docker-entrypoint.sh"]
|
||||
|
||||
# Default command (can be overridden in docker-compose.yml)
|
||||
CMD ["python", "./changedetection.py", "-d", "/datastore"]
|
||||
|
||||
|
||||
|
||||
@@ -169,18 +169,58 @@ class Watch(Resource):
|
||||
|
||||
# Handle processor-config-* fields separately (save to JSON, not datastore)
|
||||
from changedetectionio import processors
|
||||
processor_config_data = {}
|
||||
regular_data = {}
|
||||
|
||||
# Make a mutable copy of request.json for modification
|
||||
json_data = dict(request.json)
|
||||
|
||||
# Extract and remove processor config fields from json_data
|
||||
processor_config_data = processors.extract_processor_config_from_form_data(json_data)
|
||||
for key, value in request.json.items():
|
||||
if key.startswith('processor_config_'):
|
||||
config_key = key.replace('processor_config_', '')
|
||||
if value: # Only save non-empty values
|
||||
processor_config_data[config_key] = value
|
||||
else:
|
||||
regular_data[key] = value
|
||||
|
||||
# Update watch with regular (non-processor-config) fields
|
||||
watch.update(json_data)
|
||||
watch.update(regular_data)
|
||||
|
||||
# Save processor config to JSON file
|
||||
processors.save_processor_config(self.datastore, uuid, processor_config_data)
|
||||
# Save processor config to JSON file if any config data exists
|
||||
if processor_config_data:
|
||||
try:
|
||||
processor_name = request.json.get('processor', watch.get('processor'))
|
||||
if processor_name:
|
||||
# Create a processor instance to access config methods
|
||||
from changedetectionio.processors import difference_detection_processor
|
||||
processor_instance = difference_detection_processor(self.datastore, uuid)
|
||||
# Use processor name as filename so each processor keeps its own config
|
||||
config_filename = f'{processor_name}.json'
|
||||
processor_instance.update_extra_watch_config(config_filename, processor_config_data)
|
||||
logger.debug(f"API: Saved processor config to {config_filename}: {processor_config_data}")
|
||||
|
||||
# Call optional edit_hook if processor has one
|
||||
try:
|
||||
import importlib
|
||||
edit_hook_module_name = f'changedetectionio.processors.{processor_name}.edit_hook'
|
||||
|
||||
try:
|
||||
edit_hook = importlib.import_module(edit_hook_module_name)
|
||||
logger.debug(f"API: Found edit_hook module for {processor_name}")
|
||||
|
||||
if hasattr(edit_hook, 'on_config_save'):
|
||||
logger.info(f"API: Calling edit_hook.on_config_save for {processor_name}")
|
||||
# Call hook and get updated config
|
||||
updated_config = edit_hook.on_config_save(watch, processor_config_data, self.datastore)
|
||||
# Save updated config back to file
|
||||
processor_instance.update_extra_watch_config(config_filename, updated_config)
|
||||
logger.info(f"API: Edit hook updated config: {updated_config}")
|
||||
else:
|
||||
logger.debug(f"API: Edit hook module found but no on_config_save function")
|
||||
except ModuleNotFoundError:
|
||||
logger.debug(f"API: No edit_hook module for processor {processor_name} (this is normal)")
|
||||
except Exception as hook_error:
|
||||
logger.error(f"API: Edit hook error (non-fatal): {hook_error}", exc_info=True)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"API: Failed to save processor config: {e}")
|
||||
|
||||
return "OK", 200
|
||||
|
||||
|
||||
@@ -26,9 +26,8 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
# URL List import
|
||||
if request.values.get('urls') and len(request.values.get('urls').strip()):
|
||||
# Import and push into the queue for immediate update check
|
||||
from changedetectionio import processors
|
||||
importer_handler = import_url_list()
|
||||
importer_handler.run(data=request.values.get('urls'), flash=flash, datastore=datastore, processor=request.values.get('processor', processors.get_default_processor()))
|
||||
importer_handler.run(data=request.values.get('urls'), flash=flash, datastore=datastore, processor=request.values.get('processor', 'text_json_diff'))
|
||||
logger.debug(f"Imported {len(importer_handler.new_uuids)} new UUIDs")
|
||||
# Dont' add to queue because scheduler can see that they haven't been checked and will add them to the queue
|
||||
# for uuid in importer_handler.new_uuids:
|
||||
|
||||
@@ -100,21 +100,23 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
# Get the processor type for this watch
|
||||
processor_name = watch.get('processor', 'text_json_diff')
|
||||
|
||||
# Try to get the processor's difference module (works for both built-in and plugin processors)
|
||||
from changedetectionio.processors import get_processor_submodule
|
||||
processor_module = get_processor_submodule(processor_name, 'difference')
|
||||
try:
|
||||
# Try to import the processor's difference module
|
||||
processor_module = importlib.import_module(f'changedetectionio.processors.{processor_name}.difference')
|
||||
|
||||
# Call the processor's render() function
|
||||
if processor_module and hasattr(processor_module, 'render'):
|
||||
return processor_module.render(
|
||||
watch=watch,
|
||||
datastore=datastore,
|
||||
request=request,
|
||||
url_for=url_for,
|
||||
render_template=render_template,
|
||||
flash=flash,
|
||||
redirect=redirect
|
||||
)
|
||||
# Call the processor's render() function
|
||||
if hasattr(processor_module, 'render'):
|
||||
return processor_module.render(
|
||||
watch=watch,
|
||||
datastore=datastore,
|
||||
request=request,
|
||||
url_for=url_for,
|
||||
render_template=render_template,
|
||||
flash=flash,
|
||||
redirect=redirect
|
||||
)
|
||||
except (ImportError, ModuleNotFoundError) as e:
|
||||
logger.warning(f"Processor {processor_name} does not have a difference module, falling back to text_json_diff: {e}")
|
||||
|
||||
# Fallback: if processor doesn't have difference module, use text_json_diff as default
|
||||
from changedetectionio.processors.text_json_diff.difference import render as default_render
|
||||
@@ -154,21 +156,23 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
# Get the processor type for this watch
|
||||
processor_name = watch.get('processor', 'text_json_diff')
|
||||
|
||||
# Try to get the processor's extract module (works for both built-in and plugin processors)
|
||||
from changedetectionio.processors import get_processor_submodule
|
||||
processor_module = get_processor_submodule(processor_name, 'extract')
|
||||
try:
|
||||
# Try to import the processor's extract module
|
||||
processor_module = importlib.import_module(f'changedetectionio.processors.{processor_name}.extract')
|
||||
|
||||
# Call the processor's render_form() function
|
||||
if processor_module and hasattr(processor_module, 'render_form'):
|
||||
return processor_module.render_form(
|
||||
watch=watch,
|
||||
datastore=datastore,
|
||||
request=request,
|
||||
url_for=url_for,
|
||||
render_template=render_template,
|
||||
flash=flash,
|
||||
redirect=redirect
|
||||
)
|
||||
# Call the processor's render_form() function
|
||||
if hasattr(processor_module, 'render_form'):
|
||||
return processor_module.render_form(
|
||||
watch=watch,
|
||||
datastore=datastore,
|
||||
request=request,
|
||||
url_for=url_for,
|
||||
render_template=render_template,
|
||||
flash=flash,
|
||||
redirect=redirect
|
||||
)
|
||||
except (ImportError, ModuleNotFoundError) as e:
|
||||
logger.warning(f"Processor {processor_name} does not have an extract module, falling back to base extractor: {e}")
|
||||
|
||||
# Fallback: if processor doesn't have extract module, use base processors.extract as default
|
||||
from changedetectionio.processors.extract import render_form as default_render_form
|
||||
@@ -208,22 +212,24 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
# Get the processor type for this watch
|
||||
processor_name = watch.get('processor', 'text_json_diff')
|
||||
|
||||
# Try to get the processor's extract module (works for both built-in and plugin processors)
|
||||
from changedetectionio.processors import get_processor_submodule
|
||||
processor_module = get_processor_submodule(processor_name, 'extract')
|
||||
try:
|
||||
# Try to import the processor's extract module
|
||||
processor_module = importlib.import_module(f'changedetectionio.processors.{processor_name}.extract')
|
||||
|
||||
# Call the processor's process_extraction() function
|
||||
if processor_module and hasattr(processor_module, 'process_extraction'):
|
||||
return processor_module.process_extraction(
|
||||
watch=watch,
|
||||
datastore=datastore,
|
||||
request=request,
|
||||
url_for=url_for,
|
||||
make_response=make_response,
|
||||
send_from_directory=send_from_directory,
|
||||
flash=flash,
|
||||
redirect=redirect
|
||||
)
|
||||
# Call the processor's process_extraction() function
|
||||
if hasattr(processor_module, 'process_extraction'):
|
||||
return processor_module.process_extraction(
|
||||
watch=watch,
|
||||
datastore=datastore,
|
||||
request=request,
|
||||
url_for=url_for,
|
||||
make_response=make_response,
|
||||
send_from_directory=send_from_directory,
|
||||
flash=flash,
|
||||
redirect=redirect
|
||||
)
|
||||
except (ImportError, ModuleNotFoundError) as e:
|
||||
logger.warning(f"Processor {processor_name} does not have an extract module, falling back to base extractor: {e}")
|
||||
|
||||
# Fallback: if processor doesn't have extract module, use base processors.extract as default
|
||||
from changedetectionio.processors.extract import process_extraction as default_process_extraction
|
||||
@@ -273,33 +279,38 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
# Get the processor type for this watch
|
||||
processor_name = watch.get('processor', 'text_json_diff')
|
||||
|
||||
# Try to get the processor's difference module (works for both built-in and plugin processors)
|
||||
from changedetectionio.processors import get_processor_submodule
|
||||
processor_module = get_processor_submodule(processor_name, 'difference')
|
||||
try:
|
||||
# Try to import the processor's difference module
|
||||
processor_module = importlib.import_module(f'changedetectionio.processors.{processor_name}.difference')
|
||||
|
||||
# Call the processor's get_asset() function
|
||||
if processor_module and hasattr(processor_module, 'get_asset'):
|
||||
result = processor_module.get_asset(
|
||||
asset_name=asset_name,
|
||||
watch=watch,
|
||||
datastore=datastore,
|
||||
request=request
|
||||
)
|
||||
# Call the processor's get_asset() function
|
||||
if hasattr(processor_module, 'get_asset'):
|
||||
result = processor_module.get_asset(
|
||||
asset_name=asset_name,
|
||||
watch=watch,
|
||||
datastore=datastore,
|
||||
request=request
|
||||
)
|
||||
|
||||
if result is None:
|
||||
if result is None:
|
||||
from flask import abort
|
||||
abort(404, description=f"Asset '{asset_name}' not found")
|
||||
|
||||
binary_data, content_type, cache_control = result
|
||||
|
||||
response = make_response(binary_data)
|
||||
response.headers['Content-Type'] = content_type
|
||||
if cache_control:
|
||||
response.headers['Cache-Control'] = cache_control
|
||||
return response
|
||||
else:
|
||||
logger.warning(f"Processor {processor_name} does not implement get_asset()")
|
||||
from flask import abort
|
||||
abort(404, description=f"Asset '{asset_name}' not found")
|
||||
abort(404, description=f"Processor '{processor_name}' does not support assets")
|
||||
|
||||
binary_data, content_type, cache_control = result
|
||||
|
||||
response = make_response(binary_data)
|
||||
response.headers['Content-Type'] = content_type
|
||||
if cache_control:
|
||||
response.headers['Cache-Control'] = cache_control
|
||||
return response
|
||||
else:
|
||||
logger.warning(f"Processor {processor_name} does not implement get_asset()")
|
||||
except (ImportError, ModuleNotFoundError) as e:
|
||||
logger.warning(f"Processor {processor_name} does not have a difference module: {e}")
|
||||
from flask import abort
|
||||
abort(404, description=f"Processor '{processor_name}' does not support assets")
|
||||
abort(404, description=f"Processor '{processor_name}' not found")
|
||||
|
||||
return diff_blueprint
|
||||
|
||||
@@ -71,13 +71,8 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
processor_name = datastore.data['watching'][uuid].get('processor', '')
|
||||
processor_classes = next((tpl for tpl in processors.find_processors() if tpl[1] == processor_name), None)
|
||||
if not processor_classes:
|
||||
flash(gettext("Could not load '{}' processor, processor plugin might be missing. Please select a different processor.").format(processor_name), 'error')
|
||||
# Fall back to default processor so user can still edit and change processor
|
||||
processor_classes = next((tpl for tpl in processors.find_processors() if tpl[1] == 'text_json_diff'), None)
|
||||
if not processor_classes:
|
||||
# If even text_json_diff is missing, something is very wrong
|
||||
flash(gettext("Could not load '{}' processor, processor plugin might be missing.").format(processor_name), 'error')
|
||||
return redirect(url_for('watchlist.index'))
|
||||
flash(gettext("Cannot load the edit form for processor/plugin '{}', plugin missing?").format(processor_classes[1]), 'error')
|
||||
return redirect(url_for('watchlist.index'))
|
||||
|
||||
parent_module = processors.get_parent_module(processor_classes[0])
|
||||
|
||||
@@ -154,10 +149,58 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
|
||||
extra_update_obj['time_between_check'] = form.time_between_check.data
|
||||
|
||||
# Handle processor-config-* fields separately (save to JSON, not datastore)
|
||||
# IMPORTANT: These must NOT be saved to url-watches.json, only to the processor-specific JSON file
|
||||
processor_config_data = processors.extract_processor_config_from_form_data(form.data)
|
||||
processors.save_processor_config(datastore, uuid, processor_config_data)
|
||||
# Handle processor-config-* fields separately (save to JSON, not datastore)
|
||||
processor_config_data = {}
|
||||
fields_to_remove = []
|
||||
for field_name, field_value in form.data.items():
|
||||
if field_name.startswith('processor_config_'):
|
||||
config_key = field_name.replace('processor_config_', '')
|
||||
if field_value: # Only save non-empty values
|
||||
processor_config_data[config_key] = field_value
|
||||
fields_to_remove.append(field_name)
|
||||
|
||||
# Save processor config to JSON file if any config data exists
|
||||
if processor_config_data:
|
||||
try:
|
||||
processor_name = form.data.get('processor')
|
||||
# Create a processor instance to access config methods
|
||||
processor_instance = processors.difference_detection_processor(datastore, uuid)
|
||||
# Use processor name as filename so each processor keeps its own config
|
||||
config_filename = f'{processor_name}.json'
|
||||
processor_instance.update_extra_watch_config(config_filename, processor_config_data)
|
||||
logger.debug(f"Saved processor config to {config_filename}: {processor_config_data}")
|
||||
|
||||
# Call optional edit_hook if processor has one
|
||||
try:
|
||||
# Try to import the edit_hook module from the processor package
|
||||
import importlib
|
||||
edit_hook_module_name = f'changedetectionio.processors.{processor_name}.edit_hook'
|
||||
|
||||
try:
|
||||
edit_hook = importlib.import_module(edit_hook_module_name)
|
||||
logger.debug(f"Found edit_hook module for {processor_name}")
|
||||
|
||||
if hasattr(edit_hook, 'on_config_save'):
|
||||
logger.info(f"Calling edit_hook.on_config_save for {processor_name}")
|
||||
watch_obj = datastore.data['watching'][uuid]
|
||||
# Call hook and get updated config
|
||||
updated_config = edit_hook.on_config_save(watch_obj, processor_config_data, datastore)
|
||||
# Save updated config back to file
|
||||
processor_instance.update_extra_watch_config(config_filename, updated_config)
|
||||
logger.info(f"Edit hook updated config: {updated_config}")
|
||||
else:
|
||||
logger.debug(f"Edit hook module found but no on_config_save function")
|
||||
except ModuleNotFoundError:
|
||||
logger.debug(f"No edit_hook module for processor {processor_name} (this is normal)")
|
||||
except Exception as hook_error:
|
||||
logger.error(f"Edit hook error (non-fatal): {hook_error}", exc_info=True)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to save processor config: {e}")
|
||||
|
||||
# Remove processor-config-* fields from form.data before updating datastore
|
||||
for field_name in fields_to_remove:
|
||||
form.data.pop(field_name, None)
|
||||
|
||||
# Ignore text
|
||||
form_ignore_text = form.ignore_text.data
|
||||
@@ -267,13 +310,6 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
|
||||
# Get fetcher capabilities instead of hardcoded logic
|
||||
capabilities = get_fetcher_capabilities(watch, datastore)
|
||||
|
||||
# Add processor capabilities from module
|
||||
capabilities['supports_visual_selector'] = getattr(parent_module, 'supports_visual_selector', False)
|
||||
capabilities['supports_text_filters_and_triggers'] = getattr(parent_module, 'supports_text_filters_and_triggers', False)
|
||||
capabilities['supports_text_filters_and_triggers_elements'] = getattr(parent_module, 'supports_text_filters_and_triggers_elements', False)
|
||||
capabilities['supports_request_type'] = getattr(parent_module, 'supports_request_type', False)
|
||||
|
||||
app_rss_token = datastore.data['settings']['application'].get('rss_access_token'),
|
||||
|
||||
c = [f"processor-{watch.get('processor')}"]
|
||||
|
||||
@@ -38,21 +38,24 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
# Get the processor type for this watch
|
||||
processor_name = watch.get('processor', 'text_json_diff')
|
||||
|
||||
# Try to get the processor's preview module (works for both built-in and plugin processors)
|
||||
from changedetectionio.processors import get_processor_submodule
|
||||
processor_module = get_processor_submodule(processor_name, 'preview')
|
||||
try:
|
||||
# Try to import the processor's preview module
|
||||
import importlib
|
||||
processor_module = importlib.import_module(f'changedetectionio.processors.{processor_name}.preview')
|
||||
|
||||
# Call the processor's render() function
|
||||
if processor_module and hasattr(processor_module, 'render'):
|
||||
return processor_module.render(
|
||||
watch=watch,
|
||||
datastore=datastore,
|
||||
request=request,
|
||||
url_for=url_for,
|
||||
render_template=render_template,
|
||||
flash=flash,
|
||||
redirect=redirect
|
||||
)
|
||||
# Call the processor's render() function
|
||||
if hasattr(processor_module, 'render'):
|
||||
return processor_module.render(
|
||||
watch=watch,
|
||||
datastore=datastore,
|
||||
request=request,
|
||||
url_for=url_for,
|
||||
render_template=render_template,
|
||||
flash=flash,
|
||||
redirect=redirect
|
||||
)
|
||||
except (ImportError, ModuleNotFoundError) as e:
|
||||
logger.debug(f"Processor {processor_name} does not have a preview module, using default preview: {e}")
|
||||
|
||||
# Fallback: if processor doesn't have preview module, use default text preview
|
||||
content = []
|
||||
@@ -157,33 +160,39 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
# Get the processor type for this watch
|
||||
processor_name = watch.get('processor', 'text_json_diff')
|
||||
|
||||
# Try to get the processor's preview module (works for both built-in and plugin processors)
|
||||
from changedetectionio.processors import get_processor_submodule
|
||||
processor_module = get_processor_submodule(processor_name, 'preview')
|
||||
try:
|
||||
# Try to import the processor's preview module
|
||||
import importlib
|
||||
processor_module = importlib.import_module(f'changedetectionio.processors.{processor_name}.preview')
|
||||
|
||||
# Call the processor's get_asset() function
|
||||
if processor_module and hasattr(processor_module, 'get_asset'):
|
||||
result = processor_module.get_asset(
|
||||
asset_name=asset_name,
|
||||
watch=watch,
|
||||
datastore=datastore,
|
||||
request=request
|
||||
)
|
||||
# Call the processor's get_asset() function
|
||||
if hasattr(processor_module, 'get_asset'):
|
||||
result = processor_module.get_asset(
|
||||
asset_name=asset_name,
|
||||
watch=watch,
|
||||
datastore=datastore,
|
||||
request=request
|
||||
)
|
||||
|
||||
if result is None:
|
||||
if result is None:
|
||||
from flask import abort
|
||||
abort(404, description=f"Asset '{asset_name}' not found")
|
||||
|
||||
binary_data, content_type, cache_control = result
|
||||
|
||||
response = make_response(binary_data)
|
||||
response.headers['Content-Type'] = content_type
|
||||
if cache_control:
|
||||
response.headers['Cache-Control'] = cache_control
|
||||
return response
|
||||
else:
|
||||
logger.warning(f"Processor {processor_name} does not implement get_asset()")
|
||||
from flask import abort
|
||||
abort(404, description=f"Asset '{asset_name}' not found")
|
||||
abort(404, description=f"Processor '{processor_name}' does not support assets")
|
||||
|
||||
binary_data, content_type, cache_control = result
|
||||
|
||||
response = make_response(binary_data)
|
||||
response.headers['Content-Type'] = content_type
|
||||
if cache_control:
|
||||
response.headers['Cache-Control'] = cache_control
|
||||
return response
|
||||
else:
|
||||
logger.warning(f"Processor {processor_name} does not implement get_asset()")
|
||||
except (ImportError, ModuleNotFoundError) as e:
|
||||
logger.warning(f"Processor {processor_name} does not have a preview module: {e}")
|
||||
from flask import abort
|
||||
abort(404, description=f"Processor '{processor_name}' does not support assets")
|
||||
abort(404, description=f"Processor '{processor_name}' not found")
|
||||
|
||||
return preview_blueprint
|
||||
|
||||
@@ -45,19 +45,14 @@
|
||||
<div class="tabs collapsable">
|
||||
<ul>
|
||||
<li class="tab"><a href="#general">{{ _('General') }}</a></li>
|
||||
{% if capabilities.supports_request_type %}
|
||||
<li class="tab"><a href="#request">{{ _('Request') }}</a></li>
|
||||
{% endif %}
|
||||
{% if extra_tab_content %}
|
||||
<li class="tab"><a href="#extras_tab">{{ extra_tab_content }}</a></li>
|
||||
{% endif %}
|
||||
{% if capabilities.supports_browser_steps %}
|
||||
<li class="tab"><a id="browsersteps-tab" href="#browser-steps">{{ _('Browser Steps') }}</a></li>
|
||||
{% endif %}
|
||||
{% if capabilities.supports_visual_selector %}
|
||||
<!-- should goto extra forms? -->
|
||||
{% if watch['processor'] == 'text_json_diff' or watch['processor'] == 'image_ssim_diff' %}
|
||||
<li class="tab"><a id="visualselector-tab" href="#visualselector">{{ _('Visual Filter Selector') }}</a></li>
|
||||
{% endif %}
|
||||
{% if capabilities.supports_text_filters_and_triggers %}
|
||||
<li class="tab" id="filters-and-triggers-tab"><a href="#filters-and-triggers">{{ _('Filters & Triggers') }}</a></li>
|
||||
<li class="tab" id="conditions-tab"><a href="#conditions">{{ _('Conditions') }}</a></li>
|
||||
{% endif %}
|
||||
@@ -121,7 +116,6 @@
|
||||
</fieldset>
|
||||
</div>
|
||||
|
||||
{% if capabilities.supports_request_type %}
|
||||
<div class="tab-pane-inner" id="request">
|
||||
<div class="pure-control-group inline-radio">
|
||||
{{ render_field(form.fetch_backend, class="fetch-backend") }}
|
||||
@@ -209,7 +203,6 @@ Math: {{ 1 + 1 }}") }}
|
||||
</div>
|
||||
</fieldset>
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
<div class="tab-pane-inner" id="browser-steps">
|
||||
{% if capabilities.supports_browser_steps %}
|
||||
@@ -290,7 +283,8 @@ Math: {{ 1 + 1 }}") }}
|
||||
</fieldset>
|
||||
</div>
|
||||
|
||||
{% if capabilities.supports_text_filters_and_triggers %}
|
||||
{% if watch['processor'] == 'text_json_diff' or watch['processor'] == 'image_ssim_diff' %}
|
||||
|
||||
<div class="tab-pane-inner" id="conditions">
|
||||
<script>
|
||||
const verify_condition_rule_url="{{url_for('conditions.verify_condition_single_rule', watch_uuid=uuid)}}";
|
||||
@@ -309,9 +303,7 @@ Math: {{ 1 + 1 }}") }}
|
||||
<span id="activate-text-preview" class="pure-button pure-button-primary button-xsmall">{{ _('Activate preview') }}</span>
|
||||
<div>
|
||||
<div id="edit-text-filter">
|
||||
|
||||
{% if capabilities.supports_text_filters_and_triggers_elements %}
|
||||
<div class="pure-control-group" id="pro-tips">
|
||||
<div class="pure-control-group" id="pro-tips">
|
||||
<strong>{{ _('Pro-tips:') }}</strong><br>
|
||||
<ul>
|
||||
<li>
|
||||
@@ -322,8 +314,8 @@ Math: {{ 1 + 1 }}") }}
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
{% include "edit/include_subtract.html" %}
|
||||
{% endif %}
|
||||
<div class="text-filtering border-fieldset">
|
||||
<fieldset class="pure-group" id="text-filtering-type-options">
|
||||
<h3>{{ _('Text filtering') }}</h3>
|
||||
@@ -382,7 +374,7 @@ Math: {{ 1 + 1 }}") }}
|
||||
{{ extra_form_content|safe }}
|
||||
</div>
|
||||
{% endif %}
|
||||
{% if capabilities.supports_visual_selector %}
|
||||
{% if watch['processor'] == 'text_json_diff' or watch['processor'] == 'image_ssim_diff' %}
|
||||
<div class="tab-pane-inner visual-selector-ui" id="visualselector">
|
||||
<img class="beta-logo" src="{{url_for('static_content', group='images', filename='beta-logo.png')}}" alt="New beta functionality">
|
||||
|
||||
@@ -394,7 +386,7 @@ Math: {{ 1 + 1 }}") }}
|
||||
{{ _('The Visual Selector tool lets you select the') }} <i>{{ _('text') }}</i> {{ _('elements that will be used for the change detection. It automatically fills-in the filters in the "CSS/JSONPath/JQ/XPath Filters" box of the') }} <a href="#filters-and-triggers">{{ _('Filters & Triggers') }}</a> {{ _('tab. Use') }} <strong>{{ _('Shift+Click') }}</strong> {{ _('to select multiple items.') }}
|
||||
</span>
|
||||
|
||||
{% if watch['processor'] == 'image_ssim_diff' %} {# @todo, integrate with image_ssim_diff selector better, use some extra form ? #}
|
||||
{% if watch['processor'] == 'image_ssim_diff' %}
|
||||
<div id="selection-mode-controls" style="margin: 10px 0; padding: 10px; background: var(--color-background-tab); border-radius: 5px;">
|
||||
<label style="font-weight: 600; margin-right: 15px;">{{ _('Selection Mode:') }}</label>
|
||||
<label style="margin-right: 15px;">
|
||||
|
||||
@@ -24,8 +24,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
||||
flash(gettext('Warning, URL {} already exists').format(url), "notice")
|
||||
|
||||
add_paused = request.form.get('edit_and_watch_submit_button') != None
|
||||
from changedetectionio import processors
|
||||
processor = request.form.get('processor', processors.get_default_processor())
|
||||
processor = request.form.get('processor', 'text_json_diff')
|
||||
new_uuid = datastore.add_watch(url=url, tag=request.form.get('tags').strip(), extras={'paused': add_paused, 'processor': processor})
|
||||
|
||||
if new_uuid:
|
||||
|
||||
@@ -205,24 +205,23 @@ html[data-darkmode="true"] .watch-tag-list.tag-{{ class_name }} {
|
||||
</div>
|
||||
{% endif %}
|
||||
<div>
|
||||
{%- if watch['processor'] and watch['processor'] in processor_badge_texts -%}
|
||||
<span class="processor-badge processor-badge-{{ watch['processor'] }}" title="{{ processor_descriptions.get(watch['processor'], watch['processor']) }}">{{ processor_badge_texts[watch['processor']] }}</span>
|
||||
{%- endif -%}
|
||||
<span class="watch-title">
|
||||
{% if system_use_url_watchlist or watch.get('use_page_title_in_list') %}
|
||||
{{ watch.label }}
|
||||
{% else %}
|
||||
{{ watch.get('title') or watch.link }}
|
||||
{% endif %}
|
||||
<a class="external" target="_blank" rel="noopener" href="{{ watch.link.replace('source:','') }}"> </a>
|
||||
</span>
|
||||
<span class="watch-title">
|
||||
{% if system_use_url_watchlist or watch.get('use_page_title_in_list') %}
|
||||
{{ watch.label }}
|
||||
{% else %}
|
||||
{{ watch.get('title') or watch.link }}
|
||||
{% endif %}
|
||||
<a class="external" target="_blank" rel="noopener" href="{{ watch.link.replace('source:','') }}"> </a>
|
||||
</span>
|
||||
<div class="error-text" style="display:none;">{{ watch.compile_error_texts(has_proxies=datastore.proxy_list)|safe }}</div>
|
||||
{%- if watch['processor'] == 'text_json_diff' -%}
|
||||
{%- if watch['has_ldjson_price_data'] and not watch['track_ldjson_price_data'] -%}
|
||||
<div class="ldjson-price-track-offer">Switch to Restock & Price watch mode? <a href="{{url_for('price_data_follower.accept', uuid=watch.uuid)}}" class="pure-button button-xsmall">Yes</a> <a href="{{url_for('price_data_follower.reject', uuid=watch.uuid)}}" class="">No</a></div>
|
||||
{%- endif -%}
|
||||
{%- endif -%}
|
||||
|
||||
{%- if watch['processor'] and watch['processor'] in processor_badge_texts -%}
|
||||
<span class="processor-badge processor-badge-{{ watch['processor'] }}" title="{{ processor_descriptions.get(watch['processor'], watch['processor']) }}">{{ processor_badge_texts[watch['processor']] }}</span>
|
||||
{%- endif -%}
|
||||
{%- for watch_tag_uuid, watch_tag in datastore.get_all_tags_for_watch(watch['uuid']).items() -%}
|
||||
<a href="{{url_for('watchlist.index', tag=watch_tag_uuid) }}" class="watch-tag-list tag-{{ watch_tag.title|sanitize_tag_class }}">{{ watch_tag.title }}</a>
|
||||
{%- endfor -%}
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import asyncio
|
||||
import gc
|
||||
import json
|
||||
import os
|
||||
@@ -349,12 +350,7 @@ class fetcher(Fetcher):
|
||||
|
||||
if self.status_code != 200 and not ignore_status_codes:
|
||||
screenshot = await capture_full_page_async(self.page, screenshot_format=self.screenshot_format, watch_uuid=watch_uuid, lock_viewport_elements=self.lock_viewport_elements)
|
||||
# Cleanup before raising to prevent memory leak
|
||||
await self.page.close()
|
||||
await context.close()
|
||||
await browser.close()
|
||||
# Force garbage collection to release Playwright resources immediately
|
||||
gc.collect()
|
||||
# Finally block will handle cleanup
|
||||
raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, screenshot=screenshot)
|
||||
|
||||
if not empty_pages_are_a_change and len((await self.page.content()).strip()) == 0:
|
||||
@@ -370,12 +366,7 @@ class fetcher(Fetcher):
|
||||
try:
|
||||
await self.iterate_browser_steps(start_url=url)
|
||||
except BrowserStepsStepException:
|
||||
try:
|
||||
await context.close()
|
||||
await browser.close()
|
||||
except Exception as e:
|
||||
# Fine, could be messy situation
|
||||
pass
|
||||
# Finally block will handle cleanup
|
||||
raise
|
||||
|
||||
await self.page.wait_for_timeout(extra_wait * 1000)
|
||||
@@ -424,35 +415,40 @@ class fetcher(Fetcher):
|
||||
raise ScreenshotUnavailable(url=url, status_code=self.status_code)
|
||||
|
||||
finally:
|
||||
# Request garbage collection one more time before closing
|
||||
# Clean up resources properly with timeouts to prevent hanging
|
||||
try:
|
||||
await self.page.request_gc()
|
||||
except:
|
||||
pass
|
||||
|
||||
# Clean up resources properly
|
||||
try:
|
||||
await self.page.request_gc()
|
||||
except:
|
||||
pass
|
||||
if hasattr(self, 'page') and self.page:
|
||||
await self.page.request_gc()
|
||||
await asyncio.wait_for(self.page.close(), timeout=5.0)
|
||||
logger.debug(f"Successfully closed page for {url}")
|
||||
except asyncio.TimeoutError:
|
||||
logger.warning(f"Timed out closing page for {url} (5s)")
|
||||
except Exception as e:
|
||||
logger.warning(f"Error closing page for {url}: {e}")
|
||||
finally:
|
||||
self.page = None
|
||||
|
||||
try:
|
||||
await self.page.close()
|
||||
except:
|
||||
pass
|
||||
self.page = None
|
||||
if context:
|
||||
await asyncio.wait_for(context.close(), timeout=5.0)
|
||||
logger.debug(f"Successfully closed context for {url}")
|
||||
except asyncio.TimeoutError:
|
||||
logger.warning(f"Timed out closing context for {url} (5s)")
|
||||
except Exception as e:
|
||||
logger.warning(f"Error closing context for {url}: {e}")
|
||||
finally:
|
||||
context = None
|
||||
|
||||
try:
|
||||
await context.close()
|
||||
except:
|
||||
pass
|
||||
context = None
|
||||
|
||||
try:
|
||||
await browser.close()
|
||||
except:
|
||||
pass
|
||||
browser = None
|
||||
if browser:
|
||||
await asyncio.wait_for(browser.close(), timeout=5.0)
|
||||
logger.debug(f"Successfully closed browser connection for {url}")
|
||||
except asyncio.TimeoutError:
|
||||
logger.warning(f"Timed out closing browser connection for {url} (5s)")
|
||||
except Exception as e:
|
||||
logger.warning(f"Error closing browser for {url}: {e}")
|
||||
finally:
|
||||
browser = None
|
||||
|
||||
# Force Python GC to release Playwright resources immediately
|
||||
# Playwright objects can have circular references that delay cleanup
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import asyncio
|
||||
import gc
|
||||
import json
|
||||
import os
|
||||
import websockets.exceptions
|
||||
@@ -221,19 +222,36 @@ class fetcher(Fetcher):
|
||||
self.browser_connection_url += f"{r}--proxy-server={proxy_url}"
|
||||
|
||||
async def quit(self, watch=None):
|
||||
try:
|
||||
await self.page.close()
|
||||
del self.page
|
||||
except Exception as e:
|
||||
pass
|
||||
watch_uuid = watch.get('uuid') if watch else 'unknown'
|
||||
|
||||
# Close page
|
||||
try:
|
||||
await self.browser.close()
|
||||
del self.browser
|
||||
if hasattr(self, 'page') and self.page:
|
||||
await asyncio.wait_for(self.page.close(), timeout=5.0)
|
||||
logger.debug(f"[{watch_uuid}] Page closed successfully")
|
||||
except asyncio.TimeoutError:
|
||||
logger.warning(f"[{watch_uuid}] Timed out closing page (5s)")
|
||||
except Exception as e:
|
||||
pass
|
||||
logger.warning(f"[{watch_uuid}] Error closing page: {e}")
|
||||
finally:
|
||||
self.page = None
|
||||
|
||||
logger.info("Cleanup puppeteer complete.")
|
||||
# Close browser connection
|
||||
try:
|
||||
if hasattr(self, 'browser') and self.browser:
|
||||
await asyncio.wait_for(self.browser.close(), timeout=5.0)
|
||||
logger.debug(f"[{watch_uuid}] Browser closed successfully")
|
||||
except asyncio.TimeoutError:
|
||||
logger.warning(f"[{watch_uuid}] Timed out closing browser (5s)")
|
||||
except Exception as e:
|
||||
logger.warning(f"[{watch_uuid}] Error closing browser: {e}")
|
||||
finally:
|
||||
self.browser = None
|
||||
|
||||
logger.info(f"[{watch_uuid}] Cleanup puppeteer complete")
|
||||
|
||||
# Force garbage collection to release resources
|
||||
gc.collect()
|
||||
|
||||
async def fetch_page(self,
|
||||
current_include_filters,
|
||||
@@ -263,9 +281,11 @@ class fetcher(Fetcher):
|
||||
# Connect directly using the specified browser_ws_endpoint
|
||||
# @todo timeout
|
||||
try:
|
||||
logger.debug(f"[{watch_uuid}] Connecting to browser at {self.browser_connection_url}")
|
||||
self.browser = await pyppeteer_instance.connect(browserWSEndpoint=self.browser_connection_url,
|
||||
ignoreHTTPSErrors=True
|
||||
)
|
||||
logger.debug(f"[{watch_uuid}] Browser connected successfully")
|
||||
except websockets.exceptions.InvalidStatusCode as e:
|
||||
raise BrowserConnectError(msg=f"Error while trying to connect the browser, Code {e.status_code} (check your access, whitelist IP, password etc)")
|
||||
except websockets.exceptions.InvalidURI:
|
||||
@@ -274,7 +294,18 @@ class fetcher(Fetcher):
|
||||
raise BrowserConnectError(msg=f"Error connecting to the browser - Exception '{str(e)}'")
|
||||
|
||||
# more reliable is to just request a new page
|
||||
self.page = await self.browser.newPage()
|
||||
try:
|
||||
logger.debug(f"[{watch_uuid}] Creating new page")
|
||||
self.page = await self.browser.newPage()
|
||||
logger.debug(f"[{watch_uuid}] Page created successfully")
|
||||
except Exception as e:
|
||||
logger.error(f"[{watch_uuid}] Failed to create new page: {e}")
|
||||
# Browser is connected but page creation failed - must cleanup browser
|
||||
try:
|
||||
await asyncio.wait_for(self.browser.close(), timeout=3.0)
|
||||
except Exception as cleanup_error:
|
||||
logger.error(f"[{watch_uuid}] Failed to cleanup browser after page creation failure: {cleanup_error}")
|
||||
raise
|
||||
|
||||
# Add console handler to capture console.log from favicon fetcher
|
||||
#self.page.on('console', lambda msg: logger.debug(f"Browser console [{msg.type}]: {msg.text}"))
|
||||
@@ -343,6 +374,12 @@ class fetcher(Fetcher):
|
||||
w = extra_wait - 2 if extra_wait > 4 else 2
|
||||
logger.debug(f"Waiting {w} seconds before calling Page.stopLoading...")
|
||||
await asyncio.sleep(w)
|
||||
|
||||
# Check if page still exists (might have been closed due to error during sleep)
|
||||
if not self.page or not hasattr(self.page, '_client'):
|
||||
logger.debug("Page already closed, skipping stopLoading")
|
||||
return
|
||||
|
||||
logger.debug("Issuing stopLoading command...")
|
||||
await self.page._client.send('Page.stopLoading')
|
||||
logger.debug("stopLoading command sent!")
|
||||
@@ -368,7 +405,9 @@ class fetcher(Fetcher):
|
||||
asyncio.create_task(handle_frame_navigation())
|
||||
response = await self.page.goto(url, timeout=0)
|
||||
await asyncio.sleep(1 + extra_wait)
|
||||
await self.page._client.send('Page.stopLoading')
|
||||
# Check if page still exists before sending command
|
||||
if self.page and hasattr(self.page, '_client'):
|
||||
await self.page._client.send('Page.stopLoading')
|
||||
|
||||
if response:
|
||||
break
|
||||
@@ -437,15 +476,9 @@ class fetcher(Fetcher):
|
||||
logger.debug(f"Screenshot format {self.screenshot_format}")
|
||||
self.screenshot = await capture_full_page(page=self.page, screenshot_format=self.screenshot_format, watch_uuid=watch_uuid, lock_viewport_elements=self.lock_viewport_elements)
|
||||
|
||||
# Force aggressive memory cleanup - pyppeteer base64 decode creates temporary buffers
|
||||
# Force garbage collection - pyppeteer base64 decode creates temporary buffers
|
||||
import gc
|
||||
gc.collect()
|
||||
# Release C-level memory from base64 decode back to OS
|
||||
try:
|
||||
import ctypes
|
||||
ctypes.CDLL('libc.so.6').malloc_trim(0)
|
||||
except Exception:
|
||||
pass
|
||||
self.xpath_data = await self.page.evaluate(XPATH_ELEMENT_JS, {
|
||||
"visualselector_xpath_selectors": visualselector_xpath_selectors,
|
||||
"max_height": MAX_TOTAL_HEIGHT
|
||||
|
||||
@@ -730,7 +730,7 @@ class quickWatchForm(Form):
|
||||
url = fields.URLField(_l('URL'), validators=[validateURL()])
|
||||
tags = StringTagUUID(_l('Group tag'), validators=[validators.Optional()])
|
||||
watch_submit_button = SubmitField(_l('Watch'), render_kw={"class": "pure-button pure-button-primary"})
|
||||
processor = RadioField(_l('Processor'), choices=lambda: processors.available_processors(), default=processors.get_default_processor)
|
||||
processor = RadioField(_l('Processor'), choices=lambda: processors.available_processors(), default="text_json_diff")
|
||||
edit_and_watch_submit_button = SubmitField(_l('Edit > Watch'), render_kw={"class": "pure-button pure-button-primary"})
|
||||
|
||||
|
||||
@@ -749,7 +749,7 @@ class commonSettingsForm(Form):
|
||||
notification_format = SelectField(_l('Notification format'), choices=list(valid_notification_formats.items()))
|
||||
notification_title = StringField(_l('Notification Title'), default='ChangeDetection.io Notification - {{ watch_url }}', validators=[validators.Optional(), ValidateJinja2Template()])
|
||||
notification_urls = StringListField(_l('Notification URL List'), validators=[validators.Optional(), ValidateAppRiseServers(), ValidateJinja2Template()])
|
||||
processor = RadioField( label=_l("Processor - What do you want to achieve?"), choices=lambda: processors.available_processors(), default=processors.get_default_processor)
|
||||
processor = RadioField( label=_l("Processor - What do you want to achieve?"), choices=lambda: processors.available_processors(), default="text_json_diff")
|
||||
scheduler_timezone_default = StringField(_l("Default timezone for watch check scheduler"), render_kw={"list": "timezones"}, validators=[validateTimeZoneName()])
|
||||
webdriver_delay = IntegerField(_l('Wait seconds before extracting text'), validators=[validators.Optional(), validators.NumberRange(min=1, message=_l("Should contain one or more seconds"))])
|
||||
|
||||
@@ -763,7 +763,7 @@ class commonSettingsForm(Form):
|
||||
|
||||
|
||||
class importForm(Form):
|
||||
processor = RadioField(_l('Processor'), choices=lambda: processors.available_processors(), default=processors.get_default_processor)
|
||||
processor = RadioField(_l('Processor'), choices=lambda: processors.available_processors(), default="text_json_diff")
|
||||
urls = TextAreaField(_l('URLs'))
|
||||
xlsx_file = FileField(_l('Upload .xlsx file'), validators=[FileAllowed(['xlsx'], _l('Must be .xlsx file!'))])
|
||||
file_mapping = SelectField(_l('File mapping'), [validators.DataRequired()], choices={('wachete', 'Wachete mapping'), ('custom','Custom mapping')})
|
||||
|
||||
@@ -105,30 +105,6 @@ class ChangeDetectionSpec:
|
||||
"""
|
||||
pass
|
||||
|
||||
@hookspec
|
||||
def register_processor(self):
|
||||
"""Register an external processor plugin.
|
||||
|
||||
External packages can implement this hook to register custom processors
|
||||
that will be discovered alongside built-in processors.
|
||||
|
||||
Returns:
|
||||
dict or None: Dictionary with processor information:
|
||||
{
|
||||
'processor_name': str, # Machine name (e.g., 'osint_recon')
|
||||
'processor_module': module, # Module containing processor.py
|
||||
'processor_class': class, # The perform_site_check class
|
||||
'metadata': { # Optional metadata
|
||||
'name': str, # Display name
|
||||
'description': str, # Description
|
||||
'processor_weight': int,# Sort weight (lower = higher priority)
|
||||
'list_badge_text': str, # Badge text for UI
|
||||
}
|
||||
}
|
||||
Return None if this plugin doesn't provide a processor
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
# Set up Plugin Manager
|
||||
plugin_manager = pluggy.PluginManager(PLUGIN_NAMESPACE)
|
||||
|
||||
@@ -17,11 +17,9 @@ def find_sub_packages(package_name):
|
||||
return [name for _, name, is_pkg in pkgutil.iter_modules(package.__path__) if is_pkg]
|
||||
|
||||
|
||||
@lru_cache(maxsize=1)
|
||||
def find_processors():
|
||||
"""
|
||||
Find all subclasses of DifferenceDetectionProcessor in the specified package.
|
||||
Results are cached to avoid repeated discovery.
|
||||
|
||||
:param package_name: The name of the package to scan for processor modules.
|
||||
:return: A list of (module, class) tuples.
|
||||
@@ -48,22 +46,6 @@ def find_processors():
|
||||
except (ModuleNotFoundError, ImportError) as e:
|
||||
logger.warning(f"Failed to import module {module_name}: {e} (find_processors())")
|
||||
|
||||
# Discover plugin processors via pluggy
|
||||
try:
|
||||
from changedetectionio.pluggy_interface import plugin_manager
|
||||
plugin_results = plugin_manager.hook.register_processor()
|
||||
|
||||
for result in plugin_results:
|
||||
if result and isinstance(result, dict):
|
||||
processor_module = result.get('processor_module')
|
||||
processor_name = result.get('processor_name')
|
||||
|
||||
if processor_module and processor_name:
|
||||
processors.append((processor_module, processor_name))
|
||||
logger.info(f"Registered plugin processor: {processor_name}")
|
||||
except Exception as e:
|
||||
logger.warning(f"Error loading plugin processors: {e}")
|
||||
|
||||
return processors
|
||||
|
||||
|
||||
@@ -115,137 +97,54 @@ def find_processor_module(processor_name):
|
||||
return None
|
||||
|
||||
|
||||
def get_processor_module(processor_name):
|
||||
"""
|
||||
Get the actual processor module (with perform_site_check class) by name.
|
||||
Works for both built-in and plugin processors.
|
||||
|
||||
Args:
|
||||
processor_name: Processor machine name (e.g., 'text_json_diff', 'osint_recon')
|
||||
|
||||
Returns:
|
||||
module: The processor module containing perform_site_check, or None if not found
|
||||
"""
|
||||
processor_classes = find_processors()
|
||||
processor_tuple = next((tpl for tpl in processor_classes if tpl[1] == processor_name), None)
|
||||
|
||||
if processor_tuple:
|
||||
# Return the actual processor module (first element of tuple)
|
||||
return processor_tuple[0]
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def get_processor_submodule(processor_name, submodule_name):
|
||||
"""
|
||||
Get an optional submodule from a processor (e.g., 'difference', 'extract', 'preview').
|
||||
Works for both built-in and plugin processors.
|
||||
|
||||
Args:
|
||||
processor_name: Processor machine name (e.g., 'text_json_diff', 'osint_recon')
|
||||
submodule_name: Name of the submodule (e.g., 'difference', 'extract', 'preview')
|
||||
|
||||
Returns:
|
||||
module: The submodule if it exists, or None if not found
|
||||
"""
|
||||
processor_classes = find_processors()
|
||||
processor_tuple = next((tpl for tpl in processor_classes if tpl[1] == processor_name), None)
|
||||
|
||||
if not processor_tuple:
|
||||
return None
|
||||
|
||||
processor_module = processor_tuple[0]
|
||||
parent_module = get_parent_module(processor_module)
|
||||
|
||||
if not parent_module:
|
||||
return None
|
||||
|
||||
# Try to import the submodule
|
||||
try:
|
||||
# For built-in processors: changedetectionio.processors.text_json_diff.difference
|
||||
# For plugin processors: changedetectionio_osint.difference
|
||||
parent_module_name = parent_module.__name__
|
||||
submodule_full_name = f"{parent_module_name}.{submodule_name}"
|
||||
return importlib.import_module(submodule_full_name)
|
||||
except (ModuleNotFoundError, ImportError):
|
||||
return None
|
||||
|
||||
|
||||
@lru_cache(maxsize=1)
|
||||
def get_plugin_processor_metadata():
|
||||
"""Get metadata from plugin processors."""
|
||||
metadata = {}
|
||||
try:
|
||||
from changedetectionio.pluggy_interface import plugin_manager
|
||||
plugin_results = plugin_manager.hook.register_processor()
|
||||
|
||||
for result in plugin_results:
|
||||
if result and isinstance(result, dict):
|
||||
processor_name = result.get('processor_name')
|
||||
meta = result.get('metadata', {})
|
||||
if processor_name:
|
||||
metadata[processor_name] = meta
|
||||
except Exception as e:
|
||||
logger.warning(f"Error getting plugin processor metadata: {e}")
|
||||
return metadata
|
||||
|
||||
|
||||
def available_processors():
|
||||
"""
|
||||
Get a list of processors by name and description for the UI elements.
|
||||
Can be filtered via DISABLED_PROCESSORS environment variable (comma-separated list).
|
||||
Can be filtered via ALLOWED_PROCESSORS environment variable (comma-separated list).
|
||||
:return: A list :)
|
||||
"""
|
||||
|
||||
processor_classes = find_processors()
|
||||
|
||||
# Check if DISABLED_PROCESSORS env var is set
|
||||
disabled_processors_env = os.getenv('DISABLED_PROCESSORS', 'image_ssim_diff').strip()
|
||||
disabled_processors = []
|
||||
if disabled_processors_env:
|
||||
# Check if ALLOWED_PROCESSORS env var is set
|
||||
# For now we disable it, need to make a deploy with lots of new code and this will be an overload
|
||||
allowed_processors_env = os.getenv('ALLOWED_PROCESSORS', 'text_json_diff, restock_diff').strip()
|
||||
allowed_processors = None
|
||||
if allowed_processors_env:
|
||||
# Parse comma-separated list and strip whitespace
|
||||
disabled_processors = [p.strip() for p in disabled_processors_env.split(',') if p.strip()]
|
||||
logger.info(f"DISABLED_PROCESSORS set, disabling: {disabled_processors}")
|
||||
allowed_processors = [p.strip() for p in allowed_processors_env.split(',') if p.strip()]
|
||||
logger.info(f"ALLOWED_PROCESSORS set, filtering to: {allowed_processors}")
|
||||
|
||||
available = []
|
||||
plugin_metadata = get_plugin_processor_metadata()
|
||||
|
||||
for module, sub_package_name in processor_classes:
|
||||
# Skip disabled processors
|
||||
if sub_package_name in disabled_processors:
|
||||
logger.debug(f"Skipping processor '{sub_package_name}' (in DISABLED_PROCESSORS)")
|
||||
# Filter by allowed processors if set
|
||||
if allowed_processors and sub_package_name not in allowed_processors:
|
||||
logger.debug(f"Skipping processor '{sub_package_name}' (not in ALLOWED_PROCESSORS)")
|
||||
continue
|
||||
|
||||
# Check if this is a plugin processor
|
||||
if sub_package_name in plugin_metadata:
|
||||
meta = plugin_metadata[sub_package_name]
|
||||
description = gettext(meta.get('name', sub_package_name))
|
||||
# Plugin processors start from weight 10 to separate them from built-in processors
|
||||
weight = 100 + meta.get('processor_weight', 0)
|
||||
# Try to get the 'name' attribute from the processor module first
|
||||
if hasattr(module, 'name'):
|
||||
description = gettext(module.name)
|
||||
else:
|
||||
# Try to get the 'name' attribute from the processor module first
|
||||
if hasattr(module, 'name'):
|
||||
description = gettext(module.name)
|
||||
# Fall back to processor_description from parent module's __init__.py
|
||||
parent_module = get_parent_module(module)
|
||||
if parent_module and hasattr(parent_module, 'processor_description'):
|
||||
description = gettext(parent_module.processor_description)
|
||||
else:
|
||||
# Fall back to processor_description from parent module's __init__.py
|
||||
parent_module = get_parent_module(module)
|
||||
if parent_module and hasattr(parent_module, 'processor_description'):
|
||||
description = gettext(parent_module.processor_description)
|
||||
else:
|
||||
# Final fallback to a readable name
|
||||
description = sub_package_name.replace('_', ' ').title()
|
||||
# Final fallback to a readable name
|
||||
description = sub_package_name.replace('_', ' ').title()
|
||||
|
||||
# Get weight for sorting (lower weight = higher in list)
|
||||
weight = 0 # Default weight for processors without explicit weight
|
||||
# Get weight for sorting (lower weight = higher in list)
|
||||
weight = 0 # Default weight for processors without explicit weight
|
||||
|
||||
# Check processor module itself first
|
||||
if hasattr(module, 'processor_weight'):
|
||||
weight = module.processor_weight
|
||||
else:
|
||||
# Fall back to parent module (package __init__.py)
|
||||
parent_module = get_parent_module(module)
|
||||
if parent_module and hasattr(parent_module, 'processor_weight'):
|
||||
weight = parent_module.processor_weight
|
||||
# Check processor module itself first
|
||||
if hasattr(module, 'processor_weight'):
|
||||
weight = module.processor_weight
|
||||
else:
|
||||
# Fall back to parent module (package __init__.py)
|
||||
parent_module = get_parent_module(module)
|
||||
if parent_module and hasattr(parent_module, 'processor_weight'):
|
||||
weight = parent_module.processor_weight
|
||||
|
||||
available.append((sub_package_name, description, weight))
|
||||
|
||||
@@ -256,20 +155,6 @@ def available_processors():
|
||||
return [(name, desc) for name, desc, weight in available]
|
||||
|
||||
|
||||
def get_default_processor():
|
||||
"""
|
||||
Get the default processor to use when none is specified.
|
||||
Returns the first available processor based on weight (lowest weight = highest priority).
|
||||
This ensures forms auto-select a valid processor even when DISABLED_PROCESSORS filters the list.
|
||||
|
||||
:return: The processor name string (e.g., 'text_json_diff')
|
||||
"""
|
||||
available = available_processors()
|
||||
if available:
|
||||
return available[0][0] # Return the processor name from first tuple
|
||||
return 'text_json_diff' # Fallback if somehow no processors are available
|
||||
|
||||
|
||||
def get_processor_badge_texts():
|
||||
"""
|
||||
Get a dictionary mapping processor names to their list_badge_text values.
|
||||
@@ -394,76 +279,3 @@ def get_processor_badge_css():
|
||||
|
||||
return '\n\n'.join(css_rules)
|
||||
|
||||
|
||||
def save_processor_config(datastore, watch_uuid, config_data):
|
||||
"""
|
||||
Save processor-specific configuration to JSON file.
|
||||
|
||||
This is a shared helper function used by both the UI edit form and API endpoints
|
||||
to consistently handle processor configuration storage.
|
||||
|
||||
Args:
|
||||
datastore: The application datastore instance
|
||||
watch_uuid: UUID of the watch
|
||||
config_data: Dictionary of configuration data to save (with processor_config_* prefix removed)
|
||||
|
||||
Returns:
|
||||
bool: True if saved successfully, False otherwise
|
||||
"""
|
||||
if not config_data:
|
||||
return True
|
||||
|
||||
try:
|
||||
from changedetectionio.processors.base import difference_detection_processor
|
||||
|
||||
# Get processor name from watch
|
||||
watch = datastore.data['watching'].get(watch_uuid)
|
||||
if not watch:
|
||||
logger.error(f"Cannot save processor config: watch {watch_uuid} not found")
|
||||
return False
|
||||
|
||||
processor_name = watch.get('processor', 'text_json_diff')
|
||||
|
||||
# Create a processor instance to access config methods
|
||||
processor_instance = difference_detection_processor(datastore, watch_uuid)
|
||||
|
||||
# Use processor name as filename so each processor keeps its own config
|
||||
config_filename = f'{processor_name}.json'
|
||||
processor_instance.update_extra_watch_config(config_filename, config_data)
|
||||
|
||||
logger.debug(f"Saved processor config to {config_filename}: {config_data}")
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to save processor config: {e}")
|
||||
return False
|
||||
|
||||
|
||||
def extract_processor_config_from_form_data(form_data):
|
||||
"""
|
||||
Extract processor_config_* fields from form data and return separate dicts.
|
||||
|
||||
This is a shared helper function used by both the UI edit form and API endpoints
|
||||
to consistently handle processor configuration extraction.
|
||||
|
||||
IMPORTANT: This function modifies form_data in-place by removing processor_config_* fields.
|
||||
|
||||
Args:
|
||||
form_data: Dictionary of form data (will be modified in-place)
|
||||
|
||||
Returns:
|
||||
dict: Dictionary of processor config data (with processor_config_* prefix removed)
|
||||
"""
|
||||
processor_config_data = {}
|
||||
|
||||
# Use list() to create a copy of keys since we're modifying the dict
|
||||
for field_name in list(form_data.keys()):
|
||||
if field_name.startswith('processor_config_'):
|
||||
config_key = field_name.replace('processor_config_', '')
|
||||
# Save all values (including empty strings) to allow explicit clearing of settings
|
||||
processor_config_data[config_key] = form_data[field_name]
|
||||
# Remove from form_data to prevent it from reaching datastore
|
||||
del form_data[field_name]
|
||||
|
||||
return processor_config_data
|
||||
|
||||
|
||||
@@ -12,13 +12,6 @@ processor_description = "Visual/Screenshot change detection (Fast)"
|
||||
processor_name = "image_ssim_diff"
|
||||
processor_weight = 2 # Lower weight = appears at top, heavier weight = appears lower (bottom)
|
||||
|
||||
# Processor capabilities
|
||||
supports_visual_selector = True
|
||||
supports_browser_steps = True
|
||||
supports_text_filters_and_triggers = False
|
||||
supports_text_filters_and_triggers_elements = False
|
||||
supports_request_type = True
|
||||
|
||||
PROCESSOR_CONFIG_NAME = f"{Path(__file__).parent.name}.json"
|
||||
|
||||
# Subprocess timeout settings
|
||||
|
||||
@@ -4,13 +4,6 @@ from changedetectionio.model.Watch import model as BaseWatch
|
||||
from typing import Union
|
||||
import re
|
||||
|
||||
# Processor capabilities
|
||||
supports_visual_selector = True
|
||||
supports_browser_steps = True
|
||||
supports_text_filters_and_triggers = True
|
||||
supports_text_filters_and_triggers_elements = True
|
||||
supports_request_type = True
|
||||
|
||||
class Restock(dict):
|
||||
|
||||
def parse_currency(self, raw_value: str) -> Union[float, None]:
|
||||
|
||||
@@ -1,11 +1,5 @@
|
||||
from loguru import logger
|
||||
|
||||
# Processor capabilities
|
||||
supports_visual_selector = True
|
||||
supports_browser_steps = True
|
||||
supports_text_filters_and_triggers = True
|
||||
supports_text_filters_and_triggers_elements = True
|
||||
supports_request_type = True
|
||||
from loguru import logger
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -142,14 +142,11 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
|
||||
processor = watch.get('processor', 'text_json_diff')
|
||||
|
||||
# Init a new 'difference_detection_processor'
|
||||
# Use get_processor_module() to support both built-in and plugin processors
|
||||
from changedetectionio.processors import get_processor_module
|
||||
processor_module = get_processor_module(processor)
|
||||
|
||||
if not processor_module:
|
||||
error_msg = f"Processor module '{processor}' not found."
|
||||
logger.error(error_msg)
|
||||
raise ModuleNotFoundError(error_msg)
|
||||
try:
|
||||
processor_module = importlib.import_module(f"changedetectionio.processors.{processor}.processor")
|
||||
except ModuleNotFoundError as e:
|
||||
print(f"Processor module '{processor}' not found.")
|
||||
raise e
|
||||
|
||||
update_handler = processor_module.perform_site_check(datastore=datastore,
|
||||
watch_uuid=uuid)
|
||||
@@ -478,14 +475,9 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
|
||||
del update_handler
|
||||
update_handler = None
|
||||
|
||||
# Force aggressive memory cleanup after clearing
|
||||
# Force garbage collection
|
||||
import gc
|
||||
gc.collect()
|
||||
try:
|
||||
import ctypes
|
||||
ctypes.CDLL('libc.so.6').malloc_trim(0)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Worker {worker_id} unexpected error processing {uuid}: {e}")
|
||||
@@ -498,6 +490,7 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
|
||||
finally:
|
||||
# Always cleanup - this runs whether there was an exception or not
|
||||
if uuid:
|
||||
# Call quit() as backup (Puppeteer/Playwright have internal cleanup, but this acts as safety net)
|
||||
try:
|
||||
if update_handler and hasattr(update_handler, 'fetcher') and update_handler.fetcher:
|
||||
await update_handler.fetcher.quit(watch=watch)
|
||||
@@ -506,35 +499,25 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
|
||||
try:
|
||||
# Release UUID from processing (thread-safe)
|
||||
worker_pool.release_uuid_from_processing(uuid, worker_id=worker_id)
|
||||
|
||||
|
||||
# Send completion signal
|
||||
if watch:
|
||||
#logger.info(f"Worker {worker_id} sending completion signal for UUID {watch['uuid']}")
|
||||
watch_check_update.send(watch_uuid=watch['uuid'])
|
||||
|
||||
# Explicitly clean up update_handler and all its references
|
||||
# Clean up all memory references BEFORE garbage collection
|
||||
if update_handler:
|
||||
# Clear fetcher content using the proper method
|
||||
if hasattr(update_handler, 'fetcher') and update_handler.fetcher:
|
||||
update_handler.fetcher.clear_content()
|
||||
|
||||
# Clear processor references
|
||||
if hasattr(update_handler, 'content_processor'):
|
||||
update_handler.content_processor = None
|
||||
|
||||
del update_handler
|
||||
update_handler = None
|
||||
|
||||
# Clear local contents variable if it still exists
|
||||
# Clear large content variables
|
||||
if 'contents' in locals():
|
||||
del contents
|
||||
|
||||
# Note: We don't set watch = None here because:
|
||||
# 1. watch is just a local reference to datastore.data['watching'][uuid]
|
||||
# 2. Setting it to None doesn't affect the datastore
|
||||
# 3. GC can't collect the object anyway (still referenced by datastore)
|
||||
# 4. It would just cause confusion
|
||||
|
||||
# Force garbage collection after cleanup
|
||||
# Force garbage collection after all references are cleared
|
||||
import gc
|
||||
gc.collect()
|
||||
|
||||
|
||||
@@ -16,13 +16,6 @@ services:
|
||||
# Log output levels: TRACE, DEBUG(default), INFO, SUCCESS, WARNING, ERROR, CRITICAL
|
||||
# - LOGGER_LEVEL=TRACE
|
||||
#
|
||||
# Plugins! See https://changedetection.io/plugins for more plugins.
|
||||
# Install additional Python packages (processor plugins, etc.)
|
||||
# Example: Install the OSINT reconnaissance processor plugin
|
||||
# - EXTRA_PACKAGES=changedetection.io-osint
|
||||
# Multiple packages can be installed by separating with spaces:
|
||||
# - EXTRA_PACKAGES=changedetection.io-osint another-plugin
|
||||
#
|
||||
#
|
||||
# Uncomment below and the "sockpuppetbrowser" to use a real Chrome browser (It uses the "playwright" protocol)
|
||||
# - PLAYWRIGHT_DRIVER_URL=ws://browser-sockpuppet-chrome:3000
|
||||
|
||||
@@ -1,28 +0,0 @@
|
||||
#!/bin/bash
|
||||
set -e
|
||||
|
||||
# Install additional packages from EXTRA_PACKAGES env var
|
||||
# Uses a marker file to avoid reinstalling on every container restart
|
||||
INSTALLED_MARKER="/datastore/.extra_packages_installed"
|
||||
CURRENT_PACKAGES="$EXTRA_PACKAGES"
|
||||
|
||||
if [ -n "$EXTRA_PACKAGES" ]; then
|
||||
# Check if we need to install/update packages
|
||||
if [ ! -f "$INSTALLED_MARKER" ] || [ "$(cat $INSTALLED_MARKER 2>/dev/null)" != "$CURRENT_PACKAGES" ]; then
|
||||
echo "Installing extra packages: $EXTRA_PACKAGES"
|
||||
pip3 install --no-cache-dir $EXTRA_PACKAGES
|
||||
|
||||
if [ $? -eq 0 ]; then
|
||||
echo "$CURRENT_PACKAGES" > "$INSTALLED_MARKER"
|
||||
echo "Extra packages installed successfully"
|
||||
else
|
||||
echo "ERROR: Failed to install extra packages"
|
||||
exit 1
|
||||
fi
|
||||
else
|
||||
echo "Extra packages already installed: $EXTRA_PACKAGES"
|
||||
fi
|
||||
fi
|
||||
|
||||
# Execute the main command
|
||||
exec "$@"
|
||||
@@ -51,9 +51,9 @@ linkify-it-py
|
||||
|
||||
# - Needed for apprise/spush, and maybe others? hopefully doesnt trigger a rust compile.
|
||||
# - Requires extra wheel for rPi, adds build time for arm/v8 which is not in piwheels
|
||||
# Pinned to 44.x for ARM compatibility and sslyze compatibility (sslyze requires <45) and (45.x may not have pre-built ARM wheels)
|
||||
# Pinned to 43.0.1 for ARM compatibility (45.x may not have pre-built ARM wheels)
|
||||
# Also pinned because dependabot wants specific versions
|
||||
cryptography==44.0.0
|
||||
cryptography==46.0.3
|
||||
|
||||
# apprise mqtt https://github.com/dgtlmoon/changedetection.io/issues/315
|
||||
# use any version other than 2.0.x due to https://github.com/eclipse/paho.mqtt.python/issues/814
|
||||
|
||||
Reference in New Issue
Block a user