mirror of
https://github.com/dgtlmoon/changedetection.io.git
synced 2026-02-04 05:16:08 +00:00
Compare commits
13 Commits
master
...
processor-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c7050077be | ||
|
|
5f25e3825c | ||
|
|
041c1ad531 | ||
|
|
e958acebed | ||
|
|
b826d9b236 | ||
|
|
bc3efbff27 | ||
|
|
063ee38099 | ||
|
|
5007b8201e | ||
|
|
7e853a4b46 | ||
|
|
4dc5301de4 | ||
|
|
edf0989cd4 | ||
|
|
423b546948 | ||
|
|
c1c810a79a |
@@ -138,6 +138,15 @@ ENV LOGGER_LEVEL="$LOGGER_LEVEL"
|
|||||||
ENV LC_ALL=en_US.UTF-8
|
ENV LC_ALL=en_US.UTF-8
|
||||||
|
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
|
|
||||||
|
# Copy and set up entrypoint script for installing extra packages
|
||||||
|
COPY docker-entrypoint.sh /docker-entrypoint.sh
|
||||||
|
RUN chmod +x /docker-entrypoint.sh
|
||||||
|
|
||||||
|
# Set entrypoint to handle EXTRA_PACKAGES env var
|
||||||
|
ENTRYPOINT ["/docker-entrypoint.sh"]
|
||||||
|
|
||||||
|
# Default command (can be overridden in docker-compose.yml)
|
||||||
CMD ["python", "./changedetection.py", "-d", "/datastore"]
|
CMD ["python", "./changedetection.py", "-d", "/datastore"]
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -169,58 +169,18 @@ class Watch(Resource):
|
|||||||
|
|
||||||
# Handle processor-config-* fields separately (save to JSON, not datastore)
|
# Handle processor-config-* fields separately (save to JSON, not datastore)
|
||||||
from changedetectionio import processors
|
from changedetectionio import processors
|
||||||
processor_config_data = {}
|
|
||||||
regular_data = {}
|
|
||||||
|
|
||||||
for key, value in request.json.items():
|
# Make a mutable copy of request.json for modification
|
||||||
if key.startswith('processor_config_'):
|
json_data = dict(request.json)
|
||||||
config_key = key.replace('processor_config_', '')
|
|
||||||
if value: # Only save non-empty values
|
# Extract and remove processor config fields from json_data
|
||||||
processor_config_data[config_key] = value
|
processor_config_data = processors.extract_processor_config_from_form_data(json_data)
|
||||||
else:
|
|
||||||
regular_data[key] = value
|
|
||||||
|
|
||||||
# Update watch with regular (non-processor-config) fields
|
# Update watch with regular (non-processor-config) fields
|
||||||
watch.update(regular_data)
|
watch.update(json_data)
|
||||||
|
|
||||||
# Save processor config to JSON file if any config data exists
|
# Save processor config to JSON file
|
||||||
if processor_config_data:
|
processors.save_processor_config(self.datastore, uuid, processor_config_data)
|
||||||
try:
|
|
||||||
processor_name = request.json.get('processor', watch.get('processor'))
|
|
||||||
if processor_name:
|
|
||||||
# Create a processor instance to access config methods
|
|
||||||
from changedetectionio.processors import difference_detection_processor
|
|
||||||
processor_instance = difference_detection_processor(self.datastore, uuid)
|
|
||||||
# Use processor name as filename so each processor keeps its own config
|
|
||||||
config_filename = f'{processor_name}.json'
|
|
||||||
processor_instance.update_extra_watch_config(config_filename, processor_config_data)
|
|
||||||
logger.debug(f"API: Saved processor config to {config_filename}: {processor_config_data}")
|
|
||||||
|
|
||||||
# Call optional edit_hook if processor has one
|
|
||||||
try:
|
|
||||||
import importlib
|
|
||||||
edit_hook_module_name = f'changedetectionio.processors.{processor_name}.edit_hook'
|
|
||||||
|
|
||||||
try:
|
|
||||||
edit_hook = importlib.import_module(edit_hook_module_name)
|
|
||||||
logger.debug(f"API: Found edit_hook module for {processor_name}")
|
|
||||||
|
|
||||||
if hasattr(edit_hook, 'on_config_save'):
|
|
||||||
logger.info(f"API: Calling edit_hook.on_config_save for {processor_name}")
|
|
||||||
# Call hook and get updated config
|
|
||||||
updated_config = edit_hook.on_config_save(watch, processor_config_data, self.datastore)
|
|
||||||
# Save updated config back to file
|
|
||||||
processor_instance.update_extra_watch_config(config_filename, updated_config)
|
|
||||||
logger.info(f"API: Edit hook updated config: {updated_config}")
|
|
||||||
else:
|
|
||||||
logger.debug(f"API: Edit hook module found but no on_config_save function")
|
|
||||||
except ModuleNotFoundError:
|
|
||||||
logger.debug(f"API: No edit_hook module for processor {processor_name} (this is normal)")
|
|
||||||
except Exception as hook_error:
|
|
||||||
logger.error(f"API: Edit hook error (non-fatal): {hook_error}", exc_info=True)
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"API: Failed to save processor config: {e}")
|
|
||||||
|
|
||||||
return "OK", 200
|
return "OK", 200
|
||||||
|
|
||||||
|
|||||||
@@ -26,8 +26,9 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
|||||||
# URL List import
|
# URL List import
|
||||||
if request.values.get('urls') and len(request.values.get('urls').strip()):
|
if request.values.get('urls') and len(request.values.get('urls').strip()):
|
||||||
# Import and push into the queue for immediate update check
|
# Import and push into the queue for immediate update check
|
||||||
|
from changedetectionio import processors
|
||||||
importer_handler = import_url_list()
|
importer_handler = import_url_list()
|
||||||
importer_handler.run(data=request.values.get('urls'), flash=flash, datastore=datastore, processor=request.values.get('processor', 'text_json_diff'))
|
importer_handler.run(data=request.values.get('urls'), flash=flash, datastore=datastore, processor=request.values.get('processor', processors.get_default_processor()))
|
||||||
logger.debug(f"Imported {len(importer_handler.new_uuids)} new UUIDs")
|
logger.debug(f"Imported {len(importer_handler.new_uuids)} new UUIDs")
|
||||||
# Dont' add to queue because scheduler can see that they haven't been checked and will add them to the queue
|
# Dont' add to queue because scheduler can see that they haven't been checked and will add them to the queue
|
||||||
# for uuid in importer_handler.new_uuids:
|
# for uuid in importer_handler.new_uuids:
|
||||||
|
|||||||
@@ -100,23 +100,21 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
|||||||
# Get the processor type for this watch
|
# Get the processor type for this watch
|
||||||
processor_name = watch.get('processor', 'text_json_diff')
|
processor_name = watch.get('processor', 'text_json_diff')
|
||||||
|
|
||||||
try:
|
# Try to get the processor's difference module (works for both built-in and plugin processors)
|
||||||
# Try to import the processor's difference module
|
from changedetectionio.processors import get_processor_submodule
|
||||||
processor_module = importlib.import_module(f'changedetectionio.processors.{processor_name}.difference')
|
processor_module = get_processor_submodule(processor_name, 'difference')
|
||||||
|
|
||||||
# Call the processor's render() function
|
# Call the processor's render() function
|
||||||
if hasattr(processor_module, 'render'):
|
if processor_module and hasattr(processor_module, 'render'):
|
||||||
return processor_module.render(
|
return processor_module.render(
|
||||||
watch=watch,
|
watch=watch,
|
||||||
datastore=datastore,
|
datastore=datastore,
|
||||||
request=request,
|
request=request,
|
||||||
url_for=url_for,
|
url_for=url_for,
|
||||||
render_template=render_template,
|
render_template=render_template,
|
||||||
flash=flash,
|
flash=flash,
|
||||||
redirect=redirect
|
redirect=redirect
|
||||||
)
|
)
|
||||||
except (ImportError, ModuleNotFoundError) as e:
|
|
||||||
logger.warning(f"Processor {processor_name} does not have a difference module, falling back to text_json_diff: {e}")
|
|
||||||
|
|
||||||
# Fallback: if processor doesn't have difference module, use text_json_diff as default
|
# Fallback: if processor doesn't have difference module, use text_json_diff as default
|
||||||
from changedetectionio.processors.text_json_diff.difference import render as default_render
|
from changedetectionio.processors.text_json_diff.difference import render as default_render
|
||||||
@@ -156,23 +154,21 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
|||||||
# Get the processor type for this watch
|
# Get the processor type for this watch
|
||||||
processor_name = watch.get('processor', 'text_json_diff')
|
processor_name = watch.get('processor', 'text_json_diff')
|
||||||
|
|
||||||
try:
|
# Try to get the processor's extract module (works for both built-in and plugin processors)
|
||||||
# Try to import the processor's extract module
|
from changedetectionio.processors import get_processor_submodule
|
||||||
processor_module = importlib.import_module(f'changedetectionio.processors.{processor_name}.extract')
|
processor_module = get_processor_submodule(processor_name, 'extract')
|
||||||
|
|
||||||
# Call the processor's render_form() function
|
# Call the processor's render_form() function
|
||||||
if hasattr(processor_module, 'render_form'):
|
if processor_module and hasattr(processor_module, 'render_form'):
|
||||||
return processor_module.render_form(
|
return processor_module.render_form(
|
||||||
watch=watch,
|
watch=watch,
|
||||||
datastore=datastore,
|
datastore=datastore,
|
||||||
request=request,
|
request=request,
|
||||||
url_for=url_for,
|
url_for=url_for,
|
||||||
render_template=render_template,
|
render_template=render_template,
|
||||||
flash=flash,
|
flash=flash,
|
||||||
redirect=redirect
|
redirect=redirect
|
||||||
)
|
)
|
||||||
except (ImportError, ModuleNotFoundError) as e:
|
|
||||||
logger.warning(f"Processor {processor_name} does not have an extract module, falling back to base extractor: {e}")
|
|
||||||
|
|
||||||
# Fallback: if processor doesn't have extract module, use base processors.extract as default
|
# Fallback: if processor doesn't have extract module, use base processors.extract as default
|
||||||
from changedetectionio.processors.extract import render_form as default_render_form
|
from changedetectionio.processors.extract import render_form as default_render_form
|
||||||
@@ -212,24 +208,22 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
|||||||
# Get the processor type for this watch
|
# Get the processor type for this watch
|
||||||
processor_name = watch.get('processor', 'text_json_diff')
|
processor_name = watch.get('processor', 'text_json_diff')
|
||||||
|
|
||||||
try:
|
# Try to get the processor's extract module (works for both built-in and plugin processors)
|
||||||
# Try to import the processor's extract module
|
from changedetectionio.processors import get_processor_submodule
|
||||||
processor_module = importlib.import_module(f'changedetectionio.processors.{processor_name}.extract')
|
processor_module = get_processor_submodule(processor_name, 'extract')
|
||||||
|
|
||||||
# Call the processor's process_extraction() function
|
# Call the processor's process_extraction() function
|
||||||
if hasattr(processor_module, 'process_extraction'):
|
if processor_module and hasattr(processor_module, 'process_extraction'):
|
||||||
return processor_module.process_extraction(
|
return processor_module.process_extraction(
|
||||||
watch=watch,
|
watch=watch,
|
||||||
datastore=datastore,
|
datastore=datastore,
|
||||||
request=request,
|
request=request,
|
||||||
url_for=url_for,
|
url_for=url_for,
|
||||||
make_response=make_response,
|
make_response=make_response,
|
||||||
send_from_directory=send_from_directory,
|
send_from_directory=send_from_directory,
|
||||||
flash=flash,
|
flash=flash,
|
||||||
redirect=redirect
|
redirect=redirect
|
||||||
)
|
)
|
||||||
except (ImportError, ModuleNotFoundError) as e:
|
|
||||||
logger.warning(f"Processor {processor_name} does not have an extract module, falling back to base extractor: {e}")
|
|
||||||
|
|
||||||
# Fallback: if processor doesn't have extract module, use base processors.extract as default
|
# Fallback: if processor doesn't have extract module, use base processors.extract as default
|
||||||
from changedetectionio.processors.extract import process_extraction as default_process_extraction
|
from changedetectionio.processors.extract import process_extraction as default_process_extraction
|
||||||
@@ -279,38 +273,33 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
|||||||
# Get the processor type for this watch
|
# Get the processor type for this watch
|
||||||
processor_name = watch.get('processor', 'text_json_diff')
|
processor_name = watch.get('processor', 'text_json_diff')
|
||||||
|
|
||||||
try:
|
# Try to get the processor's difference module (works for both built-in and plugin processors)
|
||||||
# Try to import the processor's difference module
|
from changedetectionio.processors import get_processor_submodule
|
||||||
processor_module = importlib.import_module(f'changedetectionio.processors.{processor_name}.difference')
|
processor_module = get_processor_submodule(processor_name, 'difference')
|
||||||
|
|
||||||
# Call the processor's get_asset() function
|
# Call the processor's get_asset() function
|
||||||
if hasattr(processor_module, 'get_asset'):
|
if processor_module and hasattr(processor_module, 'get_asset'):
|
||||||
result = processor_module.get_asset(
|
result = processor_module.get_asset(
|
||||||
asset_name=asset_name,
|
asset_name=asset_name,
|
||||||
watch=watch,
|
watch=watch,
|
||||||
datastore=datastore,
|
datastore=datastore,
|
||||||
request=request
|
request=request
|
||||||
)
|
)
|
||||||
|
|
||||||
if result is None:
|
if result is None:
|
||||||
from flask import abort
|
|
||||||
abort(404, description=f"Asset '{asset_name}' not found")
|
|
||||||
|
|
||||||
binary_data, content_type, cache_control = result
|
|
||||||
|
|
||||||
response = make_response(binary_data)
|
|
||||||
response.headers['Content-Type'] = content_type
|
|
||||||
if cache_control:
|
|
||||||
response.headers['Cache-Control'] = cache_control
|
|
||||||
return response
|
|
||||||
else:
|
|
||||||
logger.warning(f"Processor {processor_name} does not implement get_asset()")
|
|
||||||
from flask import abort
|
from flask import abort
|
||||||
abort(404, description=f"Processor '{processor_name}' does not support assets")
|
abort(404, description=f"Asset '{asset_name}' not found")
|
||||||
|
|
||||||
except (ImportError, ModuleNotFoundError) as e:
|
binary_data, content_type, cache_control = result
|
||||||
logger.warning(f"Processor {processor_name} does not have a difference module: {e}")
|
|
||||||
|
response = make_response(binary_data)
|
||||||
|
response.headers['Content-Type'] = content_type
|
||||||
|
if cache_control:
|
||||||
|
response.headers['Cache-Control'] = cache_control
|
||||||
|
return response
|
||||||
|
else:
|
||||||
|
logger.warning(f"Processor {processor_name} does not implement get_asset()")
|
||||||
from flask import abort
|
from flask import abort
|
||||||
abort(404, description=f"Processor '{processor_name}' not found")
|
abort(404, description=f"Processor '{processor_name}' does not support assets")
|
||||||
|
|
||||||
return diff_blueprint
|
return diff_blueprint
|
||||||
|
|||||||
@@ -71,8 +71,13 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
|||||||
processor_name = datastore.data['watching'][uuid].get('processor', '')
|
processor_name = datastore.data['watching'][uuid].get('processor', '')
|
||||||
processor_classes = next((tpl for tpl in processors.find_processors() if tpl[1] == processor_name), None)
|
processor_classes = next((tpl for tpl in processors.find_processors() if tpl[1] == processor_name), None)
|
||||||
if not processor_classes:
|
if not processor_classes:
|
||||||
flash(gettext("Cannot load the edit form for processor/plugin '{}', plugin missing?").format(processor_classes[1]), 'error')
|
flash(gettext("Could not load '{}' processor, processor plugin might be missing. Please select a different processor.").format(processor_name), 'error')
|
||||||
return redirect(url_for('watchlist.index'))
|
# Fall back to default processor so user can still edit and change processor
|
||||||
|
processor_classes = next((tpl for tpl in processors.find_processors() if tpl[1] == 'text_json_diff'), None)
|
||||||
|
if not processor_classes:
|
||||||
|
# If even text_json_diff is missing, something is very wrong
|
||||||
|
flash(gettext("Could not load '{}' processor, processor plugin might be missing.").format(processor_name), 'error')
|
||||||
|
return redirect(url_for('watchlist.index'))
|
||||||
|
|
||||||
parent_module = processors.get_parent_module(processor_classes[0])
|
parent_module = processors.get_parent_module(processor_classes[0])
|
||||||
|
|
||||||
@@ -149,58 +154,10 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
|||||||
|
|
||||||
extra_update_obj['time_between_check'] = form.time_between_check.data
|
extra_update_obj['time_between_check'] = form.time_between_check.data
|
||||||
|
|
||||||
# Handle processor-config-* fields separately (save to JSON, not datastore)
|
# Handle processor-config-* fields separately (save to JSON, not datastore)
|
||||||
processor_config_data = {}
|
# IMPORTANT: These must NOT be saved to url-watches.json, only to the processor-specific JSON file
|
||||||
fields_to_remove = []
|
processor_config_data = processors.extract_processor_config_from_form_data(form.data)
|
||||||
for field_name, field_value in form.data.items():
|
processors.save_processor_config(datastore, uuid, processor_config_data)
|
||||||
if field_name.startswith('processor_config_'):
|
|
||||||
config_key = field_name.replace('processor_config_', '')
|
|
||||||
if field_value: # Only save non-empty values
|
|
||||||
processor_config_data[config_key] = field_value
|
|
||||||
fields_to_remove.append(field_name)
|
|
||||||
|
|
||||||
# Save processor config to JSON file if any config data exists
|
|
||||||
if processor_config_data:
|
|
||||||
try:
|
|
||||||
processor_name = form.data.get('processor')
|
|
||||||
# Create a processor instance to access config methods
|
|
||||||
processor_instance = processors.difference_detection_processor(datastore, uuid)
|
|
||||||
# Use processor name as filename so each processor keeps its own config
|
|
||||||
config_filename = f'{processor_name}.json'
|
|
||||||
processor_instance.update_extra_watch_config(config_filename, processor_config_data)
|
|
||||||
logger.debug(f"Saved processor config to {config_filename}: {processor_config_data}")
|
|
||||||
|
|
||||||
# Call optional edit_hook if processor has one
|
|
||||||
try:
|
|
||||||
# Try to import the edit_hook module from the processor package
|
|
||||||
import importlib
|
|
||||||
edit_hook_module_name = f'changedetectionio.processors.{processor_name}.edit_hook'
|
|
||||||
|
|
||||||
try:
|
|
||||||
edit_hook = importlib.import_module(edit_hook_module_name)
|
|
||||||
logger.debug(f"Found edit_hook module for {processor_name}")
|
|
||||||
|
|
||||||
if hasattr(edit_hook, 'on_config_save'):
|
|
||||||
logger.info(f"Calling edit_hook.on_config_save for {processor_name}")
|
|
||||||
watch_obj = datastore.data['watching'][uuid]
|
|
||||||
# Call hook and get updated config
|
|
||||||
updated_config = edit_hook.on_config_save(watch_obj, processor_config_data, datastore)
|
|
||||||
# Save updated config back to file
|
|
||||||
processor_instance.update_extra_watch_config(config_filename, updated_config)
|
|
||||||
logger.info(f"Edit hook updated config: {updated_config}")
|
|
||||||
else:
|
|
||||||
logger.debug(f"Edit hook module found but no on_config_save function")
|
|
||||||
except ModuleNotFoundError:
|
|
||||||
logger.debug(f"No edit_hook module for processor {processor_name} (this is normal)")
|
|
||||||
except Exception as hook_error:
|
|
||||||
logger.error(f"Edit hook error (non-fatal): {hook_error}", exc_info=True)
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Failed to save processor config: {e}")
|
|
||||||
|
|
||||||
# Remove processor-config-* fields from form.data before updating datastore
|
|
||||||
for field_name in fields_to_remove:
|
|
||||||
form.data.pop(field_name, None)
|
|
||||||
|
|
||||||
# Ignore text
|
# Ignore text
|
||||||
form_ignore_text = form.ignore_text.data
|
form_ignore_text = form.ignore_text.data
|
||||||
@@ -310,6 +267,13 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
|||||||
|
|
||||||
# Get fetcher capabilities instead of hardcoded logic
|
# Get fetcher capabilities instead of hardcoded logic
|
||||||
capabilities = get_fetcher_capabilities(watch, datastore)
|
capabilities = get_fetcher_capabilities(watch, datastore)
|
||||||
|
|
||||||
|
# Add processor capabilities from module
|
||||||
|
capabilities['supports_visual_selector'] = getattr(parent_module, 'supports_visual_selector', False)
|
||||||
|
capabilities['supports_text_filters_and_triggers'] = getattr(parent_module, 'supports_text_filters_and_triggers', False)
|
||||||
|
capabilities['supports_text_filters_and_triggers_elements'] = getattr(parent_module, 'supports_text_filters_and_triggers_elements', False)
|
||||||
|
capabilities['supports_request_type'] = getattr(parent_module, 'supports_request_type', False)
|
||||||
|
|
||||||
app_rss_token = datastore.data['settings']['application'].get('rss_access_token'),
|
app_rss_token = datastore.data['settings']['application'].get('rss_access_token'),
|
||||||
|
|
||||||
c = [f"processor-{watch.get('processor')}"]
|
c = [f"processor-{watch.get('processor')}"]
|
||||||
|
|||||||
@@ -38,24 +38,21 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
|||||||
# Get the processor type for this watch
|
# Get the processor type for this watch
|
||||||
processor_name = watch.get('processor', 'text_json_diff')
|
processor_name = watch.get('processor', 'text_json_diff')
|
||||||
|
|
||||||
try:
|
# Try to get the processor's preview module (works for both built-in and plugin processors)
|
||||||
# Try to import the processor's preview module
|
from changedetectionio.processors import get_processor_submodule
|
||||||
import importlib
|
processor_module = get_processor_submodule(processor_name, 'preview')
|
||||||
processor_module = importlib.import_module(f'changedetectionio.processors.{processor_name}.preview')
|
|
||||||
|
|
||||||
# Call the processor's render() function
|
# Call the processor's render() function
|
||||||
if hasattr(processor_module, 'render'):
|
if processor_module and hasattr(processor_module, 'render'):
|
||||||
return processor_module.render(
|
return processor_module.render(
|
||||||
watch=watch,
|
watch=watch,
|
||||||
datastore=datastore,
|
datastore=datastore,
|
||||||
request=request,
|
request=request,
|
||||||
url_for=url_for,
|
url_for=url_for,
|
||||||
render_template=render_template,
|
render_template=render_template,
|
||||||
flash=flash,
|
flash=flash,
|
||||||
redirect=redirect
|
redirect=redirect
|
||||||
)
|
)
|
||||||
except (ImportError, ModuleNotFoundError) as e:
|
|
||||||
logger.debug(f"Processor {processor_name} does not have a preview module, using default preview: {e}")
|
|
||||||
|
|
||||||
# Fallback: if processor doesn't have preview module, use default text preview
|
# Fallback: if processor doesn't have preview module, use default text preview
|
||||||
content = []
|
content = []
|
||||||
@@ -160,39 +157,33 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
|||||||
# Get the processor type for this watch
|
# Get the processor type for this watch
|
||||||
processor_name = watch.get('processor', 'text_json_diff')
|
processor_name = watch.get('processor', 'text_json_diff')
|
||||||
|
|
||||||
try:
|
# Try to get the processor's preview module (works for both built-in and plugin processors)
|
||||||
# Try to import the processor's preview module
|
from changedetectionio.processors import get_processor_submodule
|
||||||
import importlib
|
processor_module = get_processor_submodule(processor_name, 'preview')
|
||||||
processor_module = importlib.import_module(f'changedetectionio.processors.{processor_name}.preview')
|
|
||||||
|
|
||||||
# Call the processor's get_asset() function
|
# Call the processor's get_asset() function
|
||||||
if hasattr(processor_module, 'get_asset'):
|
if processor_module and hasattr(processor_module, 'get_asset'):
|
||||||
result = processor_module.get_asset(
|
result = processor_module.get_asset(
|
||||||
asset_name=asset_name,
|
asset_name=asset_name,
|
||||||
watch=watch,
|
watch=watch,
|
||||||
datastore=datastore,
|
datastore=datastore,
|
||||||
request=request
|
request=request
|
||||||
)
|
)
|
||||||
|
|
||||||
if result is None:
|
if result is None:
|
||||||
from flask import abort
|
|
||||||
abort(404, description=f"Asset '{asset_name}' not found")
|
|
||||||
|
|
||||||
binary_data, content_type, cache_control = result
|
|
||||||
|
|
||||||
response = make_response(binary_data)
|
|
||||||
response.headers['Content-Type'] = content_type
|
|
||||||
if cache_control:
|
|
||||||
response.headers['Cache-Control'] = cache_control
|
|
||||||
return response
|
|
||||||
else:
|
|
||||||
logger.warning(f"Processor {processor_name} does not implement get_asset()")
|
|
||||||
from flask import abort
|
from flask import abort
|
||||||
abort(404, description=f"Processor '{processor_name}' does not support assets")
|
abort(404, description=f"Asset '{asset_name}' not found")
|
||||||
|
|
||||||
except (ImportError, ModuleNotFoundError) as e:
|
binary_data, content_type, cache_control = result
|
||||||
logger.warning(f"Processor {processor_name} does not have a preview module: {e}")
|
|
||||||
|
response = make_response(binary_data)
|
||||||
|
response.headers['Content-Type'] = content_type
|
||||||
|
if cache_control:
|
||||||
|
response.headers['Cache-Control'] = cache_control
|
||||||
|
return response
|
||||||
|
else:
|
||||||
|
logger.warning(f"Processor {processor_name} does not implement get_asset()")
|
||||||
from flask import abort
|
from flask import abort
|
||||||
abort(404, description=f"Processor '{processor_name}' not found")
|
abort(404, description=f"Processor '{processor_name}' does not support assets")
|
||||||
|
|
||||||
return preview_blueprint
|
return preview_blueprint
|
||||||
|
|||||||
@@ -45,14 +45,19 @@
|
|||||||
<div class="tabs collapsable">
|
<div class="tabs collapsable">
|
||||||
<ul>
|
<ul>
|
||||||
<li class="tab"><a href="#general">{{ _('General') }}</a></li>
|
<li class="tab"><a href="#general">{{ _('General') }}</a></li>
|
||||||
|
{% if capabilities.supports_request_type %}
|
||||||
<li class="tab"><a href="#request">{{ _('Request') }}</a></li>
|
<li class="tab"><a href="#request">{{ _('Request') }}</a></li>
|
||||||
|
{% endif %}
|
||||||
{% if extra_tab_content %}
|
{% if extra_tab_content %}
|
||||||
<li class="tab"><a href="#extras_tab">{{ extra_tab_content }}</a></li>
|
<li class="tab"><a href="#extras_tab">{{ extra_tab_content }}</a></li>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
|
{% if capabilities.supports_browser_steps %}
|
||||||
<li class="tab"><a id="browsersteps-tab" href="#browser-steps">{{ _('Browser Steps') }}</a></li>
|
<li class="tab"><a id="browsersteps-tab" href="#browser-steps">{{ _('Browser Steps') }}</a></li>
|
||||||
<!-- should goto extra forms? -->
|
{% endif %}
|
||||||
{% if watch['processor'] == 'text_json_diff' or watch['processor'] == 'image_ssim_diff' %}
|
{% if capabilities.supports_visual_selector %}
|
||||||
<li class="tab"><a id="visualselector-tab" href="#visualselector">{{ _('Visual Filter Selector') }}</a></li>
|
<li class="tab"><a id="visualselector-tab" href="#visualselector">{{ _('Visual Filter Selector') }}</a></li>
|
||||||
|
{% endif %}
|
||||||
|
{% if capabilities.supports_text_filters_and_triggers %}
|
||||||
<li class="tab" id="filters-and-triggers-tab"><a href="#filters-and-triggers">{{ _('Filters & Triggers') }}</a></li>
|
<li class="tab" id="filters-and-triggers-tab"><a href="#filters-and-triggers">{{ _('Filters & Triggers') }}</a></li>
|
||||||
<li class="tab" id="conditions-tab"><a href="#conditions">{{ _('Conditions') }}</a></li>
|
<li class="tab" id="conditions-tab"><a href="#conditions">{{ _('Conditions') }}</a></li>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
@@ -116,6 +121,7 @@
|
|||||||
</fieldset>
|
</fieldset>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
{% if capabilities.supports_request_type %}
|
||||||
<div class="tab-pane-inner" id="request">
|
<div class="tab-pane-inner" id="request">
|
||||||
<div class="pure-control-group inline-radio">
|
<div class="pure-control-group inline-radio">
|
||||||
{{ render_field(form.fetch_backend, class="fetch-backend") }}
|
{{ render_field(form.fetch_backend, class="fetch-backend") }}
|
||||||
@@ -203,6 +209,7 @@ Math: {{ 1 + 1 }}") }}
|
|||||||
</div>
|
</div>
|
||||||
</fieldset>
|
</fieldset>
|
||||||
</div>
|
</div>
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
<div class="tab-pane-inner" id="browser-steps">
|
<div class="tab-pane-inner" id="browser-steps">
|
||||||
{% if capabilities.supports_browser_steps %}
|
{% if capabilities.supports_browser_steps %}
|
||||||
@@ -283,8 +290,7 @@ Math: {{ 1 + 1 }}") }}
|
|||||||
</fieldset>
|
</fieldset>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
{% if watch['processor'] == 'text_json_diff' or watch['processor'] == 'image_ssim_diff' %}
|
{% if capabilities.supports_text_filters_and_triggers %}
|
||||||
|
|
||||||
<div class="tab-pane-inner" id="conditions">
|
<div class="tab-pane-inner" id="conditions">
|
||||||
<script>
|
<script>
|
||||||
const verify_condition_rule_url="{{url_for('conditions.verify_condition_single_rule', watch_uuid=uuid)}}";
|
const verify_condition_rule_url="{{url_for('conditions.verify_condition_single_rule', watch_uuid=uuid)}}";
|
||||||
@@ -303,7 +309,9 @@ Math: {{ 1 + 1 }}") }}
|
|||||||
<span id="activate-text-preview" class="pure-button pure-button-primary button-xsmall">{{ _('Activate preview') }}</span>
|
<span id="activate-text-preview" class="pure-button pure-button-primary button-xsmall">{{ _('Activate preview') }}</span>
|
||||||
<div>
|
<div>
|
||||||
<div id="edit-text-filter">
|
<div id="edit-text-filter">
|
||||||
<div class="pure-control-group" id="pro-tips">
|
|
||||||
|
{% if capabilities.supports_text_filters_and_triggers_elements %}
|
||||||
|
<div class="pure-control-group" id="pro-tips">
|
||||||
<strong>{{ _('Pro-tips:') }}</strong><br>
|
<strong>{{ _('Pro-tips:') }}</strong><br>
|
||||||
<ul>
|
<ul>
|
||||||
<li>
|
<li>
|
||||||
@@ -314,8 +322,8 @@ Math: {{ 1 + 1 }}") }}
|
|||||||
</li>
|
</li>
|
||||||
</ul>
|
</ul>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
{% include "edit/include_subtract.html" %}
|
{% include "edit/include_subtract.html" %}
|
||||||
|
{% endif %}
|
||||||
<div class="text-filtering border-fieldset">
|
<div class="text-filtering border-fieldset">
|
||||||
<fieldset class="pure-group" id="text-filtering-type-options">
|
<fieldset class="pure-group" id="text-filtering-type-options">
|
||||||
<h3>{{ _('Text filtering') }}</h3>
|
<h3>{{ _('Text filtering') }}</h3>
|
||||||
@@ -374,7 +382,7 @@ Math: {{ 1 + 1 }}") }}
|
|||||||
{{ extra_form_content|safe }}
|
{{ extra_form_content|safe }}
|
||||||
</div>
|
</div>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
{% if watch['processor'] == 'text_json_diff' or watch['processor'] == 'image_ssim_diff' %}
|
{% if capabilities.supports_visual_selector %}
|
||||||
<div class="tab-pane-inner visual-selector-ui" id="visualselector">
|
<div class="tab-pane-inner visual-selector-ui" id="visualselector">
|
||||||
<img class="beta-logo" src="{{url_for('static_content', group='images', filename='beta-logo.png')}}" alt="New beta functionality">
|
<img class="beta-logo" src="{{url_for('static_content', group='images', filename='beta-logo.png')}}" alt="New beta functionality">
|
||||||
|
|
||||||
@@ -386,7 +394,7 @@ Math: {{ 1 + 1 }}") }}
|
|||||||
{{ _('The Visual Selector tool lets you select the') }} <i>{{ _('text') }}</i> {{ _('elements that will be used for the change detection. It automatically fills-in the filters in the "CSS/JSONPath/JQ/XPath Filters" box of the') }} <a href="#filters-and-triggers">{{ _('Filters & Triggers') }}</a> {{ _('tab. Use') }} <strong>{{ _('Shift+Click') }}</strong> {{ _('to select multiple items.') }}
|
{{ _('The Visual Selector tool lets you select the') }} <i>{{ _('text') }}</i> {{ _('elements that will be used for the change detection. It automatically fills-in the filters in the "CSS/JSONPath/JQ/XPath Filters" box of the') }} <a href="#filters-and-triggers">{{ _('Filters & Triggers') }}</a> {{ _('tab. Use') }} <strong>{{ _('Shift+Click') }}</strong> {{ _('to select multiple items.') }}
|
||||||
</span>
|
</span>
|
||||||
|
|
||||||
{% if watch['processor'] == 'image_ssim_diff' %}
|
{% if watch['processor'] == 'image_ssim_diff' %} {# @todo, integrate with image_ssim_diff selector better, use some extra form ? #}
|
||||||
<div id="selection-mode-controls" style="margin: 10px 0; padding: 10px; background: var(--color-background-tab); border-radius: 5px;">
|
<div id="selection-mode-controls" style="margin: 10px 0; padding: 10px; background: var(--color-background-tab); border-radius: 5px;">
|
||||||
<label style="font-weight: 600; margin-right: 15px;">{{ _('Selection Mode:') }}</label>
|
<label style="font-weight: 600; margin-right: 15px;">{{ _('Selection Mode:') }}</label>
|
||||||
<label style="margin-right: 15px;">
|
<label style="margin-right: 15px;">
|
||||||
|
|||||||
@@ -24,7 +24,8 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
|||||||
flash(gettext('Warning, URL {} already exists').format(url), "notice")
|
flash(gettext('Warning, URL {} already exists').format(url), "notice")
|
||||||
|
|
||||||
add_paused = request.form.get('edit_and_watch_submit_button') != None
|
add_paused = request.form.get('edit_and_watch_submit_button') != None
|
||||||
processor = request.form.get('processor', 'text_json_diff')
|
from changedetectionio import processors
|
||||||
|
processor = request.form.get('processor', processors.get_default_processor())
|
||||||
new_uuid = datastore.add_watch(url=url, tag=request.form.get('tags').strip(), extras={'paused': add_paused, 'processor': processor})
|
new_uuid = datastore.add_watch(url=url, tag=request.form.get('tags').strip(), extras={'paused': add_paused, 'processor': processor})
|
||||||
|
|
||||||
if new_uuid:
|
if new_uuid:
|
||||||
|
|||||||
@@ -205,23 +205,24 @@ html[data-darkmode="true"] .watch-tag-list.tag-{{ class_name }} {
|
|||||||
</div>
|
</div>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
<div>
|
<div>
|
||||||
<span class="watch-title">
|
{%- if watch['processor'] and watch['processor'] in processor_badge_texts -%}
|
||||||
{% if system_use_url_watchlist or watch.get('use_page_title_in_list') %}
|
<span class="processor-badge processor-badge-{{ watch['processor'] }}" title="{{ processor_descriptions.get(watch['processor'], watch['processor']) }}">{{ processor_badge_texts[watch['processor']] }}</span>
|
||||||
{{ watch.label }}
|
{%- endif -%}
|
||||||
{% else %}
|
<span class="watch-title">
|
||||||
{{ watch.get('title') or watch.link }}
|
{% if system_use_url_watchlist or watch.get('use_page_title_in_list') %}
|
||||||
{% endif %}
|
{{ watch.label }}
|
||||||
<a class="external" target="_blank" rel="noopener" href="{{ watch.link.replace('source:','') }}"> </a>
|
{% else %}
|
||||||
</span>
|
{{ watch.get('title') or watch.link }}
|
||||||
|
{% endif %}
|
||||||
|
<a class="external" target="_blank" rel="noopener" href="{{ watch.link.replace('source:','') }}"> </a>
|
||||||
|
</span>
|
||||||
<div class="error-text" style="display:none;">{{ watch.compile_error_texts(has_proxies=datastore.proxy_list)|safe }}</div>
|
<div class="error-text" style="display:none;">{{ watch.compile_error_texts(has_proxies=datastore.proxy_list)|safe }}</div>
|
||||||
{%- if watch['processor'] == 'text_json_diff' -%}
|
{%- if watch['processor'] == 'text_json_diff' -%}
|
||||||
{%- if watch['has_ldjson_price_data'] and not watch['track_ldjson_price_data'] -%}
|
{%- if watch['has_ldjson_price_data'] and not watch['track_ldjson_price_data'] -%}
|
||||||
<div class="ldjson-price-track-offer">Switch to Restock & Price watch mode? <a href="{{url_for('price_data_follower.accept', uuid=watch.uuid)}}" class="pure-button button-xsmall">Yes</a> <a href="{{url_for('price_data_follower.reject', uuid=watch.uuid)}}" class="">No</a></div>
|
<div class="ldjson-price-track-offer">Switch to Restock & Price watch mode? <a href="{{url_for('price_data_follower.accept', uuid=watch.uuid)}}" class="pure-button button-xsmall">Yes</a> <a href="{{url_for('price_data_follower.reject', uuid=watch.uuid)}}" class="">No</a></div>
|
||||||
{%- endif -%}
|
{%- endif -%}
|
||||||
{%- endif -%}
|
{%- endif -%}
|
||||||
{%- if watch['processor'] and watch['processor'] in processor_badge_texts -%}
|
|
||||||
<span class="processor-badge processor-badge-{{ watch['processor'] }}" title="{{ processor_descriptions.get(watch['processor'], watch['processor']) }}">{{ processor_badge_texts[watch['processor']] }}</span>
|
|
||||||
{%- endif -%}
|
|
||||||
{%- for watch_tag_uuid, watch_tag in datastore.get_all_tags_for_watch(watch['uuid']).items() -%}
|
{%- for watch_tag_uuid, watch_tag in datastore.get_all_tags_for_watch(watch['uuid']).items() -%}
|
||||||
<a href="{{url_for('watchlist.index', tag=watch_tag_uuid) }}" class="watch-tag-list tag-{{ watch_tag.title|sanitize_tag_class }}">{{ watch_tag.title }}</a>
|
<a href="{{url_for('watchlist.index', tag=watch_tag_uuid) }}" class="watch-tag-list tag-{{ watch_tag.title|sanitize_tag_class }}">{{ watch_tag.title }}</a>
|
||||||
{%- endfor -%}
|
{%- endfor -%}
|
||||||
|
|||||||
@@ -1,4 +1,3 @@
|
|||||||
import asyncio
|
|
||||||
import gc
|
import gc
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
@@ -350,7 +349,12 @@ class fetcher(Fetcher):
|
|||||||
|
|
||||||
if self.status_code != 200 and not ignore_status_codes:
|
if self.status_code != 200 and not ignore_status_codes:
|
||||||
screenshot = await capture_full_page_async(self.page, screenshot_format=self.screenshot_format, watch_uuid=watch_uuid, lock_viewport_elements=self.lock_viewport_elements)
|
screenshot = await capture_full_page_async(self.page, screenshot_format=self.screenshot_format, watch_uuid=watch_uuid, lock_viewport_elements=self.lock_viewport_elements)
|
||||||
# Finally block will handle cleanup
|
# Cleanup before raising to prevent memory leak
|
||||||
|
await self.page.close()
|
||||||
|
await context.close()
|
||||||
|
await browser.close()
|
||||||
|
# Force garbage collection to release Playwright resources immediately
|
||||||
|
gc.collect()
|
||||||
raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, screenshot=screenshot)
|
raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, screenshot=screenshot)
|
||||||
|
|
||||||
if not empty_pages_are_a_change and len((await self.page.content()).strip()) == 0:
|
if not empty_pages_are_a_change and len((await self.page.content()).strip()) == 0:
|
||||||
@@ -366,7 +370,12 @@ class fetcher(Fetcher):
|
|||||||
try:
|
try:
|
||||||
await self.iterate_browser_steps(start_url=url)
|
await self.iterate_browser_steps(start_url=url)
|
||||||
except BrowserStepsStepException:
|
except BrowserStepsStepException:
|
||||||
# Finally block will handle cleanup
|
try:
|
||||||
|
await context.close()
|
||||||
|
await browser.close()
|
||||||
|
except Exception as e:
|
||||||
|
# Fine, could be messy situation
|
||||||
|
pass
|
||||||
raise
|
raise
|
||||||
|
|
||||||
await self.page.wait_for_timeout(extra_wait * 1000)
|
await self.page.wait_for_timeout(extra_wait * 1000)
|
||||||
@@ -415,40 +424,35 @@ class fetcher(Fetcher):
|
|||||||
raise ScreenshotUnavailable(url=url, status_code=self.status_code)
|
raise ScreenshotUnavailable(url=url, status_code=self.status_code)
|
||||||
|
|
||||||
finally:
|
finally:
|
||||||
# Clean up resources properly with timeouts to prevent hanging
|
# Request garbage collection one more time before closing
|
||||||
try:
|
try:
|
||||||
if hasattr(self, 'page') and self.page:
|
await self.page.request_gc()
|
||||||
await self.page.request_gc()
|
except:
|
||||||
await asyncio.wait_for(self.page.close(), timeout=5.0)
|
pass
|
||||||
logger.debug(f"Successfully closed page for {url}")
|
|
||||||
except asyncio.TimeoutError:
|
# Clean up resources properly
|
||||||
logger.warning(f"Timed out closing page for {url} (5s)")
|
try:
|
||||||
except Exception as e:
|
await self.page.request_gc()
|
||||||
logger.warning(f"Error closing page for {url}: {e}")
|
except:
|
||||||
finally:
|
pass
|
||||||
self.page = None
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
if context:
|
await self.page.close()
|
||||||
await asyncio.wait_for(context.close(), timeout=5.0)
|
except:
|
||||||
logger.debug(f"Successfully closed context for {url}")
|
pass
|
||||||
except asyncio.TimeoutError:
|
self.page = None
|
||||||
logger.warning(f"Timed out closing context for {url} (5s)")
|
|
||||||
except Exception as e:
|
|
||||||
logger.warning(f"Error closing context for {url}: {e}")
|
|
||||||
finally:
|
|
||||||
context = None
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
if browser:
|
await context.close()
|
||||||
await asyncio.wait_for(browser.close(), timeout=5.0)
|
except:
|
||||||
logger.debug(f"Successfully closed browser connection for {url}")
|
pass
|
||||||
except asyncio.TimeoutError:
|
context = None
|
||||||
logger.warning(f"Timed out closing browser connection for {url} (5s)")
|
|
||||||
except Exception as e:
|
try:
|
||||||
logger.warning(f"Error closing browser for {url}: {e}")
|
await browser.close()
|
||||||
finally:
|
except:
|
||||||
browser = None
|
pass
|
||||||
|
browser = None
|
||||||
|
|
||||||
# Force Python GC to release Playwright resources immediately
|
# Force Python GC to release Playwright resources immediately
|
||||||
# Playwright objects can have circular references that delay cleanup
|
# Playwright objects can have circular references that delay cleanup
|
||||||
|
|||||||
@@ -1,5 +1,4 @@
|
|||||||
import asyncio
|
import asyncio
|
||||||
import gc
|
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
import websockets.exceptions
|
import websockets.exceptions
|
||||||
@@ -222,36 +221,19 @@ class fetcher(Fetcher):
|
|||||||
self.browser_connection_url += f"{r}--proxy-server={proxy_url}"
|
self.browser_connection_url += f"{r}--proxy-server={proxy_url}"
|
||||||
|
|
||||||
async def quit(self, watch=None):
|
async def quit(self, watch=None):
|
||||||
watch_uuid = watch.get('uuid') if watch else 'unknown'
|
|
||||||
|
|
||||||
# Close page
|
|
||||||
try:
|
try:
|
||||||
if hasattr(self, 'page') and self.page:
|
await self.page.close()
|
||||||
await asyncio.wait_for(self.page.close(), timeout=5.0)
|
del self.page
|
||||||
logger.debug(f"[{watch_uuid}] Page closed successfully")
|
|
||||||
except asyncio.TimeoutError:
|
|
||||||
logger.warning(f"[{watch_uuid}] Timed out closing page (5s)")
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning(f"[{watch_uuid}] Error closing page: {e}")
|
pass
|
||||||
finally:
|
|
||||||
self.page = None
|
|
||||||
|
|
||||||
# Close browser connection
|
|
||||||
try:
|
try:
|
||||||
if hasattr(self, 'browser') and self.browser:
|
await self.browser.close()
|
||||||
await asyncio.wait_for(self.browser.close(), timeout=5.0)
|
del self.browser
|
||||||
logger.debug(f"[{watch_uuid}] Browser closed successfully")
|
|
||||||
except asyncio.TimeoutError:
|
|
||||||
logger.warning(f"[{watch_uuid}] Timed out closing browser (5s)")
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning(f"[{watch_uuid}] Error closing browser: {e}")
|
pass
|
||||||
finally:
|
|
||||||
self.browser = None
|
|
||||||
|
|
||||||
logger.info(f"[{watch_uuid}] Cleanup puppeteer complete")
|
logger.info("Cleanup puppeteer complete.")
|
||||||
|
|
||||||
# Force garbage collection to release resources
|
|
||||||
gc.collect()
|
|
||||||
|
|
||||||
async def fetch_page(self,
|
async def fetch_page(self,
|
||||||
current_include_filters,
|
current_include_filters,
|
||||||
@@ -281,11 +263,9 @@ class fetcher(Fetcher):
|
|||||||
# Connect directly using the specified browser_ws_endpoint
|
# Connect directly using the specified browser_ws_endpoint
|
||||||
# @todo timeout
|
# @todo timeout
|
||||||
try:
|
try:
|
||||||
logger.debug(f"[{watch_uuid}] Connecting to browser at {self.browser_connection_url}")
|
|
||||||
self.browser = await pyppeteer_instance.connect(browserWSEndpoint=self.browser_connection_url,
|
self.browser = await pyppeteer_instance.connect(browserWSEndpoint=self.browser_connection_url,
|
||||||
ignoreHTTPSErrors=True
|
ignoreHTTPSErrors=True
|
||||||
)
|
)
|
||||||
logger.debug(f"[{watch_uuid}] Browser connected successfully")
|
|
||||||
except websockets.exceptions.InvalidStatusCode as e:
|
except websockets.exceptions.InvalidStatusCode as e:
|
||||||
raise BrowserConnectError(msg=f"Error while trying to connect the browser, Code {e.status_code} (check your access, whitelist IP, password etc)")
|
raise BrowserConnectError(msg=f"Error while trying to connect the browser, Code {e.status_code} (check your access, whitelist IP, password etc)")
|
||||||
except websockets.exceptions.InvalidURI:
|
except websockets.exceptions.InvalidURI:
|
||||||
@@ -294,18 +274,7 @@ class fetcher(Fetcher):
|
|||||||
raise BrowserConnectError(msg=f"Error connecting to the browser - Exception '{str(e)}'")
|
raise BrowserConnectError(msg=f"Error connecting to the browser - Exception '{str(e)}'")
|
||||||
|
|
||||||
# more reliable is to just request a new page
|
# more reliable is to just request a new page
|
||||||
try:
|
self.page = await self.browser.newPage()
|
||||||
logger.debug(f"[{watch_uuid}] Creating new page")
|
|
||||||
self.page = await self.browser.newPage()
|
|
||||||
logger.debug(f"[{watch_uuid}] Page created successfully")
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"[{watch_uuid}] Failed to create new page: {e}")
|
|
||||||
# Browser is connected but page creation failed - must cleanup browser
|
|
||||||
try:
|
|
||||||
await asyncio.wait_for(self.browser.close(), timeout=3.0)
|
|
||||||
except Exception as cleanup_error:
|
|
||||||
logger.error(f"[{watch_uuid}] Failed to cleanup browser after page creation failure: {cleanup_error}")
|
|
||||||
raise
|
|
||||||
|
|
||||||
# Add console handler to capture console.log from favicon fetcher
|
# Add console handler to capture console.log from favicon fetcher
|
||||||
#self.page.on('console', lambda msg: logger.debug(f"Browser console [{msg.type}]: {msg.text}"))
|
#self.page.on('console', lambda msg: logger.debug(f"Browser console [{msg.type}]: {msg.text}"))
|
||||||
@@ -374,12 +343,6 @@ class fetcher(Fetcher):
|
|||||||
w = extra_wait - 2 if extra_wait > 4 else 2
|
w = extra_wait - 2 if extra_wait > 4 else 2
|
||||||
logger.debug(f"Waiting {w} seconds before calling Page.stopLoading...")
|
logger.debug(f"Waiting {w} seconds before calling Page.stopLoading...")
|
||||||
await asyncio.sleep(w)
|
await asyncio.sleep(w)
|
||||||
|
|
||||||
# Check if page still exists (might have been closed due to error during sleep)
|
|
||||||
if not self.page or not hasattr(self.page, '_client'):
|
|
||||||
logger.debug("Page already closed, skipping stopLoading")
|
|
||||||
return
|
|
||||||
|
|
||||||
logger.debug("Issuing stopLoading command...")
|
logger.debug("Issuing stopLoading command...")
|
||||||
await self.page._client.send('Page.stopLoading')
|
await self.page._client.send('Page.stopLoading')
|
||||||
logger.debug("stopLoading command sent!")
|
logger.debug("stopLoading command sent!")
|
||||||
@@ -405,9 +368,7 @@ class fetcher(Fetcher):
|
|||||||
asyncio.create_task(handle_frame_navigation())
|
asyncio.create_task(handle_frame_navigation())
|
||||||
response = await self.page.goto(url, timeout=0)
|
response = await self.page.goto(url, timeout=0)
|
||||||
await asyncio.sleep(1 + extra_wait)
|
await asyncio.sleep(1 + extra_wait)
|
||||||
# Check if page still exists before sending command
|
await self.page._client.send('Page.stopLoading')
|
||||||
if self.page and hasattr(self.page, '_client'):
|
|
||||||
await self.page._client.send('Page.stopLoading')
|
|
||||||
|
|
||||||
if response:
|
if response:
|
||||||
break
|
break
|
||||||
@@ -476,9 +437,15 @@ class fetcher(Fetcher):
|
|||||||
logger.debug(f"Screenshot format {self.screenshot_format}")
|
logger.debug(f"Screenshot format {self.screenshot_format}")
|
||||||
self.screenshot = await capture_full_page(page=self.page, screenshot_format=self.screenshot_format, watch_uuid=watch_uuid, lock_viewport_elements=self.lock_viewport_elements)
|
self.screenshot = await capture_full_page(page=self.page, screenshot_format=self.screenshot_format, watch_uuid=watch_uuid, lock_viewport_elements=self.lock_viewport_elements)
|
||||||
|
|
||||||
# Force garbage collection - pyppeteer base64 decode creates temporary buffers
|
# Force aggressive memory cleanup - pyppeteer base64 decode creates temporary buffers
|
||||||
import gc
|
import gc
|
||||||
gc.collect()
|
gc.collect()
|
||||||
|
# Release C-level memory from base64 decode back to OS
|
||||||
|
try:
|
||||||
|
import ctypes
|
||||||
|
ctypes.CDLL('libc.so.6').malloc_trim(0)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
self.xpath_data = await self.page.evaluate(XPATH_ELEMENT_JS, {
|
self.xpath_data = await self.page.evaluate(XPATH_ELEMENT_JS, {
|
||||||
"visualselector_xpath_selectors": visualselector_xpath_selectors,
|
"visualselector_xpath_selectors": visualselector_xpath_selectors,
|
||||||
"max_height": MAX_TOTAL_HEIGHT
|
"max_height": MAX_TOTAL_HEIGHT
|
||||||
|
|||||||
@@ -730,7 +730,7 @@ class quickWatchForm(Form):
|
|||||||
url = fields.URLField(_l('URL'), validators=[validateURL()])
|
url = fields.URLField(_l('URL'), validators=[validateURL()])
|
||||||
tags = StringTagUUID(_l('Group tag'), validators=[validators.Optional()])
|
tags = StringTagUUID(_l('Group tag'), validators=[validators.Optional()])
|
||||||
watch_submit_button = SubmitField(_l('Watch'), render_kw={"class": "pure-button pure-button-primary"})
|
watch_submit_button = SubmitField(_l('Watch'), render_kw={"class": "pure-button pure-button-primary"})
|
||||||
processor = RadioField(_l('Processor'), choices=lambda: processors.available_processors(), default="text_json_diff")
|
processor = RadioField(_l('Processor'), choices=lambda: processors.available_processors(), default=processors.get_default_processor)
|
||||||
edit_and_watch_submit_button = SubmitField(_l('Edit > Watch'), render_kw={"class": "pure-button pure-button-primary"})
|
edit_and_watch_submit_button = SubmitField(_l('Edit > Watch'), render_kw={"class": "pure-button pure-button-primary"})
|
||||||
|
|
||||||
|
|
||||||
@@ -749,7 +749,7 @@ class commonSettingsForm(Form):
|
|||||||
notification_format = SelectField(_l('Notification format'), choices=list(valid_notification_formats.items()))
|
notification_format = SelectField(_l('Notification format'), choices=list(valid_notification_formats.items()))
|
||||||
notification_title = StringField(_l('Notification Title'), default='ChangeDetection.io Notification - {{ watch_url }}', validators=[validators.Optional(), ValidateJinja2Template()])
|
notification_title = StringField(_l('Notification Title'), default='ChangeDetection.io Notification - {{ watch_url }}', validators=[validators.Optional(), ValidateJinja2Template()])
|
||||||
notification_urls = StringListField(_l('Notification URL List'), validators=[validators.Optional(), ValidateAppRiseServers(), ValidateJinja2Template()])
|
notification_urls = StringListField(_l('Notification URL List'), validators=[validators.Optional(), ValidateAppRiseServers(), ValidateJinja2Template()])
|
||||||
processor = RadioField( label=_l("Processor - What do you want to achieve?"), choices=lambda: processors.available_processors(), default="text_json_diff")
|
processor = RadioField( label=_l("Processor - What do you want to achieve?"), choices=lambda: processors.available_processors(), default=processors.get_default_processor)
|
||||||
scheduler_timezone_default = StringField(_l("Default timezone for watch check scheduler"), render_kw={"list": "timezones"}, validators=[validateTimeZoneName()])
|
scheduler_timezone_default = StringField(_l("Default timezone for watch check scheduler"), render_kw={"list": "timezones"}, validators=[validateTimeZoneName()])
|
||||||
webdriver_delay = IntegerField(_l('Wait seconds before extracting text'), validators=[validators.Optional(), validators.NumberRange(min=1, message=_l("Should contain one or more seconds"))])
|
webdriver_delay = IntegerField(_l('Wait seconds before extracting text'), validators=[validators.Optional(), validators.NumberRange(min=1, message=_l("Should contain one or more seconds"))])
|
||||||
|
|
||||||
@@ -763,7 +763,7 @@ class commonSettingsForm(Form):
|
|||||||
|
|
||||||
|
|
||||||
class importForm(Form):
|
class importForm(Form):
|
||||||
processor = RadioField(_l('Processor'), choices=lambda: processors.available_processors(), default="text_json_diff")
|
processor = RadioField(_l('Processor'), choices=lambda: processors.available_processors(), default=processors.get_default_processor)
|
||||||
urls = TextAreaField(_l('URLs'))
|
urls = TextAreaField(_l('URLs'))
|
||||||
xlsx_file = FileField(_l('Upload .xlsx file'), validators=[FileAllowed(['xlsx'], _l('Must be .xlsx file!'))])
|
xlsx_file = FileField(_l('Upload .xlsx file'), validators=[FileAllowed(['xlsx'], _l('Must be .xlsx file!'))])
|
||||||
file_mapping = SelectField(_l('File mapping'), [validators.DataRequired()], choices={('wachete', 'Wachete mapping'), ('custom','Custom mapping')})
|
file_mapping = SelectField(_l('File mapping'), [validators.DataRequired()], choices={('wachete', 'Wachete mapping'), ('custom','Custom mapping')})
|
||||||
|
|||||||
@@ -105,6 +105,30 @@ class ChangeDetectionSpec:
|
|||||||
"""
|
"""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
@hookspec
|
||||||
|
def register_processor(self):
|
||||||
|
"""Register an external processor plugin.
|
||||||
|
|
||||||
|
External packages can implement this hook to register custom processors
|
||||||
|
that will be discovered alongside built-in processors.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
dict or None: Dictionary with processor information:
|
||||||
|
{
|
||||||
|
'processor_name': str, # Machine name (e.g., 'osint_recon')
|
||||||
|
'processor_module': module, # Module containing processor.py
|
||||||
|
'processor_class': class, # The perform_site_check class
|
||||||
|
'metadata': { # Optional metadata
|
||||||
|
'name': str, # Display name
|
||||||
|
'description': str, # Description
|
||||||
|
'processor_weight': int,# Sort weight (lower = higher priority)
|
||||||
|
'list_badge_text': str, # Badge text for UI
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Return None if this plugin doesn't provide a processor
|
||||||
|
"""
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
# Set up Plugin Manager
|
# Set up Plugin Manager
|
||||||
plugin_manager = pluggy.PluginManager(PLUGIN_NAMESPACE)
|
plugin_manager = pluggy.PluginManager(PLUGIN_NAMESPACE)
|
||||||
|
|||||||
@@ -17,9 +17,11 @@ def find_sub_packages(package_name):
|
|||||||
return [name for _, name, is_pkg in pkgutil.iter_modules(package.__path__) if is_pkg]
|
return [name for _, name, is_pkg in pkgutil.iter_modules(package.__path__) if is_pkg]
|
||||||
|
|
||||||
|
|
||||||
|
@lru_cache(maxsize=1)
|
||||||
def find_processors():
|
def find_processors():
|
||||||
"""
|
"""
|
||||||
Find all subclasses of DifferenceDetectionProcessor in the specified package.
|
Find all subclasses of DifferenceDetectionProcessor in the specified package.
|
||||||
|
Results are cached to avoid repeated discovery.
|
||||||
|
|
||||||
:param package_name: The name of the package to scan for processor modules.
|
:param package_name: The name of the package to scan for processor modules.
|
||||||
:return: A list of (module, class) tuples.
|
:return: A list of (module, class) tuples.
|
||||||
@@ -46,6 +48,22 @@ def find_processors():
|
|||||||
except (ModuleNotFoundError, ImportError) as e:
|
except (ModuleNotFoundError, ImportError) as e:
|
||||||
logger.warning(f"Failed to import module {module_name}: {e} (find_processors())")
|
logger.warning(f"Failed to import module {module_name}: {e} (find_processors())")
|
||||||
|
|
||||||
|
# Discover plugin processors via pluggy
|
||||||
|
try:
|
||||||
|
from changedetectionio.pluggy_interface import plugin_manager
|
||||||
|
plugin_results = plugin_manager.hook.register_processor()
|
||||||
|
|
||||||
|
for result in plugin_results:
|
||||||
|
if result and isinstance(result, dict):
|
||||||
|
processor_module = result.get('processor_module')
|
||||||
|
processor_name = result.get('processor_name')
|
||||||
|
|
||||||
|
if processor_module and processor_name:
|
||||||
|
processors.append((processor_module, processor_name))
|
||||||
|
logger.info(f"Registered plugin processor: {processor_name}")
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Error loading plugin processors: {e}")
|
||||||
|
|
||||||
return processors
|
return processors
|
||||||
|
|
||||||
|
|
||||||
@@ -97,54 +115,137 @@ def find_processor_module(processor_name):
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def get_processor_module(processor_name):
|
||||||
|
"""
|
||||||
|
Get the actual processor module (with perform_site_check class) by name.
|
||||||
|
Works for both built-in and plugin processors.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
processor_name: Processor machine name (e.g., 'text_json_diff', 'osint_recon')
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
module: The processor module containing perform_site_check, or None if not found
|
||||||
|
"""
|
||||||
|
processor_classes = find_processors()
|
||||||
|
processor_tuple = next((tpl for tpl in processor_classes if tpl[1] == processor_name), None)
|
||||||
|
|
||||||
|
if processor_tuple:
|
||||||
|
# Return the actual processor module (first element of tuple)
|
||||||
|
return processor_tuple[0]
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def get_processor_submodule(processor_name, submodule_name):
|
||||||
|
"""
|
||||||
|
Get an optional submodule from a processor (e.g., 'difference', 'extract', 'preview').
|
||||||
|
Works for both built-in and plugin processors.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
processor_name: Processor machine name (e.g., 'text_json_diff', 'osint_recon')
|
||||||
|
submodule_name: Name of the submodule (e.g., 'difference', 'extract', 'preview')
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
module: The submodule if it exists, or None if not found
|
||||||
|
"""
|
||||||
|
processor_classes = find_processors()
|
||||||
|
processor_tuple = next((tpl for tpl in processor_classes if tpl[1] == processor_name), None)
|
||||||
|
|
||||||
|
if not processor_tuple:
|
||||||
|
return None
|
||||||
|
|
||||||
|
processor_module = processor_tuple[0]
|
||||||
|
parent_module = get_parent_module(processor_module)
|
||||||
|
|
||||||
|
if not parent_module:
|
||||||
|
return None
|
||||||
|
|
||||||
|
# Try to import the submodule
|
||||||
|
try:
|
||||||
|
# For built-in processors: changedetectionio.processors.text_json_diff.difference
|
||||||
|
# For plugin processors: changedetectionio_osint.difference
|
||||||
|
parent_module_name = parent_module.__name__
|
||||||
|
submodule_full_name = f"{parent_module_name}.{submodule_name}"
|
||||||
|
return importlib.import_module(submodule_full_name)
|
||||||
|
except (ModuleNotFoundError, ImportError):
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
@lru_cache(maxsize=1)
|
||||||
|
def get_plugin_processor_metadata():
|
||||||
|
"""Get metadata from plugin processors."""
|
||||||
|
metadata = {}
|
||||||
|
try:
|
||||||
|
from changedetectionio.pluggy_interface import plugin_manager
|
||||||
|
plugin_results = plugin_manager.hook.register_processor()
|
||||||
|
|
||||||
|
for result in plugin_results:
|
||||||
|
if result and isinstance(result, dict):
|
||||||
|
processor_name = result.get('processor_name')
|
||||||
|
meta = result.get('metadata', {})
|
||||||
|
if processor_name:
|
||||||
|
metadata[processor_name] = meta
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Error getting plugin processor metadata: {e}")
|
||||||
|
return metadata
|
||||||
|
|
||||||
|
|
||||||
def available_processors():
|
def available_processors():
|
||||||
"""
|
"""
|
||||||
Get a list of processors by name and description for the UI elements.
|
Get a list of processors by name and description for the UI elements.
|
||||||
Can be filtered via ALLOWED_PROCESSORS environment variable (comma-separated list).
|
Can be filtered via DISABLED_PROCESSORS environment variable (comma-separated list).
|
||||||
:return: A list :)
|
:return: A list :)
|
||||||
"""
|
"""
|
||||||
|
|
||||||
processor_classes = find_processors()
|
processor_classes = find_processors()
|
||||||
|
|
||||||
# Check if ALLOWED_PROCESSORS env var is set
|
# Check if DISABLED_PROCESSORS env var is set
|
||||||
# For now we disable it, need to make a deploy with lots of new code and this will be an overload
|
disabled_processors_env = os.getenv('DISABLED_PROCESSORS', 'image_ssim_diff').strip()
|
||||||
allowed_processors_env = os.getenv('ALLOWED_PROCESSORS', 'text_json_diff, restock_diff').strip()
|
disabled_processors = []
|
||||||
allowed_processors = None
|
if disabled_processors_env:
|
||||||
if allowed_processors_env:
|
|
||||||
# Parse comma-separated list and strip whitespace
|
# Parse comma-separated list and strip whitespace
|
||||||
allowed_processors = [p.strip() for p in allowed_processors_env.split(',') if p.strip()]
|
disabled_processors = [p.strip() for p in disabled_processors_env.split(',') if p.strip()]
|
||||||
logger.info(f"ALLOWED_PROCESSORS set, filtering to: {allowed_processors}")
|
logger.info(f"DISABLED_PROCESSORS set, disabling: {disabled_processors}")
|
||||||
|
|
||||||
available = []
|
available = []
|
||||||
|
plugin_metadata = get_plugin_processor_metadata()
|
||||||
|
|
||||||
for module, sub_package_name in processor_classes:
|
for module, sub_package_name in processor_classes:
|
||||||
# Filter by allowed processors if set
|
# Skip disabled processors
|
||||||
if allowed_processors and sub_package_name not in allowed_processors:
|
if sub_package_name in disabled_processors:
|
||||||
logger.debug(f"Skipping processor '{sub_package_name}' (not in ALLOWED_PROCESSORS)")
|
logger.debug(f"Skipping processor '{sub_package_name}' (in DISABLED_PROCESSORS)")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Try to get the 'name' attribute from the processor module first
|
# Check if this is a plugin processor
|
||||||
if hasattr(module, 'name'):
|
if sub_package_name in plugin_metadata:
|
||||||
description = gettext(module.name)
|
meta = plugin_metadata[sub_package_name]
|
||||||
|
description = gettext(meta.get('name', sub_package_name))
|
||||||
|
# Plugin processors start from weight 10 to separate them from built-in processors
|
||||||
|
weight = 100 + meta.get('processor_weight', 0)
|
||||||
else:
|
else:
|
||||||
# Fall back to processor_description from parent module's __init__.py
|
# Try to get the 'name' attribute from the processor module first
|
||||||
parent_module = get_parent_module(module)
|
if hasattr(module, 'name'):
|
||||||
if parent_module and hasattr(parent_module, 'processor_description'):
|
description = gettext(module.name)
|
||||||
description = gettext(parent_module.processor_description)
|
|
||||||
else:
|
else:
|
||||||
# Final fallback to a readable name
|
# Fall back to processor_description from parent module's __init__.py
|
||||||
description = sub_package_name.replace('_', ' ').title()
|
parent_module = get_parent_module(module)
|
||||||
|
if parent_module and hasattr(parent_module, 'processor_description'):
|
||||||
|
description = gettext(parent_module.processor_description)
|
||||||
|
else:
|
||||||
|
# Final fallback to a readable name
|
||||||
|
description = sub_package_name.replace('_', ' ').title()
|
||||||
|
|
||||||
# Get weight for sorting (lower weight = higher in list)
|
# Get weight for sorting (lower weight = higher in list)
|
||||||
weight = 0 # Default weight for processors without explicit weight
|
weight = 0 # Default weight for processors without explicit weight
|
||||||
|
|
||||||
# Check processor module itself first
|
# Check processor module itself first
|
||||||
if hasattr(module, 'processor_weight'):
|
if hasattr(module, 'processor_weight'):
|
||||||
weight = module.processor_weight
|
weight = module.processor_weight
|
||||||
else:
|
else:
|
||||||
# Fall back to parent module (package __init__.py)
|
# Fall back to parent module (package __init__.py)
|
||||||
parent_module = get_parent_module(module)
|
parent_module = get_parent_module(module)
|
||||||
if parent_module and hasattr(parent_module, 'processor_weight'):
|
if parent_module and hasattr(parent_module, 'processor_weight'):
|
||||||
weight = parent_module.processor_weight
|
weight = parent_module.processor_weight
|
||||||
|
|
||||||
available.append((sub_package_name, description, weight))
|
available.append((sub_package_name, description, weight))
|
||||||
|
|
||||||
@@ -155,6 +256,20 @@ def available_processors():
|
|||||||
return [(name, desc) for name, desc, weight in available]
|
return [(name, desc) for name, desc, weight in available]
|
||||||
|
|
||||||
|
|
||||||
|
def get_default_processor():
|
||||||
|
"""
|
||||||
|
Get the default processor to use when none is specified.
|
||||||
|
Returns the first available processor based on weight (lowest weight = highest priority).
|
||||||
|
This ensures forms auto-select a valid processor even when DISABLED_PROCESSORS filters the list.
|
||||||
|
|
||||||
|
:return: The processor name string (e.g., 'text_json_diff')
|
||||||
|
"""
|
||||||
|
available = available_processors()
|
||||||
|
if available:
|
||||||
|
return available[0][0] # Return the processor name from first tuple
|
||||||
|
return 'text_json_diff' # Fallback if somehow no processors are available
|
||||||
|
|
||||||
|
|
||||||
def get_processor_badge_texts():
|
def get_processor_badge_texts():
|
||||||
"""
|
"""
|
||||||
Get a dictionary mapping processor names to their list_badge_text values.
|
Get a dictionary mapping processor names to their list_badge_text values.
|
||||||
@@ -279,3 +394,76 @@ def get_processor_badge_css():
|
|||||||
|
|
||||||
return '\n\n'.join(css_rules)
|
return '\n\n'.join(css_rules)
|
||||||
|
|
||||||
|
|
||||||
|
def save_processor_config(datastore, watch_uuid, config_data):
|
||||||
|
"""
|
||||||
|
Save processor-specific configuration to JSON file.
|
||||||
|
|
||||||
|
This is a shared helper function used by both the UI edit form and API endpoints
|
||||||
|
to consistently handle processor configuration storage.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
datastore: The application datastore instance
|
||||||
|
watch_uuid: UUID of the watch
|
||||||
|
config_data: Dictionary of configuration data to save (with processor_config_* prefix removed)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
bool: True if saved successfully, False otherwise
|
||||||
|
"""
|
||||||
|
if not config_data:
|
||||||
|
return True
|
||||||
|
|
||||||
|
try:
|
||||||
|
from changedetectionio.processors.base import difference_detection_processor
|
||||||
|
|
||||||
|
# Get processor name from watch
|
||||||
|
watch = datastore.data['watching'].get(watch_uuid)
|
||||||
|
if not watch:
|
||||||
|
logger.error(f"Cannot save processor config: watch {watch_uuid} not found")
|
||||||
|
return False
|
||||||
|
|
||||||
|
processor_name = watch.get('processor', 'text_json_diff')
|
||||||
|
|
||||||
|
# Create a processor instance to access config methods
|
||||||
|
processor_instance = difference_detection_processor(datastore, watch_uuid)
|
||||||
|
|
||||||
|
# Use processor name as filename so each processor keeps its own config
|
||||||
|
config_filename = f'{processor_name}.json'
|
||||||
|
processor_instance.update_extra_watch_config(config_filename, config_data)
|
||||||
|
|
||||||
|
logger.debug(f"Saved processor config to {config_filename}: {config_data}")
|
||||||
|
return True
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Failed to save processor config: {e}")
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def extract_processor_config_from_form_data(form_data):
|
||||||
|
"""
|
||||||
|
Extract processor_config_* fields from form data and return separate dicts.
|
||||||
|
|
||||||
|
This is a shared helper function used by both the UI edit form and API endpoints
|
||||||
|
to consistently handle processor configuration extraction.
|
||||||
|
|
||||||
|
IMPORTANT: This function modifies form_data in-place by removing processor_config_* fields.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
form_data: Dictionary of form data (will be modified in-place)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
dict: Dictionary of processor config data (with processor_config_* prefix removed)
|
||||||
|
"""
|
||||||
|
processor_config_data = {}
|
||||||
|
|
||||||
|
# Use list() to create a copy of keys since we're modifying the dict
|
||||||
|
for field_name in list(form_data.keys()):
|
||||||
|
if field_name.startswith('processor_config_'):
|
||||||
|
config_key = field_name.replace('processor_config_', '')
|
||||||
|
# Save all values (including empty strings) to allow explicit clearing of settings
|
||||||
|
processor_config_data[config_key] = form_data[field_name]
|
||||||
|
# Remove from form_data to prevent it from reaching datastore
|
||||||
|
del form_data[field_name]
|
||||||
|
|
||||||
|
return processor_config_data
|
||||||
|
|
||||||
|
|||||||
@@ -12,6 +12,13 @@ processor_description = "Visual/Screenshot change detection (Fast)"
|
|||||||
processor_name = "image_ssim_diff"
|
processor_name = "image_ssim_diff"
|
||||||
processor_weight = 2 # Lower weight = appears at top, heavier weight = appears lower (bottom)
|
processor_weight = 2 # Lower weight = appears at top, heavier weight = appears lower (bottom)
|
||||||
|
|
||||||
|
# Processor capabilities
|
||||||
|
supports_visual_selector = True
|
||||||
|
supports_browser_steps = True
|
||||||
|
supports_text_filters_and_triggers = False
|
||||||
|
supports_text_filters_and_triggers_elements = False
|
||||||
|
supports_request_type = True
|
||||||
|
|
||||||
PROCESSOR_CONFIG_NAME = f"{Path(__file__).parent.name}.json"
|
PROCESSOR_CONFIG_NAME = f"{Path(__file__).parent.name}.json"
|
||||||
|
|
||||||
# Subprocess timeout settings
|
# Subprocess timeout settings
|
||||||
|
|||||||
@@ -4,6 +4,13 @@ from changedetectionio.model.Watch import model as BaseWatch
|
|||||||
from typing import Union
|
from typing import Union
|
||||||
import re
|
import re
|
||||||
|
|
||||||
|
# Processor capabilities
|
||||||
|
supports_visual_selector = True
|
||||||
|
supports_browser_steps = True
|
||||||
|
supports_text_filters_and_triggers = True
|
||||||
|
supports_text_filters_and_triggers_elements = True
|
||||||
|
supports_request_type = True
|
||||||
|
|
||||||
class Restock(dict):
|
class Restock(dict):
|
||||||
|
|
||||||
def parse_currency(self, raw_value: str) -> Union[float, None]:
|
def parse_currency(self, raw_value: str) -> Union[float, None]:
|
||||||
|
|||||||
@@ -1,6 +1,12 @@
|
|||||||
|
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
|
# Processor capabilities
|
||||||
|
supports_visual_selector = True
|
||||||
|
supports_browser_steps = True
|
||||||
|
supports_text_filters_and_triggers = True
|
||||||
|
supports_text_filters_and_triggers_elements = True
|
||||||
|
supports_request_type = True
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def _task(watch, update_handler):
|
def _task(watch, update_handler):
|
||||||
|
|||||||
@@ -142,11 +142,14 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
|
|||||||
processor = watch.get('processor', 'text_json_diff')
|
processor = watch.get('processor', 'text_json_diff')
|
||||||
|
|
||||||
# Init a new 'difference_detection_processor'
|
# Init a new 'difference_detection_processor'
|
||||||
try:
|
# Use get_processor_module() to support both built-in and plugin processors
|
||||||
processor_module = importlib.import_module(f"changedetectionio.processors.{processor}.processor")
|
from changedetectionio.processors import get_processor_module
|
||||||
except ModuleNotFoundError as e:
|
processor_module = get_processor_module(processor)
|
||||||
print(f"Processor module '{processor}' not found.")
|
|
||||||
raise e
|
if not processor_module:
|
||||||
|
error_msg = f"Processor module '{processor}' not found."
|
||||||
|
logger.error(error_msg)
|
||||||
|
raise ModuleNotFoundError(error_msg)
|
||||||
|
|
||||||
update_handler = processor_module.perform_site_check(datastore=datastore,
|
update_handler = processor_module.perform_site_check(datastore=datastore,
|
||||||
watch_uuid=uuid)
|
watch_uuid=uuid)
|
||||||
@@ -475,9 +478,14 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
|
|||||||
del update_handler
|
del update_handler
|
||||||
update_handler = None
|
update_handler = None
|
||||||
|
|
||||||
# Force garbage collection
|
# Force aggressive memory cleanup after clearing
|
||||||
import gc
|
import gc
|
||||||
gc.collect()
|
gc.collect()
|
||||||
|
try:
|
||||||
|
import ctypes
|
||||||
|
ctypes.CDLL('libc.so.6').malloc_trim(0)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Worker {worker_id} unexpected error processing {uuid}: {e}")
|
logger.error(f"Worker {worker_id} unexpected error processing {uuid}: {e}")
|
||||||
@@ -490,7 +498,6 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
|
|||||||
finally:
|
finally:
|
||||||
# Always cleanup - this runs whether there was an exception or not
|
# Always cleanup - this runs whether there was an exception or not
|
||||||
if uuid:
|
if uuid:
|
||||||
# Call quit() as backup (Puppeteer/Playwright have internal cleanup, but this acts as safety net)
|
|
||||||
try:
|
try:
|
||||||
if update_handler and hasattr(update_handler, 'fetcher') and update_handler.fetcher:
|
if update_handler and hasattr(update_handler, 'fetcher') and update_handler.fetcher:
|
||||||
await update_handler.fetcher.quit(watch=watch)
|
await update_handler.fetcher.quit(watch=watch)
|
||||||
@@ -499,25 +506,35 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
|
|||||||
try:
|
try:
|
||||||
# Release UUID from processing (thread-safe)
|
# Release UUID from processing (thread-safe)
|
||||||
worker_pool.release_uuid_from_processing(uuid, worker_id=worker_id)
|
worker_pool.release_uuid_from_processing(uuid, worker_id=worker_id)
|
||||||
|
|
||||||
# Send completion signal
|
# Send completion signal
|
||||||
if watch:
|
if watch:
|
||||||
|
#logger.info(f"Worker {worker_id} sending completion signal for UUID {watch['uuid']}")
|
||||||
watch_check_update.send(watch_uuid=watch['uuid'])
|
watch_check_update.send(watch_uuid=watch['uuid'])
|
||||||
|
|
||||||
# Clean up all memory references BEFORE garbage collection
|
# Explicitly clean up update_handler and all its references
|
||||||
if update_handler:
|
if update_handler:
|
||||||
|
# Clear fetcher content using the proper method
|
||||||
if hasattr(update_handler, 'fetcher') and update_handler.fetcher:
|
if hasattr(update_handler, 'fetcher') and update_handler.fetcher:
|
||||||
update_handler.fetcher.clear_content()
|
update_handler.fetcher.clear_content()
|
||||||
|
|
||||||
|
# Clear processor references
|
||||||
if hasattr(update_handler, 'content_processor'):
|
if hasattr(update_handler, 'content_processor'):
|
||||||
update_handler.content_processor = None
|
update_handler.content_processor = None
|
||||||
del update_handler
|
|
||||||
update_handler = None
|
update_handler = None
|
||||||
|
|
||||||
# Clear large content variables
|
# Clear local contents variable if it still exists
|
||||||
if 'contents' in locals():
|
if 'contents' in locals():
|
||||||
del contents
|
del contents
|
||||||
|
|
||||||
# Force garbage collection after all references are cleared
|
# Note: We don't set watch = None here because:
|
||||||
|
# 1. watch is just a local reference to datastore.data['watching'][uuid]
|
||||||
|
# 2. Setting it to None doesn't affect the datastore
|
||||||
|
# 3. GC can't collect the object anyway (still referenced by datastore)
|
||||||
|
# 4. It would just cause confusion
|
||||||
|
|
||||||
|
# Force garbage collection after cleanup
|
||||||
import gc
|
import gc
|
||||||
gc.collect()
|
gc.collect()
|
||||||
|
|
||||||
|
|||||||
@@ -16,6 +16,13 @@ services:
|
|||||||
# Log output levels: TRACE, DEBUG(default), INFO, SUCCESS, WARNING, ERROR, CRITICAL
|
# Log output levels: TRACE, DEBUG(default), INFO, SUCCESS, WARNING, ERROR, CRITICAL
|
||||||
# - LOGGER_LEVEL=TRACE
|
# - LOGGER_LEVEL=TRACE
|
||||||
#
|
#
|
||||||
|
# Plugins! See https://changedetection.io/plugins for more plugins.
|
||||||
|
# Install additional Python packages (processor plugins, etc.)
|
||||||
|
# Example: Install the OSINT reconnaissance processor plugin
|
||||||
|
# - EXTRA_PACKAGES=changedetection.io-osint
|
||||||
|
# Multiple packages can be installed by separating with spaces:
|
||||||
|
# - EXTRA_PACKAGES=changedetection.io-osint another-plugin
|
||||||
|
#
|
||||||
#
|
#
|
||||||
# Uncomment below and the "sockpuppetbrowser" to use a real Chrome browser (It uses the "playwright" protocol)
|
# Uncomment below and the "sockpuppetbrowser" to use a real Chrome browser (It uses the "playwright" protocol)
|
||||||
# - PLAYWRIGHT_DRIVER_URL=ws://browser-sockpuppet-chrome:3000
|
# - PLAYWRIGHT_DRIVER_URL=ws://browser-sockpuppet-chrome:3000
|
||||||
|
|||||||
28
docker-entrypoint.sh
Executable file
28
docker-entrypoint.sh
Executable file
@@ -0,0 +1,28 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
set -e
|
||||||
|
|
||||||
|
# Install additional packages from EXTRA_PACKAGES env var
|
||||||
|
# Uses a marker file to avoid reinstalling on every container restart
|
||||||
|
INSTALLED_MARKER="/datastore/.extra_packages_installed"
|
||||||
|
CURRENT_PACKAGES="$EXTRA_PACKAGES"
|
||||||
|
|
||||||
|
if [ -n "$EXTRA_PACKAGES" ]; then
|
||||||
|
# Check if we need to install/update packages
|
||||||
|
if [ ! -f "$INSTALLED_MARKER" ] || [ "$(cat $INSTALLED_MARKER 2>/dev/null)" != "$CURRENT_PACKAGES" ]; then
|
||||||
|
echo "Installing extra packages: $EXTRA_PACKAGES"
|
||||||
|
pip3 install --no-cache-dir $EXTRA_PACKAGES
|
||||||
|
|
||||||
|
if [ $? -eq 0 ]; then
|
||||||
|
echo "$CURRENT_PACKAGES" > "$INSTALLED_MARKER"
|
||||||
|
echo "Extra packages installed successfully"
|
||||||
|
else
|
||||||
|
echo "ERROR: Failed to install extra packages"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
else
|
||||||
|
echo "Extra packages already installed: $EXTRA_PACKAGES"
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Execute the main command
|
||||||
|
exec "$@"
|
||||||
@@ -51,9 +51,9 @@ linkify-it-py
|
|||||||
|
|
||||||
# - Needed for apprise/spush, and maybe others? hopefully doesnt trigger a rust compile.
|
# - Needed for apprise/spush, and maybe others? hopefully doesnt trigger a rust compile.
|
||||||
# - Requires extra wheel for rPi, adds build time for arm/v8 which is not in piwheels
|
# - Requires extra wheel for rPi, adds build time for arm/v8 which is not in piwheels
|
||||||
# Pinned to 43.0.1 for ARM compatibility (45.x may not have pre-built ARM wheels)
|
# Pinned to 44.x for ARM compatibility and sslyze compatibility (sslyze requires <45) and (45.x may not have pre-built ARM wheels)
|
||||||
# Also pinned because dependabot wants specific versions
|
# Also pinned because dependabot wants specific versions
|
||||||
cryptography==46.0.3
|
cryptography==44.0.0
|
||||||
|
|
||||||
# apprise mqtt https://github.com/dgtlmoon/changedetection.io/issues/315
|
# apprise mqtt https://github.com/dgtlmoon/changedetection.io/issues/315
|
||||||
# use any version other than 2.0.x due to https://github.com/eclipse/paho.mqtt.python/issues/814
|
# use any version other than 2.0.x due to https://github.com/eclipse/paho.mqtt.python/issues/814
|
||||||
|
|||||||
Reference in New Issue
Block a user